In [1]:
import pandas as pd
import numpy as np


In [2]:
from tesi.database.di import get_session_maker
from tesi.zappai.di import (
    get_cds_api,
    get_climate_generative_model_repository,
    get_future_climate_data_repository,
    get_location_repository,
    get_past_climate_data_repository,
)
from tesi.zappai.repositories.dtos import ClimateDataDTO, FutureClimateDataDTO
from tesi.zappai.utils import common


# Build the repositories through the project's DI factory helpers. They all
# share one session maker; both climate-data repositories also receive the CDS
# API client, and the generative-model repository is composed from the others.
session_maker = get_session_maker()
cds_api = get_cds_api()
location_repository = get_location_repository(session_maker=session_maker)
past_climate_data_repository = get_past_climate_data_repository(
    session_maker=session_maker,
    cds_api=cds_api,
    location_repository=location_repository,
)
future_climate_data_repository = get_future_climate_data_repository(
    session_maker=session_maker, cds_api=cds_api
)
climate_generative_model_repository = get_climate_generative_model_repository(
    session_maker=session_maker,
    location_repository=location_repository,
    past_climate_data_repository=past_climate_data_repository,
    future_climate_data_repository=future_climate_data_repository,
)

# Look up the example location by country and name. Top-level `await` is only
# valid because this cell runs under IPython/Jupyter.
location = await location_repository.get_location_by_country_and_name(
    country=common.EXAMPLE_LOCATION_COUNTRY, name=common.EXAMPLE_LOCATION_NAME
)
# When the lookup returns None, create the location from the `common.EXAMPLE_*`
# constants so the rest of the notebook always has a location to work with.
if location is None:
    location = await location_repository.create_location(
        country=common.EXAMPLE_LOCATION_COUNTRY,
        name=common.EXAMPLE_LOCATION_NAME,
        longitude=common.EXAMPLE_LONGITUDE,
        latitude=common.EXAMPLE_LATITUDE,
    )
# Past climate data for the location, converted to a DataFrame with
# ClimateDataDTO.from_list_to_dataframe. Later cells index it by (year, month).
past_climate_data_df = ClimateDataDTO.from_list_to_dataframe(
    await past_climate_data_repository.get_past_climate_data(location.id)
)
# Future climate data for the coordinates nearest to the location.
# NOTE(review): start_year=1970 / start_month=1 presumably mark the first month
# to include -- confirm against the repository method.
future_climate_data_df = FutureClimateDataDTO.from_list_to_dataframe(
    await future_climate_data_repository.get_future_climate_data_for_nearest_coordinates(
        longitude=location.longitude,
        latitude=location.latitude,
        start_year=1970,
        start_month=1,
    )
)

# Remove any stored generative model for this location before creating a new
# one. The recorded cell output shows Keras-style "Epoch n/50" progress lines,
# so this step appears to retrain the model on every run of the cell.
await climate_generative_model_repository.delete_climate_generative_model(
    location_id=location.id
)
# create_model_for_location returns three values, unpacked here as the model
# wrapper plus two test frames (presumably a held-out split -- TODO confirm).
climate_generative_model, x_df_test, y_df_test = (
    await climate_generative_model_repository.create_model_for_location(
        location.id
    )
)
# Expose the model and its two scalers as notebook-level names; later cells
# read `model`, `x_scaler` and `y_scaler` directly.
model = climate_generative_model.model
x_scaler = climate_generative_model.x_scaler
y_scaler = climate_generative_model.y_scaler


2024-06-10 23:53:18.345805: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-10 23:53:18.348163: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-10 23:53:18.380518: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/50


2024-06-10 23:53:20.890107: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-06-10 23:53:20.890583: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 1.1487 - root_mean_squared_error: 1.0712 - val_loss: 0.8313 - val_root_mean_squared_error: 0.9118
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.0024 - root_mean_squared_error: 1.0007 - val_loss: 0.7667 - val_root_mean_squared_error: 0.8756
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.9259 - root_mean_squared_error: 0.9619 - val_loss: 0.7082 - val_root_mean_squared_error: 0.8415
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.8273 - root_mean_squared_error: 0.9094 - val_loss: 0.6541 - val_root_mean_squared_error: 0.8088
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.7970 - root_mean_squared_error: 0.8922 - val_loss: 0.6037 - val_root_mean_squared_error: 0.7770
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [3]:
from typing import cast


# Run each test frame through its matching scaler (x with x_scaler, y with
# y_scaler). `cast` only informs the type checker that the result is an ndarray.
x_test_scaled, y_test_scaled = (
    cast(np.ndarray, scaler.transform(frame))
    for scaler, frame in ((x_scaler, x_df_test), (y_scaler, y_df_test))
)

In [4]:
# Number of rows in the seed window sliced from the past climate data in the
# back-test cell (presumably one row per month, given the (year, month) index).
SEQ_LENGTH = 12

In [5]:
# Ask the repository to generate climate data for the example location; per the
# method name, generation starts from the most recent stored past climate data.
# NOTE(review): the two shape tuples in the recorded output are presumably
# printed inside the repository call -- confirm and silence them if so.
generated_data = await climate_generative_model_repository.generate_climate_data_from_last_past_climate_data(
    location_id=location.id # type: ignore
)
# Bare last expression so Jupyter renders the result with its rich table repr.
generated_data

(55, 17)
(55, 10)


Unnamed: 0_level_0,Unnamed: 1_level_0,surface_net_thermal_radiation,volumetric_soil_water_layer_3,soil_temperature_level_3,total_cloud_cover,surface_net_solar_radiation,2m_dewpoint_temperature,snowfall,sin_year,cos_year,10m_u_component_of_wind,10m_v_component_of_wind,2m_temperature,evaporation,total_precipitation,surface_pressure,surface_solar_radiation_downwards,surface_thermal_radiation_downwards
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2024,6,-7874871.0,0.12202,303.003784,0.325753,19563754.0,289.750122,-6.6e-05,0.5,-0.8660254,1.45367,-0.767491,296.334045,1.3e-05,4.208952e-05,100727.203125,317.720551,350.751099
2024,7,-8926489.0,0.127585,299.363739,0.181082,19848468.0,287.917145,5e-06,1.224647e-16,-1.0,0.55891,-1.967647,301.334106,1.4e-05,0.0001239232,100769.820312,334.41507,377.009735
2024,8,-8975105.0,0.127361,299.632751,0.167174,19289234.0,288.184998,8e-06,-0.5,-0.8660254,0.563675,-1.132628,301.673767,2e-05,0.0004675805,100933.125,296.5271,376.974365
2024,9,-8991709.0,0.127426,299.678619,0.162486,19190834.0,288.235748,9e-06,-0.8660254,-0.5,0.274924,-0.889209,296.034058,3.1e-05,0.0006955144,101335.507812,221.370544,358.331879
2024,10,-8992670.0,0.127423,299.68396,0.162206,19179266.0,288.24115,9e-06,-1.0,-1.83697e-16,-0.105953,-0.337615,292.686401,2e-05,0.0002051339,101775.375,175.507004,335.226013
2024,11,-8992801.0,0.127422,299.684662,0.162168,19177696.0,288.241882,9e-06,-0.8660254,0.5,1.418953,0.516295,288.572266,3e-05,0.001186082,101814.765625,100.444435,331.546295
2024,12,-8992818.0,0.127422,299.684784,0.162163,19177484.0,288.241974,9e-06,-0.5,0.8660254,1.879587,1.629478,286.130249,2.6e-05,0.001764613,101981.5,79.284607,315.827393
2025,5,-8747173.0,0.13286,298.950226,0.218485,19437806.0,287.291046,1e-06,0.8660254,-0.5,-0.774936,-0.577503,295.110168,1e-05,0.0002705439,101174.890625,287.381165,353.424957
2025,6,-8747173.0,0.13286,298.950226,0.218485,19437804.0,287.291046,1e-06,0.5,-0.8660254,1.599512,0.31496,297.919189,1.7e-05,0.00107548,100441.601562,320.666138,371.34549
2025,7,-8747173.0,0.13286,298.950226,0.218485,19437804.0,287.291046,1e-06,1.224647e-16,-1.0,1.944831,-0.855374,300.765564,2e-05,0.0004460884,100505.960938,311.469879,387.377411


In [6]:
# Back-test: seed the generator with the SEQ_LENGTH rows that end 12 rows
# before the last past observation, generate forward from there, and keep the
# final 12 real rows in `true_data_df` for comparison.
#
# NOTE(review): `get_features` and `generate_data_from_seed` are not imported or
# defined in any visible cell (the recorded run of this cell stopped with
# "NameError: name 'get_features' is not defined"). Import them in the imports
# cell before running this cell on a fresh kernel.
filtered_past_climate_data = past_climate_data_df[get_features()]

# start generating from 1 year ago: the seed is the SEQ_LENGTH rows that
# precede the final 12 rows (explicit positional slicing via .iloc).
seed_data = filtered_past_climate_data.iloc[(-12 - SEQ_LENGTH) : -12]
start_year, start_month = seed_data.index[-1]

# Drop the coordinate columns first, then apply the date filter to that
# column-dropped frame so the drop actually takes effect on the final result.
future_climate_features_df = future_climate_data_df.drop(
    columns=["latitude", "longitude"], errors="ignore"
)
future_years = future_climate_features_df.index.get_level_values("year")
future_months = future_climate_features_df.index.get_level_values("month")
# Keep only rows strictly after the seed's last (year, month).
filtered_future_climate_data_df = future_climate_features_df[
    (future_years > start_year)
    | ((future_years == start_year) & (future_months > start_month))
]

# this generated data start from one year ago
generated_data_df = generate_data_from_seed(
    model=model,
    x_scaler=x_scaler,
    y_scaler=y_scaler,
    seed_data=seed_data.to_numpy(),
    future_climate_data_df=filtered_future_climate_data_df,
)
# Independent copy of the last 12 real rows: a later cell adds a comparison
# column to this frame, which must not act on a slice of past_climate_data_df.
true_data_df = past_climate_data_df.iloc[-12:].copy()

NameError: name 'get_features' is not defined

In [None]:
def plot_values(df: pd.DataFrame, values: list[str]):
    """Draw the selected columns of ``df`` as lines over a "YYYY-MM" x axis.

    Args:
        df: Frame that exposes ``year`` and ``month`` after ``reset_index()``
            (for example as index levels). The caller's frame is not modified;
            the label column is added to the reset copy only.
        values: Names of the columns to plot, one line per column.

    Returns:
        None. The figure is displayed with ``plt.show()``.

    NOTE(review): ``plt`` (conventionally ``matplotlib.pyplot``) is not
    imported in any visible cell of this notebook; it must already be in the
    kernel namespace for this function to run.
    """
    flat = df.reset_index()

    # Build zero-padded month labels such as "2024-06" for the x axis.
    year_text = flat["year"].astype(str)
    month_text = flat["month"].astype(str).str.zfill(2)
    flat["YYYY-MM"] = year_text + "-" + month_text

    # DataFrame.plot opens a new figure and returns its axes, which is also the
    # current pyplot axes for the plt.* calls below.
    axes = flat.plot(x="YYYY-MM", y=values, marker="o")
    axes.set_xlabel("Month")
    axes.set_ylabel("Values")
    plt.xticks(rotation=45)
    axes.grid(True)
    plt.show()

In [None]:
# Compare the first 12 generated rows with the real observations, and the real
# 2m temperature with the future-climate (CMIP5) projection for the same rows.
#
# Work on explicit copies / `.assign` results instead of adding columns to
# slices, which avoids pandas' SettingWithCopy behaviour and chained indexing.
# NOTE(review): column assignment aligns on index labels, so this assumes the
# three frames share the same (year, month) labels -- confirm; rows whose
# labels do not match end up as NaN.
generated_data_df = generated_data_df.iloc[:12].copy()
generated_data_df["actual_soil_temperature_level_3"] = true_data_df[
    "soil_temperature_level_3"
]
true_data_df = true_data_df.assign(
    cmip5_2m_temperature=filtered_future_climate_data_df["2m_temperature"].iloc[:12]
)

plot_values(
    df=generated_data_df,
    values=["soil_temperature_level_3", "actual_soil_temperature_level_3"],
)
plot_values(df=true_data_df, values=["2m_temperature", "cmip5_2m_temperature"])

In [None]:
# Plot the volumetric soil water (layer 3) series from `total_df`.
# NOTE(review): `total_df` is not defined in any visible cell of this notebook;
# on a fresh kernel this call raises NameError unless it is built earlier.
plot_values(df=total_df, values=["volumetric_soil_water_layer_3"])