In [7]:
from tesi.database.di import get_session_maker
from tesi.zappai.di import (
    get_cds_api,
    get_crop_repository,
    get_crop_yield_data_repository,
    get_location_repository,
)
from tesi.zappai.exceptions import CropNotFoundError
from tesi.zappai.repositories.climate_generative_model_repository import (
    FEATURES as CLIMATE_GENERATIVE_MODEL_FEATURES,
)
from tesi.zappai.repositories.dtos import CropYieldDataDTO

# Names of the columns selected for the crop yield dataset: the sowing/harvest
# calendar columns and the yield come first, followed by five summary
# statistics (mean, sum, std, min, max) for each climate variable.
CROP_YIELD_MODEL_FEATURES = [
    "sowing_year",
    "sowing_month",
    "harvest_year",
    "harvest_month",
    "_yield",
    # Every climate variable contributes "<variable>_<statistic>" columns,
    # grouped by variable and always in the same statistic order.
    *(
        f"{climate_variable}_{statistic}"
        for climate_variable in (
            "surface_solar_radiation_downwards",
            "surface_thermal_radiation_downwards",
            "surface_net_solar_radiation",
            "surface_net_thermal_radiation",
            "total_cloud_cover",
            "2m_dewpoint_temperature",
            "soil_temperature_level_3",
            "volumetric_soil_water_layer_3",
            "2m_temperature",
            "total_precipitation",
        )
        for statistic in ("mean", "sum", "std", "min", "max")
    ),
]

session_maker = get_session_maker()
location_repository = get_location_repository(session_maker=session_maker)
crop_repository = get_crop_repository(session_maker=session_maker)
crop_yield_data_repository = get_crop_yield_data_repository(
    session_maker=session_maker,
    crop_repository=crop_repository,
    location_repository=location_repository,
)

crop = await crop_repository.get_crop_by_name("maize")
if crop is None:
    raise CropNotFoundError()
crop_yield_data = await crop_yield_data_repository.get_crop_yield_data(crop_id=crop.id)
crop_yield_data_df = CropYieldDataDTO.from_list_to_dataframe(crop_yield_data)
crop_yield_data_df

Unnamed: 0,index,id,location_id,crop_id,sowing_year,sowing_month,harvest_year,harvest_month,_yield
0,293,b1ad4aeb-5f97-4bd5-9688-e69df69a1ed6,3d3b83c1-9dd2-4b5b-a06e-bd1f83a8188c,ab7e4a12-5fe5-4701-8cb0-e2f6d084c0f3,1980,3,1980,7,2450.000000
1,297,0d8cd626-8d0f-4672-a66b-1bb9878ac1a2,3d3b83c1-9dd2-4b5b-a06e-bd1f83a8188c,ab7e4a12-5fe5-4701-8cb0-e2f6d084c0f3,1980,3,1980,7,4385.500000
2,183,2741e6c8-351b-4937-9d49-c5fec92f1cda,423761b0-10b5-4ffa-b56b-ef5df7215e85,ab7e4a12-5fe5-4701-8cb0-e2f6d084c0f3,1980,4,1980,10,4400.000000
3,185,f245a458-2656-44e9-af82-02d190358114,fd9e5b23-d2ce-4f45-840b-9de60be1044f,ab7e4a12-5fe5-4701-8cb0-e2f6d084c0f3,1980,4,1980,10,10900.000000
4,446,c679e6f1-4d44-4f31-b0e3-de36eb422b7e,36cedd5a-0494-4798-8d35-6329dd45dbf4,ab7e4a12-5fe5-4701-8cb0-e2f6d084c0f3,1980,5,1980,10,9110.000000
...,...,...,...,...,...,...,...,...,...
719,268,92cf56bf-5546-432d-b766-549543f3c871,03aaa7fa-c8cc-4a9b-ae08-cb0dba323004,ab7e4a12-5fe5-4701-8cb0-e2f6d084c0f3,2016,12,2017,5,6516.583333
720,365,76fb78e7-d911-47e2-b61b-21107249ad14,e159462b-51c1-4288-b0c2-a2daebc1c5f7,ab7e4a12-5fe5-4701-8cb0-e2f6d084c0f3,2017,3,2017,10,10936.000000
721,260,f3fd20ad-0f0a-4178-bd15-d27976f28246,d7636c4f-6399-4275-8fab-4d3f531d898a,ab7e4a12-5fe5-4701-8cb0-e2f6d084c0f3,2017,4,2017,9,4050.000000
722,329,1721e8f5-bee0-4bd6-85df-d5dbf125af67,8d3b2bfc-0d0f-4ba2-b2aa-a30fdda829ff,ab7e4a12-5fe5-4701-8cb0-e2f6d084c0f3,2017,4,2017,8,11504.500000


In [8]:
import pandas as pd
from tesi.zappai.di import (
    get_future_climate_data_repository,
    get_past_climate_data_repository,
)
from tesi.zappai.exceptions import LocationNotFoundError
from tesi.zappai.repositories.dtos import ClimateDataDTO

# Both climate data repositories are handed the same CDS API client.
cds_api = get_cds_api()
future_climate_data_repository = get_future_climate_data_repository(
    cds_api=cds_api,
    session_maker=session_maker,
)
past_climate_data_repository = get_past_climate_data_repository(
    location_repository=location_repository,
    cds_api=cds_api,
    session_maker=session_maker,
)

# Progress counter: read by print_processed() and bumped once per handled row.
processed = 0

# Empty starting frame for the enriched dataset built further down in this cell.
enriched_crop_yield_data_df = pd.DataFrame()


def print_processed():
    """Print the progress counter on the current output line.

    Reads the module-level ``processed`` counter and the length of
    ``crop_yield_data_df``. The leading carriage return together with
    ``end=""`` (no trailing newline) lets successive calls overwrite the
    same line.
    """
    progress_line = f"\rPROCESSED {processed}/{len(crop_yield_data_df)}"
    print(progress_line, end="")


print_processed()

for index, row in crop_yield_data_df.iterrows():
    location = await location_repository.get_location_by_id(row["location_id"])
    if location is None:
        raise LocationNotFoundError()
    past_climate_data_df = ClimateDataDTO.from_list_to_dataframe(
        await past_climate_data_repository.get_past_climate_data(
            location_id=location.id,
            year_from=row["sowing_year"],
            month_from=row["sowing_month"],
            year_to=row["harvest_year"],
            month_to=row["harvest_month"],
        )
    )
    past_climate_data_df = past_climate_data_df[CLIMATE_GENERATIVE_MODEL_FEATURES]
    stats = ["mean", "sum", "std", "min", "max"]
    climate_data_stats = past_climate_data_df.agg(
        {feature: stats for feature in CLIMATE_GENERATIVE_MODEL_FEATURES}, # type: ignore
        axis=0,
    ) # type: ignore
    result_climate_data_stats_df = pd.DataFrame()
    for feature in CLIMATE_GENERATIVE_MODEL_FEATURES:
        for stat in stats:
            result_climate_data_stats_df[f"{feature}_{stat}"] = [
                climate_data_stats.loc[stat][feature]
            ] 
    # convert the row to a DataFrame
    crop_yield_data_row_df = pd.DataFrame([row])
    # since the row was a Series, remove the useless index column that the DataFrame inherited
    crop_yield_data_row_df = crop_yield_data_row_df.drop(columns=["index"])
    crop_yield_data_row_df = crop_yield_data_row_df.reset_index(drop=True)
    enriched_crop_yield_data_row = pd.concat([crop_yield_data_row_df, result_climate_data_stats_df], axis=1)
    enriched_crop_yield_data_df = pd.concat(
        [
            enriched_crop_yield_data_df,
            enriched_crop_yield_data_row
        ],
        axis=0,
    )
    processed += 1
    print_processed()

enriched_crop_yield_data_df = enriched_crop_yield_data_df[CROP_YIELD_MODEL_FEATURES]
enriched_crop_yield_data_df = enriched_crop_yield_data_df.reset_index(drop=True)
enriched_crop_yield_data_df.to_csv("tmp_enriched_crop_yield_data.csv")
enriched_crop_yield_data_df

PROCESSED 724/724

Unnamed: 0,sowing_year,sowing_month,harvest_year,harvest_month,_yield,surface_solar_radiation_downwards_mean,surface_solar_radiation_downwards_sum,surface_solar_radiation_downwards_std,surface_solar_radiation_downwards_min,surface_solar_radiation_downwards_max,...,2m_temperature_mean,2m_temperature_sum,2m_temperature_std,2m_temperature_min,2m_temperature_max,total_precipitation_mean,total_precipitation_sum,total_precipitation_std,total_precipitation_min,total_precipitation_max
0,1980,3,1980,7,2450.000000,1.492664e+07,7.463319e+07,2.259774e+06,1.187789e+07,1.795294e+07,...,298.948633,1494.743164,1.290209,297.227305,300.498265,0.005672,0.028358,0.002272,0.002224,0.008091
1,1980,3,1980,7,4385.500000,1.492664e+07,7.463319e+07,2.259774e+06,1.187789e+07,1.795294e+07,...,298.948633,1494.743164,1.290209,297.227305,300.498265,0.005672,0.028358,0.002272,0.002224,0.008091
2,1980,4,1980,10,4400.000000,1.602599e+07,1.121819e+08,4.262941e+06,8.639122e+06,2.108692e+07,...,288.512288,2019.586018,4.541800,281.377774,293.239102,0.002632,0.018421,0.000924,0.001446,0.003809
3,1980,4,1980,10,10900.000000,1.633093e+07,1.143165e+08,4.134066e+06,9.204025e+06,2.137154e+07,...,289.133427,2023.933988,4.565221,281.938730,293.975897,0.002188,0.015318,0.000966,0.001171,0.003612
4,1980,5,1980,10,9110.000000,1.968655e+07,1.181193e+08,4.120434e+06,1.339957e+07,2.400054e+07,...,294.760263,1768.561580,5.372894,285.702096,300.357666,0.002790,0.016740,0.001212,0.001089,0.004019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
719,2016,12,2017,5,6516.583333,1.838980e+07,1.103388e+08,1.691587e+06,1.696905e+07,2.170686e+07,...,294.519211,1767.115265,1.357224,292.773246,296.597136,0.004417,0.026503,0.003090,0.000387,0.007879
720,2017,3,2017,10,10936.000000,2.128693e+07,1.702954e+08,5.102423e+06,1.279282e+07,2.655724e+07,...,292.629490,2341.035918,5.406481,284.686877,298.908217,0.001339,0.010713,0.000794,0.000636,0.003156
721,2017,4,2017,9,4050.000000,2.104846e+07,1.262907e+08,4.024446e+06,1.386079e+07,2.446434e+07,...,295.281411,1771.688467,4.947208,288.201561,300.141113,0.002919,0.017515,0.001749,0.001644,0.006407
722,2017,4,2017,8,11504.500000,2.072073e+07,1.036037e+08,1.923371e+06,1.776398e+07,2.247837e+07,...,292.877662,1464.388309,5.774836,284.081490,297.555779,0.002245,0.011225,0.001139,0.000579,0.003528
