In [17]:
import pandas as pd

In [18]:
# Load the Gramíneas dataset and convert 'fecha' to datetime so the
# `.dt` accessor can be used in the cells below.
df = pd.read_csv("new_datasets/gramineas_2014_2025.csv")
df = df.assign(fecha=pd.to_datetime(df['fecha']))

### Adding biological features (aggregates from the previous year)

In [19]:
# Build "memory" features: for every calendar year with enough rows, aggregate
# weather / pollutant columns and attach those aggregates to every row of the
# FOLLOWING year (year_target = year + 1). Rows whose previous year was not
# aggregated (e.g. the first year in the data) get NaN in all feature columns.

# A year with fewer rows than this is skipped and produces no features.
MIN_ROWS_PER_YEAR = 260
# Base temperature (°C) subtracted from each row before accumulating GDD.
GDD_BASE_TEMP_C = 5

# Names of the columns this cell adds to `df`, in output order.
MEMORY_COLUMNS = [
    'precip_autumn_last_year',
    'precip_winter_last_year',
    'temp_mean_spring_summer_last',
    'humidity_sum_spring_summer_last',
    'co2_mean_april_may_last',
    'o3_mean_april_may_last',
    'no2_mean_april_may_last',
    'drought_days_summer_last',
    'growing_degree_days_last',
]

# Re-run safety: if the loaded frame already carries these columns (for
# example because a previous run saved them back into the same CSV), drop them
# first. Otherwise the merge below would emit duplicated `_x` / `_y` columns.
df = df.drop(columns=MEMORY_COLUMNS, errors='ignore')

years = df['fecha'].dt.year.unique()
memory_features = []

for year in years:
    # Rows belonging to this calendar year (read-only view; nothing is mutated).
    df_year = df[df['fecha'].dt.year == year]

    # Skip years that are too sparse to aggregate.
    if len(df_year) < MIN_ROWS_PER_YEAR:
        continue

    month = df_year['fecha'].dt.month
    mask_am = month.isin([4, 5])                    # April-May
    mask_ma = month.isin([3, 4, 5, 6, 7, 8])        # March-August
    mask_summer = month.isin([6, 7, 8])             # June-August
    mask_autumn = month.isin([10, 11, 12])          # October-December
    mask_winter = month.isin([1, 2, 3])             # January-March
    mask_gdd = month >= 2                           # February-December

    # 1. "Autumn" precipitation: total rain over October-December.
    # NOTE(review): an earlier comment described this window as Sep-Nov; the
    # months actually selected are Oct-Dec. Confirm which one is intended.
    precip_autumn = df_year.loc[mask_autumn, 'rain (mm)'].sum()

    # 2. "Winter" precipitation: total rain over January-March of the same
    # calendar year.
    # NOTE(review): an earlier comment described this window as Dec-Feb; the
    # months actually selected are Jan-Mar. Confirm which one is intended.
    precip_winter = df_year.loc[mask_winter, 'rain (mm)'].sum()

    # 3. Mean temperature over March-August.
    temp_mean_ss = df_year.loc[mask_ma, 'temperature_2m (°C)'].mean()

    # 4. Summed relative humidity over March-August.
    humidity_sum_ss = df_year.loc[mask_ma, 'relative_humidity_2m (%)'].sum()

    # 5. Mean of the 'CO (mg/m3)' column over April-May. The variable and the
    # output key say "co2", but the source column is CO; the key is kept
    # unchanged so downstream column names stay the same.
    co2_mean_am = df_year.loc[mask_am, 'CO (mg/m3)'].mean()

    # 6. Mean O3 over April-May.
    o3_mean_am = df_year.loc[mask_am, 'O3 (ug/m3)'].mean()

    # 7. Mean NO2 over April-May.
    no2_mean_am = df_year.loc[mask_am, 'NO2 (ug/m3)'].mean()

    # 8. Summer drought days: number of June-August rows with rain < 1 mm.
    # Rows with missing rain compare as False and are therefore not counted.
    drought_days = (df_year.loc[mask_summer, 'rain (mm)'] < 1.0).sum()

    # 9. Growing degree days over February-December: sum of the temperature
    # excess above the base temperature, with negative excess clipped to 0.
    # January is excluded (the original author notes grasses are inactive then).
    gdd = (
        (df_year.loc[mask_gdd, 'temperature_2m (°C)'] - GDD_BASE_TEMP_C)
        .clip(lower=0)
        .sum()
    )

    memory_features.append({
        'year_target': year + 1,
        'precip_autumn_last_year': precip_autumn,
        'precip_winter_last_year': precip_winter,
        'temp_mean_spring_summer_last': temp_mean_ss,
        'humidity_sum_spring_summer_last': humidity_sum_ss,
        'co2_mean_april_may_last': co2_mean_am,
        'o3_mean_april_may_last': o3_mean_am,
        'no2_mean_april_may_last': no2_mean_am,
        'drought_days_summer_last': drought_days,
        'growing_degree_days_last': gdd
    })

# Explicit columns keep the merge key present even when no year qualified
# (an empty list would otherwise yield a frame without 'year_target').
df_memory = pd.DataFrame(
    memory_features, columns=['year_target'] + MEMORY_COLUMNS
).astype({'year_target': 'int64'})

# Left-join on a temporary 'year' key so every original row is kept, then
# remove both helper key columns.
df = pd.merge(
    df.assign(year=df['fecha'].dt.year),
    df_memory,
    left_on='year',
    right_on='year_target',
    how='left',
).drop(columns=['year_target', 'year'])
df

Unnamed: 0.1,Unnamed: 0,tipo_polinico,granos_de_polen_x_metro_cubico,fecha,año,temperature_2m (°C),wind_speed_10m (km/h),wind_gusts_10m (km/h),relative_humidity_2m (%),wind_direction_10m (°),...,Tolueno (ug/m3),precip_autumn_last_year,precip_winter_last_year,temp_mean_spring_summer_last,humidity_sum_spring_summer_last,co2_mean_april_may_last,o3_mean_april_may_last,no2_mean_april_may_last,drought_days_summer_last,growing_degree_days_last
0,0,Gramíneas,0.0,2014-01-01,2014,1.066667,12.283333,22.187500,89.375000,173.541667,...,0.8,,,,,,,,,
1,1,Gramíneas,0.0,2014-01-02,2014,3.341667,15.962500,28.450000,85.958333,171.000000,...,1.6,,,,,,,,,
2,2,Gramíneas,0.0,2014-01-03,2014,5.587500,14.808333,27.833333,87.875000,195.541667,...,2.4,,,,,,,,,
3,3,Gramíneas,1.0,2014-01-04,2014,6.216667,15.016667,29.950000,85.125000,171.166667,...,0.9,,,,,,,,,
4,4,Gramíneas,0.0,2014-01-05,2014,4.987500,12.566667,23.804167,91.958333,227.375000,...,1.1,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4340,4340,Gramíneas,0.0,2025-11-19,2025,2.995833,12.670833,27.837500,86.291667,190.916667,...,4.0,5.975,6.808333,15.858311,12733.625,0.319672,71.901639,21.557377,92.0,2699.775
4341,4341,Gramíneas,1.0,2025-11-20,2025,1.550000,13.504167,29.700000,90.666667,233.500000,...,5.3,5.975,6.808333,15.858311,12733.625,0.319672,71.901639,21.557377,92.0,2699.775
4342,4342,Gramíneas,1.0,2025-11-21,2025,-1.445833,8.129167,17.633333,87.500000,272.708333,...,2.4,5.975,6.808333,15.858311,12733.625,0.319672,71.901639,21.557377,92.0,2699.775
4343,4343,Gramíneas,0.0,2025-11-22,2025,-2.070833,7.358333,15.595833,87.250000,214.583333,...,3.3,5.975,6.808333,15.858311,12733.625,0.319672,71.901639,21.557377,92.0,2699.775


In [20]:
# Persist the enriched frame. index=False keeps the RangeIndex out of the file;
# without it every save/load cycle adds another "Unnamed: 0" column.
# NOTE: this overwrites the same CSV that the notebook reads as its input.
df.to_csv('new_datasets/gramineas_2014_2025.csv', index=False)