In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet

In [26]:
# Load the CMIP6 tasmin projection table for the eastern region (long format).
df_model = pd.read_parquet(
    path='../data/train/train_east_tasmin_CMIP6_projections.parquet.gzip',
)

In [27]:
# Keep only rows with a strictly positive tsmin_df; a zero marks a cell for
# which the models provide no projection (non-positive and NaN rows are dropped).
has_projection = df_model['tsmin_df'] > 0
df_model = df_model.loc[has_projection]

In [28]:
# Discard the 'spatial_ref' column. errors='ignore' keeps the cell re-runnable:
# a second execution (column already gone) is a no-op instead of a KeyError.
df_model = df_model.drop(columns='spatial_ref', errors='ignore')

In [29]:
# Build a combined "<model>_<scenario>" key; it becomes the column label
# when the table is pivoted to wide format.
scenario_key = df_model['model'].add('_').add(df_model['scenario'])
df_model['model_scenario'] = scenario_key

In [32]:
# Round the coordinates to two decimals so they can serve as join keys.
for coord in ('lat', 'lon'):
    df_model[coord] = df_model[coord].round(2)

# Parse the time stamps as timezone-aware UTC datetimes.
df_model['time'] = pd.to_datetime(df_model['time'], utc=True)

In [33]:
# Reshape long -> wide: one row per (lat, lon, time), one column per
# model_scenario holding the mean tsmin_df of that group.
wide_projections = df_model.pivot_table(
    values='tsmin_df',
    index=['lat', 'lon', 'time'],
    columns='model_scenario',
    aggfunc='mean',
)
df_model = wide_projections.reset_index()

In [34]:
# Display the pivoted projections: key columns lat/lon/time followed by one
# column per model_scenario.
df_model

model_scenario,lat,lon,time,ACCESS-CM2_ssp126,ACCESS-CM2_ssp245,ACCESS-CM2_ssp370,ACCESS-CM2_ssp585,ACCESS-ESM_ssp126,ACCESS-ESM_ssp245,ACCESS-ESM_ssp370,...,INM-CM5-0_ssp370,INM-CM5-0_ssp585,KACE-1-0-G_ssp126,KACE-1-0-G_ssp245,KACE-1-0-G_ssp370,KACE-1-0-G_ssp585,MIROC-ES2L_ssp126,MIROC-ES2L_ssp245,MIROC-ES2L_ssp370,MIROC-ES2L_ssp585
0,35.1,-83.62,2015-01-01 12:00:00+00:00,251.63474,252.06392,252.15169,251.70645,273.33630,273.53960,273.49640,...,269.90930,270.09020,278.10648,278.83720,278.24330,278.27466,264.03247,264.03406,262.47360,264.32068
1,35.1,-83.62,2015-01-02 12:00:00+00:00,254.15775,255.24792,255.49002,254.17682,265.58267,264.84850,264.90930,...,276.97530,277.15240,281.22906,281.58093,281.46558,281.40340,265.15390,265.91705,264.48440,264.52344
2,35.1,-83.62,2015-01-03 12:00:00+00:00,267.16354,267.74893,268.47195,267.29413,266.42035,265.71085,266.17026,...,285.51932,285.56870,285.28800,284.30340,284.72250,284.55493,267.80255,268.20600,266.26380,266.61410
3,35.1,-83.62,2015-01-04 12:00:00+00:00,273.47693,272.42407,273.11612,273.44495,263.98276,264.14957,264.12283,...,269.51440,269.55612,276.68470,277.61444,276.48834,276.30090,269.71542,269.97687,268.55447,268.64594
4,35.1,-83.62,2015-01-05 12:00:00+00:00,272.02573,272.58884,272.39140,272.39800,265.02353,265.68875,265.05500,...,267.51138,267.77520,273.78326,273.09235,271.57773,271.41202,264.71835,264.73535,264.35602,264.46368
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
286379,39.6,-78.12,2021-12-27 12:00:00+00:00,265.77496,272.40560,270.90020,268.89868,271.28950,276.47840,275.30685,...,272.74744,269.23395,274.96344,271.29617,278.78714,259.01720,266.93384,270.62204,266.63736,268.49435
286380,39.6,-78.12,2021-12-28 12:00:00+00:00,272.93190,272.64508,278.27853,264.97437,269.86578,274.35434,272.98145,...,272.83356,269.36716,272.91098,272.91916,274.65942,261.71085,269.60352,266.90454,266.68300,270.02658
286381,39.6,-78.12,2021-12-29 12:00:00+00:00,267.07077,278.03910,269.40897,262.29263,274.14380,272.68290,274.35516,...,272.02713,267.32290,270.18580,275.26980,274.93588,260.98212,271.79230,270.02580,266.27673,272.29114
286382,39.6,-78.12,2021-12-30 12:00:00+00:00,268.62207,279.24940,268.46478,265.46100,270.40808,272.80368,274.43436,...,273.90220,269.62130,270.07843,275.46362,271.30750,262.82413,267.53308,278.56836,267.32758,273.84360


In [66]:
# Load part 1 of the ERA5 observation table.
df_actual_1 = pd.read_parquet(
    path='../data/external/test_era5/easternmountain_H_area_orchards_part1.parquet.gzip',
)

In [67]:
# Load part 2 of the ERA5 observation table.
df_actual_2 = pd.read_parquet(
    path='../data/external/test_era5/easternmountain_H_area_orchards_part2.parquet.gzip',
)

In [68]:
# Stack both parts row-wise into one frame with a fresh 0..n-1 index.
observation_parts = [df_actual_1, df_actual_2]
df_actual = pd.concat(objs=observation_parts, axis=0, ignore_index=True)

In [69]:
# Round the coordinates to two decimals so they line up with the rounded
# coordinates of the projection table.
df_actual['lat'] = df_actual['lat'].round(2)
df_actual['lon'] = df_actual['lon'].round(2)

# Parse the time stamps as timezone-aware UTC, exactly like df_model['time'].
# utc=True guarantees a datetime64[..., UTC] dtype whether the stored values
# are naive or carry an offset, so the two time keys are always comparable.
df_actual['timestamp_utc'] = pd.to_datetime(df_actual['timestamp_utc'], utc=True)

In [70]:
# Index the observations by time stamp (needed for the time-based daily
# aggregation). Rebinding instead of inplace=True keeps the step explicit
# and chainable.
df_actual = df_actual.set_index('timestamp_utc')

In [71]:
# Daily minimum per grid point.
# A plain resample('D').min() reduces every column over *all* rows of a day,
# lat and lon included, so several locations collapse into a single row whose
# coordinates and values come from different places. Grouping by the daily
# bin of the time index together with lat/lon keeps one row per location/day.
df_actual = (
    df_actual
    .groupby([pd.Grouper(freq='D'), 'lat', 'lon'])
    .min()
    .reset_index()
)

In [74]:
# Display all rows, restricted to the columns up to and including
# 'temperature_degC' (label-based slices are inclusive).
df_actual.loc[:, :'temperature_degC']

Unnamed: 0,timestamp_utc,lat,lon,temperature_degC
0,2023-01-01 00:00:00+00:00,34.75,-84.25,0.190002
1,2023-01-02 00:00:00+00:00,34.75,-84.25,-0.320007
2,2023-01-03 00:00:00+00:00,34.75,-84.25,5.800018
3,2023-01-04 00:00:00+00:00,34.75,-84.25,6.740021
4,2023-01-05 00:00:00+00:00,34.75,-84.25,0.290009
...,...,...,...,...
360,2023-12-27 00:00:00+00:00,34.75,-84.25,2.260010
361,2023-12-28 00:00:00+00:00,34.75,-84.25,-0.799988
362,2023-12-29 00:00:00+00:00,34.75,-84.25,-2.589996
363,2023-12-30 00:00:00+00:00,34.75,-84.25,-2.410004


In [54]:
# Left-join the observations onto the projections by grid point and UTC
# calendar day.
# The projection time stamps sit at 12:00 UTC while the daily-aggregated
# observations sit at 00:00 UTC, so joining on the raw time stamps never
# matches and every observation column comes back NaN. Both sides therefore
# get a temporary key normalized to midnight UTC; it is dropped again after
# the merge so the resulting columns are the same as before.
model_keyed = df_model.assign(
    _join_day=pd.to_datetime(df_model['time'], utc=True).dt.normalize()
)
actual_keyed = df_actual.assign(
    _join_day=pd.to_datetime(df_actual['timestamp_utc'], utc=True).dt.normalize()
)
df = pd.merge(
    model_keyed,
    actual_keyed,
    how='left',
    on=['lat', 'lon', '_join_day'],
).drop(columns='_join_day')

In [55]:
# Display the merged frame: projection columns followed by the observation
# columns (NaN where no observation matched the left-hand row).
df

Unnamed: 0,lat,lon,time,ACCESS-CM2_ssp126,ACCESS-CM2_ssp245,ACCESS-CM2_ssp370,ACCESS-CM2_ssp585,ACCESS-ESM_ssp126,ACCESS-ESM_ssp245,ACCESS-ESM_ssp370,...,total_solar_radiation,surface_direct_solar_radiation,surface_diffuse_solar_radiation,total_cloud_cover,total_precipitation_mm_of_water_equivalent,relative_humidity_0_1,model,timestamp_local,snowfall_mm_of_water_equivalent,snow_depth_mm_of_water_equivalent
0,35.1,-83.62,2015-01-01 12:00:00+00:00,251.63474,252.06392,252.15169,251.70645,273.33630,273.53960,273.49640,...,,,,,,,,,,
1,35.1,-83.62,2015-01-02 12:00:00+00:00,254.15775,255.24792,255.49002,254.17682,265.58267,264.84850,264.90930,...,,,,,,,,,,
2,35.1,-83.62,2015-01-03 12:00:00+00:00,267.16354,267.74893,268.47195,267.29413,266.42035,265.71085,266.17026,...,,,,,,,,,,
3,35.1,-83.62,2015-01-04 12:00:00+00:00,273.47693,272.42407,273.11612,273.44495,263.98276,264.14957,264.12283,...,,,,,,,,,,
4,35.1,-83.62,2015-01-05 12:00:00+00:00,272.02573,272.58884,272.39140,272.39800,265.02353,265.68875,265.05500,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
286379,39.6,-78.12,2021-12-27 12:00:00+00:00,265.77496,272.40560,270.90020,268.89868,271.28950,276.47840,275.30685,...,,,,,,,,,,
286380,39.6,-78.12,2021-12-28 12:00:00+00:00,272.93190,272.64508,278.27853,264.97437,269.86578,274.35434,272.98145,...,,,,,,,,,,
286381,39.6,-78.12,2021-12-29 12:00:00+00:00,267.07077,278.03910,269.40897,262.29263,274.14380,272.68290,274.35516,...,,,,,,,,,,
286382,39.6,-78.12,2021-12-30 12:00:00+00:00,268.62207,279.24940,268.46478,265.46100,270.40808,272.80368,274.43436,...,,,,,,,,,,
