In [1]:
import pandas as pd
import numpy
from IPython.display import Markdown
from sklearn.preprocessing import StandardScaler

from utils.common.merge_df import (merge_forcast_and_train_df,
                                   remove_NaN_rows)
from utils.common.train_model import (train_model,
                                      test_model)

In [2]:
# Read the four parquet inputs in one ordered unpacking: the weather
# forecasts, the two energy test sets and the energy training set.
# map() is consumed left to right, so the files are read in the listed order.
df_forecast, df_test1, df_test2, df_train = map(
    pd.read_parquet,
    (
        '../Daten/forecasts.parquet',
        '../Daten/energy_test1.parquet',
        '../Daten/energy_test2.parquet',
        '../Daten/energy_train.parquet',
    ),
)

In [3]:
# Both names are rebound to the frames returned by remove_NaN_rows, so every
# later cell that reads df_train / df_forecast sees the returned versions,
# not the frames exactly as loaded from parquet.
# NOTE(review): presumably this drops rows containing NaN from both frames —
# confirm against utils.common.merge_df.
df_train, df_forecast = remove_NaN_rows(df_train, df_forecast)
# Combine the forecast frame with the training energy frame into merged_df,
# the table the model is trained on further down.
# NOTE(review): the join keys and join type are decided inside
# merge_forcast_and_train_df — verify there.
merged_df = merge_forcast_and_train_df(df_forecast, df_train)

In [4]:
# Preview the first rows of the merged training frame (bare last expression,
# so the notebook renders it as a table).
merged_df.head()

Unnamed: 0,dtm,ref_datetime,Solar_capacity_mwp,Solar_MWh,valid_time,SolarDownwardRadiation,CloudCover,Temperature,Weather Model,valid_datetime
0,2020-09-21 00:00:00+00:00,2020-09-20 00:00:00+00:00,2130.510089,0.0,24,0.0,0.327759,12.216522,DWD ICON,2020-09-21 00:00:00+00:00
1,2020-09-21 00:00:00+00:00,2020-09-20 00:00:00+00:00,2130.510089,0.0,24,0.0,0.002,12.240011,NCEP GFS,2020-09-21 00:00:00+00:00
2,2020-09-21 01:00:00+00:00,2020-09-20 00:00:00+00:00,2130.510089,0.0,25,0.008838,0.436067,12.015985,DWD ICON,2020-09-21 01:00:00+00:00
3,2020-09-21 01:00:00+00:00,2020-09-20 00:00:00+00:00,2130.510089,0.0,25,0.0,0.003,12.032483,NCEP GFS,2020-09-21 01:00:00+00:00
4,2020-09-21 02:00:00+00:00,2020-09-20 00:00:00+00:00,2130.510089,0.0,26,0.015384,0.468752,11.813635,DWD ICON,2020-09-21 02:00:00+00:00


In [5]:
# Forecast columns used as model inputs, and the column to predict.
feature_columns = ["SolarDownwardRadiation", "Temperature", "CloudCover"]
target_column = "Solar_MWh"

# train_model hands back four objects: the fitted model, a Markdown
# evaluation report, the scaler and test_col. The last two are passed on to
# test_model when predicting on the test sets.
model, evaluation_md, scaler, test_col = train_model(
    merged_df,
    feature_columns,
    target_column,
)

In [6]:
# Wrap the Markdown text produced by train_model so the notebook renders it
# as formatted tables instead of a raw string.
evaluation_report = Markdown(evaluation_md)
display(evaluation_report)

### LinearRegression() Evaluation

| Dataset | R² | RMSE | MAE | Rows | Columns |
|---------|--------:|------------:|--------:|-------:|-------:|
| Train   | 0.86552 | 67.52 | 37.09 | 31524 | 3 |
| Test    | 0.86950 | 66.51 | 36.41 | 7881 | 3 |

### Top 10 Coefficients

| Feature Name           |   Coefficient |
|:-----------------------|--------------:|
| SolarDownwardRadiation |     173.193   |
| CloudCover             |       7.67729 |
| Temperature            |      -1.95333 |

Number of coefficients that are zero: 0/3


Alpha value: 0


In [7]:
# Build the prediction frames that are later written out as .pkl files.

# Step 1: merge the forecast frame with each energy test set, test1 first and
# then test2, using the same merge helper as for the training data.
merged_test_df, merged_test2_df = (
    merge_forcast_and_train_df(df_forecast, energy_df)
    for energy_df in (df_test1, df_test2)
)

# Step 2: run the trained model on each merged frame, again test1 first.
# test_model receives the model, the scaler and test_col from train_model.
# NOTE(review): presumably it scales the feature columns before predicting —
# confirm in utils.common.train_model.
merged_test_pred_df, merged_test2_pred_df = (
    test_model(model, scaler, merged_frame, test_col)
    for merged_frame in (merged_test_df, merged_test2_df)
)

In [8]:
# Inspect the merged frame for test set 1. This is a bare expression, so the
# notebook renders the frame; pandas abbreviates long frames with a "..." row.
merged_test_df

Unnamed: 0,dtm,ref_datetime,Solar_capacity_mwp,valid_time,SolarDownwardRadiation,CloudCover,Temperature,Weather Model,valid_datetime
0,2023-01-01 00:00:00+00:00,2022-12-31 00:00:00+00:00,2262.233423,24,0.000000,0.999570,10.612988,DWD ICON,2023-01-01 00:00:00+00:00
1,2023-01-01 00:00:00+00:00,2022-12-31 00:00:00+00:00,2262.233423,24,0.000000,0.995250,11.345912,NCEP GFS,2023-01-01 00:00:00+00:00
2,2023-01-01 01:00:00+00:00,2022-12-31 00:00:00+00:00,2262.233423,25,0.000000,1.000000,10.425348,DWD ICON,2023-01-01 01:00:00+00:00
3,2023-01-01 01:00:00+00:00,2022-12-31 00:00:00+00:00,2262.233423,25,0.000000,0.988300,10.601108,NCEP GFS,2023-01-01 01:00:00+00:00
4,2023-01-01 02:00:00+00:00,2022-12-31 00:00:00+00:00,2262.233423,26,0.021875,1.000000,10.405902,DWD ICON,2023-01-01 02:00:00+00:00
...,...,...,...,...,...,...,...,...,...
14473,2023-10-30 22:00:00+00:00,2023-10-29 18:00:00+00:00,2452.979156,28,0.000000,0.834100,10.004807,NCEP GFS,2023-10-30 22:00:00+00:00
14474,2023-10-30 23:00:00+00:00,2023-10-29 18:00:00+00:00,2452.950587,29,0.001482,0.684612,8.263562,DWD ICON,2023-10-30 23:00:00+00:00
14475,2023-10-30 23:00:00+00:00,2023-10-29 18:00:00+00:00,2452.950587,29,0.000000,0.919800,9.773641,NCEP GFS,2023-10-30 23:00:00+00:00
14476,2023-10-31 00:00:00+00:00,2023-10-30 00:00:00+00:00,2452.922019,24,0.015666,0.638090,8.282245,DWD ICON,2023-10-31 00:00:00+00:00


In [9]:


# Persist both prediction frames as pickle files in the current working
# directory, test1 first and then test2.
for prediction_frame, pickle_name in (
    (merged_test_pred_df, 'test1.pkl'),
    (merged_test2_pred_df, 'test2.pkl'),
):
    prediction_frame.to_pickle(pickle_name)