In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
import lightgbm as lgb

In [2]:
ntc_df_original = pd.read_csv("../data/NTC.csv", parse_dates=["datetime"], index_col="datetime")

ntc_df = ntc_df_original.copy()
ntc_df.index = ntc_df.tz_localize('Europe/Brussels', ambiguous="infer").tz_convert('UTC').index
ntc_df

Unnamed: 0_level_0,CH_AT,CH_DE,CH_FR,CH_IT,AT_CH,DE_CH,FR_CH,IT_CH
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-12-31 23:00:00+00:00,700.0,4000.0,1200.0,2513.0,1200.0,800.0,3000.0,1910.0
2019-01-01 00:00:00+00:00,700.0,4000.0,1200.0,2513.0,1200.0,800.0,3000.0,1910.0
2019-01-01 01:00:00+00:00,700.0,4000.0,1200.0,2513.0,1200.0,800.0,3000.0,1910.0
2019-01-01 02:00:00+00:00,700.0,4000.0,1200.0,2513.0,1200.0,800.0,3000.0,1910.0
2019-01-01 03:00:00+00:00,700.0,4000.0,1200.0,2513.0,1200.0,800.0,3000.0,1910.0
...,...,...,...,...,...,...,...,...
2021-12-31 18:00:00+00:00,1200.0,4000.0,1400.0,4069.0,1200.0,800.0,3200.0,1810.0
2021-12-31 19:00:00+00:00,1200.0,4000.0,1400.0,4069.0,1200.0,800.0,3200.0,1810.0
2021-12-31 20:00:00+00:00,1200.0,4000.0,1400.0,3953.0,1200.0,800.0,3200.0,1810.0
2021-12-31 21:00:00+00:00,1200.0,4000.0,1400.0,3780.0,1200.0,800.0,3200.0,1810.0


In [3]:
active_losses_df = pd.read_csv(
    "../data/Avtice-losses.csv", skiprows=2, names=["datetime", "MWh"], parse_dates=["datetime"], index_col="datetime")

active_losses_df.index = active_losses_df.tz_localize('Europe/Brussels', ambiguous="infer").tz_convert('UTC').index
active_losses_df.index = active_losses_df.index - pd.Timedelta(minutes=15)
active_losses_df["MWh"] = active_losses_df["MWh"]/1000
active_losses_df

Unnamed: 0_level_0,MWh
datetime,Unnamed: 1_level_1
2018-12-31 23:00:00+00:00,39.143346
2018-12-31 23:15:00+00:00,32.788069
2018-12-31 23:30:00+00:00,33.018916
2018-12-31 23:45:00+00:00,34.574673
2019-01-01 00:00:00+00:00,33.417096
...,...
2021-12-31 21:45:00+00:00,40.720617
2021-12-31 22:00:00+00:00,38.156039
2021-12-31 22:15:00+00:00,36.290837
2021-12-31 22:30:00+00:00,35.947704


In [4]:
active_losses_df[active_losses_df.index.duplicated()]

Unnamed: 0_level_0,MWh
datetime,Unnamed: 1_level_1


In [5]:
forecast_generation_df = pd.read_csv("../data/Forecast-renewable-generation.csv", parse_dates=["datetime"], index_col="datetime")
forecast_generation_df.index = forecast_generation_df.tz_localize('Europe/Brussels', ambiguous="infer").tz_convert('UTC').index
forecast_generation_df

Unnamed: 0_level_0,solar_fore_de [MW],solar_fore_it [MW],wind_fore_de [MW],wind_fore_it [MW]
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-12-31 23:00:00+00:00,0.0,0.0,21344.8514,4302.6977
2019-01-01 00:00:00+00:00,0.0,0.0,23052.3310,4596.5916
2019-01-01 01:00:00+00:00,0.0,0.0,24969.9701,4478.5564
2019-01-01 02:00:00+00:00,0.0,0.0,27082.9626,4323.3712
2019-01-01 03:00:00+00:00,0.0,0.0,26890.9717,4231.8283
...,...,...,...,...
2021-12-31 18:00:00+00:00,0.0,0.0,36997.7200,1108.4000
2021-12-31 19:00:00+00:00,0.0,0.0,35666.9300,1077.9700
2021-12-31 20:00:00+00:00,0.0,0.0,34383.8800,1048.2800
2021-12-31 21:00:00+00:00,0.0,0.0,33075.2500,1078.7800


In [6]:
forecast_temperature_df = pd.read_csv("../data/Forecast-temperature.csv", parse_dates=["datetime"], index_col="datetime")
#forecast_temperature_df.index = forecast_temperature_df.tz_localize('Europe/Brussels', ambiguous="infer").tz_convert('UTC').index
forecast_temperature_df

Unnamed: 0_level_0,temperature_fore_ch,temperature_fore_fr,temperature_fore_de,temperature_fore_it
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-01-01 01:00:00,4.1067,5.9729,7.4268,4.0281
2019-01-01 07:00:00,1.7595,5.5398,6.3425,2.7281
2019-01-01 13:00:00,4.8823,8.6471,7.4596,7.7386
2019-01-01 19:00:00,3.1899,6.7846,5.2598,3.9186
2019-01-02 01:00:00,4.0144,6.4055,4.1154,3.5786
...,...,...,...,...
2021-12-31 19:00:00,8.5300,9.9600,10.7900,9.5900
2021-12-31 20:00:00,8.0000,9.4000,10.6300,9.1100
2021-12-31 21:00:00,7.5000,8.8800,10.5100,8.6700
2021-12-31 22:00:00,6.9700,8.5100,10.3200,8.1400


In [7]:
forecast_temperature_df = forecast_temperature_df.resample('1H').interpolate(method='linear')

ValueError: cannot reindex on an axis with duplicate labels