In [106]:
from sklearn.model_selection import train_test_split
import pandas as pd
import lightgbm as lgb

In [107]:
import re

In [108]:
from sklearn.metrics import mean_absolute_error


# 1. Data Loading & Preparation

In [109]:
df_NTC = pd.read_csv("../../data/NTC.csv")
df_losses = pd.read_csv("../../data/Avtice-losses.csv")

df_renewable = pd.read_csv("../../data/Forecast-renewable-generation.csv")
df_temp = pd.read_csv("../../data/Forecast-temperature.csv")

df_losses = df_losses.drop(df_losses.index[0])

In [110]:
df_losses = df_losses.rename(columns={
    'Unnamed: 0': 'datetime',
})

In [111]:

df_NTC['datetime'] = pd.to_datetime(df_NTC['datetime'])
df_losses['datetime'] = pd.to_datetime(df_losses['datetime'])
df_renewable['datetime'] = pd.to_datetime(df_renewable['datetime'])
df_temp['datetime'] = pd.to_datetime(df_temp['datetime'])

In [112]:
df_losses['Wirkverluste/Active Losses'] = pd.to_numeric(df_losses['Wirkverluste/Active Losses'], errors='coerce')

In [113]:
# Convert "Zeitstempel" column to datetime format, if not already
#df_losses['datetime'] = pd.to_datetime(df_losses['datetime'])

# Subtract 15 minutes from each date in the "Zeitstempel" column
df_losses['datetime'] = df_losses['datetime'] - pd.Timedelta(minutes=15)



In [114]:

df_losses_new = df_losses.set_index('datetime')

hourly_loss = df_losses_new.resample('1H', closed='left', label='left').sum()

In [115]:
hourly_loss["Wirkverluste/Active Losses"]=hourly_loss["Wirkverluste/Active Losses"]/1000


In [116]:
#hourly_loss

In [117]:


# Check if the data is already in 6-hour intervals
if (df_temp['datetime'].diff().dt.total_seconds() / 3600).median() == 6:
    # If the data is in 6-hour intervals, leave the datetime column as it is
    hourly_temp = df_temp
else:
    # If the data is not in 6-hour intervals, resample it to hourly intervals
    hourly_temp = df_temp.resample('H', on='datetime').mean().reset_index()

    # Fill missing values in other columns with the values from the initial 6-hourly data
    columns_to_fill = hourly_temp.columns.difference(['datetime'])
    hourly_temp[columns_to_fill] = hourly_temp[columns_to_fill].fillna(method='ffill', limit=6)
    # Replace '6' with the appropriate number if you want to fill more or fewer hours

# Print the resulting hourly data
display(hourly_temp.head(10))

Unnamed: 0,datetime,temperature_fore_ch,temperature_fore_fr,temperature_fore_de,temperature_fore_it
0,2019-01-01 01:00:00,4.1067,5.9729,7.4268,4.0281
1,2019-01-01 02:00:00,4.1067,5.9729,7.4268,4.0281
2,2019-01-01 03:00:00,4.1067,5.9729,7.4268,4.0281
3,2019-01-01 04:00:00,4.1067,5.9729,7.4268,4.0281
4,2019-01-01 05:00:00,4.1067,5.9729,7.4268,4.0281
5,2019-01-01 06:00:00,4.1067,5.9729,7.4268,4.0281
6,2019-01-01 07:00:00,1.7595,5.5398,6.3425,2.7281
7,2019-01-01 08:00:00,1.7595,5.5398,6.3425,2.7281
8,2019-01-01 09:00:00,1.7595,5.5398,6.3425,2.7281
9,2019-01-01 10:00:00,1.7595,5.5398,6.3425,2.7281


In [118]:
hourly_loss = hourly_loss.reset_index()

In [119]:
merged_df = df_NTC.merge(hourly_loss, on='datetime', how='outer')
merged_df = merged_df.merge(df_renewable, on='datetime', how='outer')
merged_df = merged_df.merge(hourly_temp, on='datetime', how='outer')

In [120]:
na_counts_per_column = merged_df.isna().sum()

In [121]:
na_counts_per_column 

datetime                         0
CH_AT                            3
CH_DE                            3
CH_FR                            3
CH_IT                            3
AT_CH                            3
DE_CH                            3
FR_CH                            3
IT_CH                            3
Wirkverluste/Active Losses       0
solar_fore_de [MW]               3
solar_fore_it [MW]               3
wind_fore_de [MW]                3
wind_fore_it [MW]                3
temperature_fore_ch           1578
temperature_fore_fr           1578
temperature_fore_de           1578
temperature_fore_it           1578
dtype: int64

In [122]:
merged_df_incomplete = merged_df.dropna()

In [123]:
merged_df_incomplete=merged_df_incomplete.rename(columns={'Wirkverluste/Active Losses': 'Loss'})

In [124]:
merged_df_incomplete=merged_df_incomplete.rename(columns={'solar_fore_de [MW]': 'solar_fore_de'})
merged_df_incomplete=merged_df_incomplete.rename(columns={'solar_fore_it [MW]': 'solar_fore_it'})
merged_df_incomplete=merged_df_incomplete.rename(columns={'wind_fore_de [MW]': 'wind_fore_de'})
merged_df_incomplete=merged_df_incomplete.rename(columns={'wind_fore_it [MW]': 'wind_fore_it'})
#drop the datetime

#merged_df_incomplete=merged_df_incomplete.drop('datetime')


In [125]:
merged_df_incomplete = merged_df_incomplete.reset_index()

# 2. Create 24 different dataframes

### 2.1. Create 1 Big Dataset with all the Lags

In [126]:
merged_df_incomplete['hour'] = merged_df_incomplete['datetime'].dt.hour

In [127]:
merged_ready_1= merged_df_incomplete

In [128]:
features = ['CH_DE', 'CH_FR', 'CH_IT', 'AT_CH', 'DE_CH', 'FR_CH', 'IT_CH',
               'solar_fore_de', 'solar_fore_it', 'wind_fore_de', 'wind_fore_it',
               'temperature_fore_ch', 'temperature_fore_fr', 'temperature_fore_de',
               'temperature_fore_it','Loss']

    # Create the lagged features
for feature in features:
    for lag in range(1, 336):
        col_name = f"{feature}_lag{lag}"
        merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)

merged_ready_1 = merged_ready_1.iloc[167:, :]

  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_read

  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_read

  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_read

  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_read

  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_read

  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_read

  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_read

  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_read

  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_read

  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_read

  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_read

  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_read

  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_read

  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_read

  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_read

  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_read

  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_read

  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_ready_1[feature].shift(lag)
  merged_ready_1[col_name] = merged_read

In [129]:
rename_dict = {
    'CH_AT': 'CH_AT_lag0',
    'CH_DE': 'CH_DE_lag0',
    'CH_FR': 'CH_FR_lag0',
    'CH_IT': 'CH_IT_lag0',
    'AT_CH': 'AT_CH_lag0',
    'DE_CH': 'DE_CH_lag0',
    'FR_CH': 'FR_CH_lag0',
    'IT_CH': 'IT_CH_lag0',
    #'Loss': 'Loss_lag0',
    'solar_fore_de': 'solar_fore_de_lag0',
    'solar_fore_it': 'solar_fore_it_lag0',
    'wind_fore_de': 'wind_fore_de_lag0',
    'wind_fore_it': 'wind_fore_it_lag0',
    'temperature_fore_ch': 'temperature_fore_ch_lag0',
    'temperature_fore_fr': 'temperature_fore_fr_lag0',
    'temperature_fore_de': 'temperature_fore_de_lag0',
    'temperature_fore_it': 'temperature_fore_it_lag0'#,
    #'hour': 'hour_lag0'
}

# Rename the columns
merged_ready_1 = merged_ready_1.rename(columns=rename_dict)

In [130]:
num_cols_per_line = 5
for i in range(0, len(merged_ready_1.columns), num_cols_per_line):
    print(', '.join(merged_ready_1.columns[i:i+num_cols_per_line]))

index, datetime, CH_AT_lag0, CH_DE_lag0, CH_FR_lag0
CH_IT_lag0, AT_CH_lag0, DE_CH_lag0, FR_CH_lag0, IT_CH_lag0
Loss, solar_fore_de_lag0, solar_fore_it_lag0, wind_fore_de_lag0, wind_fore_it_lag0
temperature_fore_ch_lag0, temperature_fore_fr_lag0, temperature_fore_de_lag0, temperature_fore_it_lag0, hour
CH_DE_lag1, CH_DE_lag2, CH_DE_lag3, CH_DE_lag4, CH_DE_lag5
CH_DE_lag6, CH_DE_lag7, CH_DE_lag8, CH_DE_lag9, CH_DE_lag10
CH_DE_lag11, CH_DE_lag12, CH_DE_lag13, CH_DE_lag14, CH_DE_lag15
CH_DE_lag16, CH_DE_lag17, CH_DE_lag18, CH_DE_lag19, CH_DE_lag20
CH_DE_lag21, CH_DE_lag22, CH_DE_lag23, CH_DE_lag24, CH_DE_lag25
CH_DE_lag26, CH_DE_lag27, CH_DE_lag28, CH_DE_lag29, CH_DE_lag30
CH_DE_lag31, CH_DE_lag32, CH_DE_lag33, CH_DE_lag34, CH_DE_lag35
CH_DE_lag36, CH_DE_lag37, CH_DE_lag38, CH_DE_lag39, CH_DE_lag40
CH_DE_lag41, CH_DE_lag42, CH_DE_lag43, CH_DE_lag44, CH_DE_lag45
CH_DE_lag46, CH_DE_lag47, CH_DE_lag48, CH_DE_lag49, CH_DE_lag50
CH_DE_lag51, CH_DE_lag52, CH_DE_lag53, CH_DE_lag54, CH_DE_lag55
CH

In [131]:
#make sure all the losses with lag are named -1


def modify_col_name(col_name):
    match = re.match(r"Loss_lag(\d+)", col_name)
    if match:
        number = int(match.group(1))
        return f"Loss_lag{number - 1}"
    return col_name

# Apply the function to each column name
merged_ready_1.columns = [modify_col_name(col) for col in merged_ready_1.columns]


In [132]:
# replac 0 with 24 because forecast 1-24
merged_ready_1.loc[merged_ready_1["hour"] == 0, "hour"] = 24



## 2.2 Create 24 datasets

In [133]:
dfs_total = {}

dataset_lags = [24, 48, 72, 96,120,144,168,192,216,240,264,288,312,336]

for i in dataset_lags:
    working_df = merged_ready_1.copy()
    
    threshold = i-1

    # Extract columns to drop based on their name
    cols_to_drop = []
    for col in working_df.columns:
        match = re.search(r'lag(\d+)', col)
        if match:
            lag_number = int(match.group(1))
            if lag_number > threshold:
                cols_to_drop.append(col)

    # Drop these columns
    working_df = working_df.drop(columns=cols_to_drop)
    
    
    
    dfs_total[f'df_{i}'] = working_df



In [134]:
for key, value in dfs_total.items():
    print(f"Key: {key}, Type: {type(value)}")


Key: df_24, Type: <class 'pandas.core.frame.DataFrame'>
Key: df_48, Type: <class 'pandas.core.frame.DataFrame'>
Key: df_72, Type: <class 'pandas.core.frame.DataFrame'>
Key: df_96, Type: <class 'pandas.core.frame.DataFrame'>
Key: df_120, Type: <class 'pandas.core.frame.DataFrame'>
Key: df_144, Type: <class 'pandas.core.frame.DataFrame'>
Key: df_168, Type: <class 'pandas.core.frame.DataFrame'>
Key: df_192, Type: <class 'pandas.core.frame.DataFrame'>
Key: df_216, Type: <class 'pandas.core.frame.DataFrame'>
Key: df_240, Type: <class 'pandas.core.frame.DataFrame'>
Key: df_264, Type: <class 'pandas.core.frame.DataFrame'>
Key: df_288, Type: <class 'pandas.core.frame.DataFrame'>
Key: df_312, Type: <class 'pandas.core.frame.DataFrame'>
Key: df_336, Type: <class 'pandas.core.frame.DataFrame'>


In [135]:
#  delete all the columns with lag lower than threshold

def set_midnight(df_input, threshold):
   
    
    working_df = df_input.copy()

    
    cols_to_drop = []
    for col in working_df.columns:
        match = re.search(r'lag(\d+)', col)
        if match:
            lag_number = int(match.group(1))
            if lag_number < threshold:
                cols_to_drop.append(col)
    working_df = working_df.drop(columns=cols_to_drop)
    
    return working_df

# Example usage:
# result_df = drop_lagged_columns(merged_ready_1, threshold_value)

In [136]:
filtered_datasets = {}
for lag in dataset_lags:
    key = f"df_{lag}"
    df_current = dfs_total[key]
    filtered_datasets[key] = {}
    
    for i in range(1, 25):
        filtered_df = df_current[df_current['hour'] == i]
        filtered_df_filtered=set_midnight(filtered_df, i)
        filtered_datasets[key][i] = filtered_df_filtered



In [137]:
filtered_datasets["df_72"][3].head(2)

Unnamed: 0,index,datetime,Loss,hour,CH_DE_lag3,CH_DE_lag4,CH_DE_lag5,CH_DE_lag6,CH_DE_lag7,CH_DE_lag8,...,Loss_lag62,Loss_lag63,Loss_lag64,Loss_lag65,Loss_lag66,Loss_lag67,Loss_lag68,Loss_lag69,Loss_lag70,Loss_lag71
170,171,2019-01-08 03:00:00,185.935455,3,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,...,164.544605,159.371671,182.746698,197.434057,197.096809,183.868235,199.355419,191.840655,184.524079,171.94097
194,195,2019-01-09 03:00:00,188.856706,3,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,...,170.252436,168.119765,194.269492,228.351993,257.248554,262.511493,226.068474,202.920325,190.839731,254.044293


# 3. Run the model

### 3.1 Define Model Formula

In [138]:


def compute_mae(merged_df_ready_prep):
    # Ensure the merged_ready DataFrame is created first
    merged_ready = merged_df_ready_prep.copy()
    merged_ready=merged_ready.drop('datetime', axis=1)
   
    X = merged_ready.drop('Loss', axis=1)  # Features
    y = merged_ready['Loss']  # Target variable

    # Split the data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    params = {
        'boosting_type': 'gbdt',
        'objective': 'regression',
        'metric': 'mae',
        'device': 'gpu',  # Enable GPU support
        
    }

    # Create the LightGBM dataset
    train_data = lgb.Dataset(X_train, label=y_train)
    
    # Train the model
    model = lgb.train(params, train_data)

    # Predict the test data
    y_pred = model.predict(X_test, num_iteration=model.best_iteration)

    # Calculate MAE
    mae = mean_absolute_error(y_test, y_pred)

    return mae



### 3.2 Run the models

In [None]:




# Lists to store the main keys, sub-keys and the number of rows
main_keys = []
sub_keys = []
num_rows = []
#for lag in [48]:
for lag in dataset_lags:
    dataset_key = "df_" + str(lag)
    if dataset_key in filtered_datasets:
        #for sub_key in range(1, 25):  # since you mentioned it goes from 1 to 24
        for sub_key in range(1, 25):
            main_keys.append(dataset_key)
            sub_keys.append(sub_key)
            num_rows.append(compute_mae(filtered_datasets[dataset_key][sub_key]))

# Create a DataFrame
result_df = pd.DataFrame({
    'Dataset_Key': main_keys,
    'Sub_Key': sub_keys,
    'Num_Rows': num_rows
})

print(result_df)



[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 57517
[LightGBM] [Info] Number of data points in the train set: 819, number of used features: 358
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 349 dense feature groups (0.27 MB) transferred to GPU in 0.173574 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 105.066014
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 55301
[LightGBM] [Info] Number of data points in the train set: 824, number of used features: 344
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightG

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 37982
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 235
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 233 dense feature groups (0.18 MB) transferred to GPU in 0.038965 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 100.879272
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 35293
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 219
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightG

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 21834
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 139
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 137 dense feature groups (0.11 MB) transferred to GPU in 0.005419 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 101.326330
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 19143
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 123
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightG

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 6796
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 43
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 43 dense feature groups (0.03 MB) transferred to GPU in 0.001850 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 104.131974
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 4616
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 29
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [

[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 711 dense feature groups (0.56 MB) transferred to GPU in 0.009038 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 105.066014
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 114644
[LightGBM] [Info] Number of data points in the train set: 824, number of used features: 715
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 697 dense feature groups (0.55 MB) transferred to GPU in 0.009299 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 108.574847
[LightGB

[LightGBM] [Info] Start training from score 104.865695
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 97403
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 606
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 596 dense feature groups (0.46 MB) transferred to GPU in 0.008593 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 100.879272
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 94713
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 590
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 83947
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 526
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 516 dense feature groups (0.40 MB) transferred to GPU in 0.009866 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 99.836819
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 81255
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 510
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGB

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 68490
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 430
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 420 dense feature groups (0.33 MB) transferred to GPU in 0.011203 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 103.875209
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 66223
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 414
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightG

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 171951
[LightGBM] [Info] Number of data points in the train set: 819, number of used features: 1072
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 1046 dense feature groups (0.82 MB) transferred to GPU in 0.011594 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 107.081595
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 169759
[LightGBM] [Info] Number of data points in the train set: 819, number of used features: 1057
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[L

[LightGBM] [Info] 958 dense feature groups (0.75 MB) transferred to GPU in 0.009697 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 100.879272
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 154116
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 961
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 942 dense feature groups (0.74 MB) transferred to GPU in 0.009747 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 98.723679
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 151423
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 945
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corp

[LightGBM] [Info] 846 dense feature groups (0.66 MB) transferred to GPU in 0.009535 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 100.559054
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 135327
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 849
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 830 dense feature groups (0.65 MB) transferred to GPU in 0.009633 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 97.673756
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 132773
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 833
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corp

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 119052
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 743
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 725 dense feature groups (0.57 MB) transferred to GPU in 0.047712 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 103.382266
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 235762
[LightGBM] [Info] Number of data points in the train set: 819, number of used features: 1471
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[Lig

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 224309
[LightGBM] [Info] Number of data points in the train set: 819, number of used features: 1396
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 1367 dense feature groups (1.07 MB) transferred to GPU in 0.011184 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 100.308350
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 221633
[LightGBM] [Info] Number of data points in the train set: 819, number of used features: 1380
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[L

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 208149
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 1300
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 1272 dense feature groups (0.99 MB) transferred to GPU in 0.010585 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 92.951959
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 205452
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 1284
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[Li

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 194744
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 1220
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 1192 dense feature groups (0.93 MB) transferred to GPU in 0.010543 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 97.673756
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 192194
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 1204
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[Li

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 178481
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 1114
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 1088 dense feature groups (0.85 MB) transferred to GPU in 0.011243 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 103.382266
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 295183
[LightGBM] [Info] Number of data points in the train set: 819, number of used features: 1842
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[L

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 281066
[LightGBM] [Info] Number of data points in the train set: 819, number of used features: 1751
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 1713 dense feature groups (1.34 MB) transferred to GPU in 0.012297 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 102.250360
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 278363
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 1735
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[L

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 264885
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 1655
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 1619 dense feature groups (1.26 MB) transferred to GPU in 0.015733 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 95.387141
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 262190
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 1639
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[Li

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 249142
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 1559
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 1523 dense feature groups (1.19 MB) transferred to GPU in 0.013788 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 102.067586
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 246726
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 1543
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[L

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 354578
[LightGBM] [Info] Number of data points in the train set: 819, number of used features: 2213
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 2160 dense feature groups (1.69 MB) transferred to GPU in 0.017769 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 105.066014
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 352286
[LightGBM] [Info] Number of data points in the train set: 824, number of used features: 2199
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[L

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 337738
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 2106
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 2062 dense feature groups (1.61 MB) transferred to GPU in 0.040712 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 104.865695
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 335048
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 2090
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[L

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 321596
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 2010
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 1966 dense feature groups (1.54 MB) transferred to GPU in 0.064857 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 99.836819
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 318903
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 1994
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[Li

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 306163
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 1914
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 1870 dense feature groups (1.46 MB) transferred to GPU in 0.063607 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 103.875209
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 303899
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 1898
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[L

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 409647
[LightGBM] [Info] Number of data points in the train set: 819, number of used features: 2557
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 2496 dense feature groups (1.95 MB) transferred to GPU in 0.014230 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 107.081595
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 407436
[LightGBM] [Info] Number of data points in the train set: 819, number of used features: 2541
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[L

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 391734
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 2445
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 2393 dense feature groups (1.87 MB) transferred to GPU in 0.795618 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 98.723679
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 389040
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 2429
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[Li

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 372963
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 2333
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 2281 dense feature groups (1.78 MB) transferred to GPU in 0.046136 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 97.673756
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 370417
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 2317
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[Li

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 358890
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 2241
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 2191 dense feature groups (1.71 MB) transferred to GPU in 0.068054 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 99.705672
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 356724
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 2227
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[Li

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 462104
[LightGBM] [Info] Number of data points in the train set: 819, number of used features: 2881
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 2817 dense feature groups (2.20 MB) transferred to GPU in 0.014839 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 100.308350
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 459417
[LightGBM] [Info] Number of data points in the train set: 819, number of used features: 2865
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[L

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 445886
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 2785
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 2724 dense feature groups (2.13 MB) transferred to GPU in 0.025706 secs. 1 sparse feature groups
[LightGBM] [Info] Start training from score 92.951959
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 443186
[LightGBM] [Info] Number of data points in the train set: 818, number of used features: 2769
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[Li



### 3.3 Save the model

In [None]:
filename = "results_lags_[" + "_".join(map(str, dataset_lags)) + "].csv"
#dataset_lags_custom=[48]
#filename = "results_lags_[" + "_".join(map(str, dataset_lags_custom)) + "].csv"

In [None]:
filename

In [None]:
result_df.to_csv(filename, index=False)