In [27]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import pandas as pd
import numpy as np
import copy
import lightgbm as lgb

from datetime import datetime, timedelta
df_ntc = pd.read_csv("../data/NTC.csv")
df_losses = pd.read_csv(
    "../data/Avtice-losses.csv",
    skiprows=2,
    names=["timestamps", "kWh"],
    parse_dates=["timestamps"],
)
df_res = pd.read_csv("../data/Forecast-renewable-generation.csv")
df_temp = pd.read_csv("../data/Forecast-temperature.csv")
df_losses


Unnamed: 0,timestamps,kWh
0,2019-01-01 00:15:00,39143.346123
1,2019-01-01 00:30:00,32788.068958
2,2019-01-01 00:45:00,33018.915678
3,2019-01-01 01:00:00,34574.672936
4,2019-01-01 01:15:00,33417.095550
...,...,...
105211,2021-12-31 23:00:00,40720.617362
105212,2021-12-31 23:15:00,38156.038776
105213,2021-12-31 23:30:00,36290.836689
105214,2021-12-31 23:45:00,35947.703622


In [2]:
df_losses.iloc[:,0]

0        2019-01-01 00:15:00
1        2019-01-01 00:30:00
2        2019-01-01 00:45:00
3        2019-01-01 01:00:00
4        2019-01-01 01:15:00
                 ...        
105211   2021-12-31 23:00:00
105212   2021-12-31 23:15:00
105213   2021-12-31 23:30:00
105214   2021-12-31 23:45:00
105215   2022-01-01 00:00:00
Name: timestamps, Length: 105216, dtype: datetime64[ns]

In [3]:
# Move time 
df_losses['timestamps'] = pd.to_datetime(df_losses['timestamps']) + timedelta(minutes=-15)
df_losses

Unnamed: 0,timestamps,kWh
0,2019-01-01 00:00:00,39143.346123
1,2019-01-01 00:15:00,32788.068958
2,2019-01-01 00:30:00,33018.915678
3,2019-01-01 00:45:00,34574.672936
4,2019-01-01 01:00:00,33417.095550
...,...,...
105211,2021-12-31 22:45:00,40720.617362
105212,2021-12-31 23:00:00,38156.038776
105213,2021-12-31 23:15:00,36290.836689
105214,2021-12-31 23:30:00,35947.703622


In [4]:
#Convert to Mwh 
df_losses["MWh"] = (df_losses["kWh"].astype(float))/1000
df_losses
type(df_losses["timestamps"])

pandas.core.series.Series

In [5]:
df_losses['hour'] = (df_losses['timestamps'].dt.hour).astype(int)
df_losses['month'] = (df_losses['timestamps'].dt.month).astype(int)
df_losses['day'] = (df_losses['timestamps'].dt.day).astype(int)
df_losses['year'] = (df_losses['timestamps'].dt.year).astype(int)

In [6]:
df_losses

Unnamed: 0,timestamps,kWh,MWh,hour,month,day,year
0,2019-01-01 00:00:00,39143.346123,39.143346,0,1,1,2019
1,2019-01-01 00:15:00,32788.068958,32.788069,0,1,1,2019
2,2019-01-01 00:30:00,33018.915678,33.018916,0,1,1,2019
3,2019-01-01 00:45:00,34574.672936,34.574673,0,1,1,2019
4,2019-01-01 01:00:00,33417.095550,33.417096,1,1,1,2019
...,...,...,...,...,...,...,...
105211,2021-12-31 22:45:00,40720.617362,40.720617,22,12,31,2021
105212,2021-12-31 23:00:00,38156.038776,38.156039,23,12,31,2021
105213,2021-12-31 23:15:00,36290.836689,36.290837,23,12,31,2021
105214,2021-12-31 23:30:00,35947.703622,35.947704,23,12,31,2021


In [7]:
hourly_sum = df_losses.groupby(["year","month","day","hour"])['MWh'].sum().reset_index()
df_losses = hourly_sum 
df_losses["timestamps"] = pd.to_datetime(df_losses[['year', 'month', 'day', 'hour']])

In [8]:
df_losses_copy = copy.deepcopy(df_losses)
df_losses_copy

Unnamed: 0,year,month,day,hour,MWh,timestamps
0,2019,1,1,0,139.525004,2019-01-01 00:00:00
1,2019,1,1,1,129.716036,2019-01-01 01:00:00
2,2019,1,1,2,133.398074,2019-01-01 02:00:00
3,2019,1,1,3,135.133852,2019-01-01 03:00:00
4,2019,1,1,4,131.699424,2019-01-01 04:00:00
...,...,...,...,...,...,...
26299,2021,12,31,19,171.707318,2021-12-31 19:00:00
26300,2021,12,31,20,159.462903,2021-12-31 20:00:00
26301,2021,12,31,21,155.109520,2021-12-31 21:00:00
26302,2021,12,31,22,171.370277,2021-12-31 22:00:00


In [9]:
df_losses.set_index('timestamps', inplace=True)
df_losses

Unnamed: 0_level_0,year,month,day,hour,MWh
timestamps,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-01-01 00:00:00,2019,1,1,0,139.525004
2019-01-01 01:00:00,2019,1,1,1,129.716036
2019-01-01 02:00:00,2019,1,1,2,133.398074
2019-01-01 03:00:00,2019,1,1,3,135.133852
2019-01-01 04:00:00,2019,1,1,4,131.699424
...,...,...,...,...,...
2021-12-31 19:00:00,2021,12,31,19,171.707318
2021-12-31 20:00:00,2021,12,31,20,159.462903
2021-12-31 21:00:00,2021,12,31,21,155.109520
2021-12-31 22:00:00,2021,12,31,22,171.370277


In [17]:
ntc_df = pd.read_csv("../data/NTC.csv")
display(ntc_df)

Unnamed: 0,datetime,CH_AT,CH_DE,CH_FR,CH_IT,AT_CH,DE_CH,FR_CH,IT_CH
0,2019-01-01 00:00:00,700.0,4000.0,1200.0,2513.0,1200.0,800.0,3000.0,1910.0
1,2019-01-01 01:00:00,700.0,4000.0,1200.0,2513.0,1200.0,800.0,3000.0,1910.0
2,2019-01-01 02:00:00,700.0,4000.0,1200.0,2513.0,1200.0,800.0,3000.0,1910.0
3,2019-01-01 03:00:00,700.0,4000.0,1200.0,2513.0,1200.0,800.0,3000.0,1910.0
4,2019-01-01 04:00:00,700.0,4000.0,1200.0,2513.0,1200.0,800.0,3000.0,1910.0
...,...,...,...,...,...,...,...,...,...
26299,2021-12-31 19:00:00,1200.0,4000.0,1400.0,4069.0,1200.0,800.0,3200.0,1810.0
26300,2021-12-31 20:00:00,1200.0,4000.0,1400.0,4069.0,1200.0,800.0,3200.0,1810.0
26301,2021-12-31 21:00:00,1200.0,4000.0,1400.0,3953.0,1200.0,800.0,3200.0,1810.0
26302,2021-12-31 22:00:00,1200.0,4000.0,1400.0,3780.0,1200.0,800.0,3200.0,1810.0


In [18]:
ntc_df.set_index('datetime', inplace=True)
X = ntc_df.drop('CH_DE', axis=1)  # Features
y = ntc_df['CH_DE']  # Target variable

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the parameters for LightGBM
params = {
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'metric': 'MAE',
    'device': 'gpu'  # Enable GPU support
}

In [20]:
# Create the LightGBM dataset
train_data = lgb.Dataset(X_train, label=y_train)
# Train the model
model = lgb.train(params, train_data)

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 572
[LightGBM] [Info] Number of data points in the train set: 21043, number of used features: 7
[LightGBM] [Info] Using GPU Device: NVIDIA A100 80GB PCIe, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 7 dense feature groups (0.16 MB) transferred to GPU in 0.006267 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 3610.727083


In [23]:
prediction = model.predict(X_test)
print(len(prediction))

5261


In [28]:
print(mean_absolute_error(prediction, y_test))

160.32880585082535
