In [37]:
import pandas as pd 
import numpy as np
import xgboost as xgb
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error

In [38]:
# Load the two raw tables and parse their timestamp columns right away.
df = pd.read_csv('../Datasets/train.csv')
df['Tarih'] = pd.to_datetime(df['Tarih'])

weather = pd.read_csv('../Datasets/weather.csv')
weather['date'] = pd.to_datetime(weather['date'])

In [39]:
# extractor
def extractor(df, date_col, weather, mapping=False):
    """Derive calendar, holiday, season, weather and weekend features.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the timestamp column ``date_col``. The caller's
        frame is left untouched; a copy is modified instead.
    date_col : str
        Name of the timestamp column. Its values are parsed with
        ``pd.to_datetime``; missing timestamps (NaT) are not supported.
    weather : pandas.DataFrame
        Frame with a ``date`` column. Every other column it holds is
        left-joined onto ``df`` by calendar day. The caller's frame is left
        untouched.
    mapping : bool, default False
        When truthy, 'Haftanın günü' (Monday=1 .. Sunday=7), 'Özel Gün'
        (False=0 / True=1) and 'Mevsim' (Winter=1, Spring=2, Summer=3,
        Autumn=4) are converted to integer codes.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the original columns followed by 'Saat',
        'Aylık Gün', 'Yıllık Gün', 'Haftanın günü', 'Hafta', 'Ay',
        'Çeyreklik', 'Yıl', 'Özel Gün', 'Mevsim', the weather columns and
        'Hafta Sonu'.
    """
    # Kept as a function-scope import, as in the original definition.
    import holidays

    # Work on copies so neither input frame is modified for the caller.
    df = df.copy()
    weather = weather.copy()

    df[date_col] = pd.to_datetime(df[date_col])
    stamps = df[date_col]

    # Calendar features
    df['Saat'] = stamps.dt.hour
    df['Aylık Gün'] = stamps.dt.day
    df['Yıllık Gün'] = stamps.dt.dayofyear
    df['Haftanın günü'] = stamps.dt.day_name()
    # `Series.dt.week` was removed in pandas 2.0; isocalendar() provides the
    # same ISO week number. Cast from the nullable UInt32 it returns to a
    # plain integer column.
    df['Hafta'] = stamps.dt.isocalendar().week.astype('int64')
    df['Ay'] = stamps.dt.month
    df['Çeyreklik'] = stamps.dt.quarter
    df['Yıl'] = stamps.dt.year

    # Holiday flag: True when the timestamp is in the Turkish holiday calendar.
    tr_holidays = holidays.Turkey()
    df['Özel Gün'] = stamps.apply(lambda ts: ts in tr_holidays)

    # Season by month: Mar-May Spring, Jun-Aug Summer, Sep-Nov Autumn,
    # everything else (Dec-Feb) Winter.
    season_by_month = {
        12: 'Winter', 1: 'Winter', 2: 'Winter',
        3: 'Spring', 4: 'Spring', 5: 'Spring',
        6: 'Summer', 7: 'Summer', 8: 'Summer',
        9: 'Autumn', 10: 'Autumn', 11: 'Autumn',
    }
    df['Mevsim'] = stamps.dt.month.map(season_by_month)

    # Weather features: join on the calendar day, then drop the join keys.
    weather['date'] = pd.to_datetime(weather['date']).dt.date
    df['date_no_time'] = stamps.dt.date
    df = pd.merge(df, weather, left_on='date_no_time', right_on='date', how='left')
    df = df.drop(columns=['date_no_time', 'date'])

    # Weekend flag (Saturday/Sunday -> 1). Added after the merge so that it
    # stays the last column.
    df['Hafta Sonu'] = (df[date_col].dt.dayofweek >= 5).astype('int64')

    if mapping:
        ozel_gun = {True: 1, False: 0}
        mevsim = {"Winter": 1, "Spring": 2, "Summer": 3, "Autumn": 4}
        hafta_gunleri = {
            "Monday": 1,
            "Tuesday": 2,
            "Wednesday": 3,
            "Thursday": 4,
            "Friday": 5,
            "Saturday": 6,
            "Sunday": 7,
        }

        df['Haftanın günü'] = df['Haftanın günü'].map(hafta_gunleri)
        df['Özel Gün'] = df['Özel Gün'].map(ozel_gun)
        df['Mevsim'] = df['Mevsim'].map(mevsim)

    return df

In [40]:
# Build the feature table, then separate the target from the predictors.
features = extractor(df, 'Tarih', weather, mapping=True)
y = features['Dağıtılan Enerji (MWh)']
X = features.drop(columns=['Tarih', 'Dağıtılan Enerji (MWh)'])

# Positional 80/20 hold-out: the first 80% of the rows train the model and
# the remaining rows are used for evaluation (no shuffling).
split_at = int(len(X) * 0.8)

X_train, X_test = X[:split_at], X[split_at:]
y_train, y_test = y[:split_at], y[split_at:]

In [41]:
# Baseline: train on the raw, unscaled features and predict the hold-out rows.
xgb_model = xgb.XGBRegressor(
    learning_rate=0.05,
    max_depth=5,
    n_estimators=250,
).fit(X_train, y_train)
no_scaled_preds = xgb_model.predict(X_test)

In [42]:
# Hold-out error of the unscaled model; RMSE is the square root of the MSE.
mse = mean_squared_error(y_test, no_scaled_preds)
rmse = np.sqrt(mse)
mape = mean_absolute_percentage_error(y_test, no_scaled_preds)
f"MAPE: {mape:.2f} RMSE: {rmse:.2f}"

'MAPE: 0.05 RMSE: 108.94'

In [43]:
# With scaling: both scalers are fitted on the training portion only and
# then applied unchanged to the hold-out portion.
scaler = MinMaxScaler().fit(X_train)
scaled_X_train = scaler.transform(X_train)
scaled_X_test = scaler.transform(X_test)

# The target is a 1-D Series, so it is reshaped into a single column first.
y_train_col = y_train.to_numpy().reshape(-1, 1)
y_test_col = y_test.to_numpy().reshape(-1, 1)

target_scaler = MinMaxScaler().fit(y_train_col)
scaled_y_train = target_scaler.transform(y_train_col)
scaled_y_test = target_scaler.transform(y_test_col)

In [44]:
# Same hyper-parameters as the unscaled run, trained on the scaled data.
scaled_run_params = dict(learning_rate=0.05, max_depth=5, n_estimators=250)
xgb_model = xgb.XGBRegressor(**scaled_run_params)
xgb_model.fit(scaled_X_train, scaled_y_train)
scaled_preds = xgb_model.predict(scaled_X_test)

In [45]:
# Denormalize predictions: map them back to the original target units with
# the scaler that was fitted on y_train. The predictions are reshaped into a
# single column before being passed to the scaler.
denormalized_preds = target_scaler.inverse_transform(scaled_preds.reshape(-1,1))

In [46]:
# Hold-out error of the scaled model, measured in the original target units.
mse = mean_squared_error(y_test, denormalized_preds)
rmse = np.sqrt(mse)
mape = mean_absolute_percentage_error(y_test, denormalized_preds)
f"MAPE: {mape:.2f} RMSE: {rmse:.2f}"

'MAPE: 0.05 RMSE: 109.24'

Verilerin ölçeklenmiş (scale edilmiş) olup olmaması sonucu değiştirmiyor: MAPE iki durumda da 0.05, RMSE ise 108.94'e karşı 109.24.

In [47]:
# Final model: refit on all labelled rows (X, y) instead of the 80% split.
final_params = {
    'learning_rate': 0.05,
    'max_depth': 5,
    'n_estimators': 264,
}
xgb_model = xgb.XGBRegressor(**final_params)
xgb_model.fit(X, y)

In [48]:
# Create the submission features: run the sample submission through the same
# feature pipeline that produced the training features.
submission = pd.read_csv('../Datasets/sample_submission.csv')
submission['Tarih'] = pd.to_datetime(submission['Tarih'])
submission = extractor(submission, 'Tarih', weather, mapping=True)

# Keep the timestamps in an independent frame. A later cell adds the
# prediction column to it; taking an explicit copy avoids pandas'
# SettingWithCopyWarning for assigning into a slice of `submission`.
submission_tarih = submission[['Tarih']].copy()

# Drop the timestamp and the placeholder target so only model features remain.
submission = submission.drop(columns=['Tarih', 'Dağıtılan Enerji (MWh)'])
submission.head()

Unnamed: 0,Saat,Aylık Gün,Yıllık Gün,Haftanın günü,Hafta,Ay,Çeyreklik,Yıl,Özel Gün,Mevsim,Min_temperature,Max_temperature,Hafta Sonu
0,0,1,213,1,31,8,3,2022,0,3,23,39,0
1,1,1,213,1,31,8,3,2022,0,3,23,39,0
2,2,1,213,1,31,8,3,2022,0,3,23,39,0
3,3,1,213,1,31,8,3,2022,0,3,23,39,0
4,4,1,213,1,31,8,3,2022,0,3,23,39,0


In [49]:
# Predict for the submission feature frame with the most recently fitted
# `xgb_model`.
xgb_preds = xgb_model.predict(submission)

In [50]:
# Attach the predictions as a new column. `assign` returns a new frame, so
# nothing is written into a frame that was sliced out of `submission`
# (which is what triggers pandas' SettingWithCopyWarning).
submission_tarih = submission_tarih.assign(**{'Dağıtılan Enerji (MWh)': xgb_preds})
submission_tarih.head()

Unnamed: 0,Tarih,Dağıtılan Enerji (MWh)
0,2022-08-01 00:00:00,2196.22583
1,2022-08-01 01:00:00,2052.227539
2,2022-08-01 02:00:00,1904.351929
3,2022-08-01 03:00:00,1810.343872
4,2022-08-01 04:00:00,1778.296509


In [51]:
# Write the timestamp/prediction pairs without the index column.
submission_path = '../Datasets/XGB Submission/xgb_tuned_submission.csv'
submission_tarih.to_csv(submission_path, index=False)

### Merge with Vanilla LSTM Model

In [54]:
# Training data, with its timestamp column parsed.
train = pd.read_csv('../Datasets/train.csv')
train['Tarih'] = pd.to_datetime(train['Tarih'])

# `test` is read back from the XGBoost submission file written earlier.
test = pd.read_csv('../Datasets/XGB Submission/xgb_tuned_submission.csv')
test['Tarih'] = pd.to_datetime(test['Tarih'])

train.head()

Unnamed: 0,Tarih,Dağıtılan Enerji (MWh)
0,2018-01-01 00:00:00,1593.944216
1,2018-01-01 01:00:00,1513.933887
2,2018-01-01 02:00:00,1402.612637
3,2018-01-01 03:00:00,1278.527266
4,2018-01-01 04:00:00,1220.697701


In [56]:
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_squared_error

In [58]:
# Single LSTM layer (50 units) followed by a one-unit dense output, trained
# with MAE loss and the Adam optimizer.
# NOTE(review): `X_train.shape[2]` requires a 3-D array, but the only
# `X_train` defined in the visible cells is a 2-D DataFrame slice. The cell
# that builds the 3-D input appears to be missing - confirm before running
# this notebook from a fresh kernel.
lstm_input_shape = (X_train.shape[1], X_train.shape[2])
model = Sequential([
    LSTM(50, input_shape=lstm_input_shape),
    Dense(1),
])
model.compile(loss='mae', optimizer='adam')

In [59]:
# Train for up to 50 epochs in input order (shuffle=False), stopping early
# once the validation loss has not improved for 10 epochs.
early_stop = EarlyStopping(monitor='val_loss', patience=10)
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=72,
    validation_data=(X_test, y_test),
    callbacks=[early_stop],
    verbose=0,
    shuffle=False,
)

In [60]:
import plotly.express as px

# Per-epoch training and validation loss from the previous fit.
loss_labels = {'value':'Loss', 'variable':'Type', 'index':'Epoch'}
fig = px.line(
    history.history,
    y=['loss', 'val_loss'],
    labels=loss_labels,
    title='Training and Validation Losses',
)
fig.update_xaxes(title='Epoch')
fig.update_yaxes(title='Loss')
fig.show()

In [61]:
# NOTE(review): `model` is the same object that was already fitted above, so
# this call presumably continues training from those weights and does not
# start a fresh 27-epoch run - confirm that this is intended.
early_stop = EarlyStopping(monitor='val_loss', patience=10)
history = model.fit(
    X_train,
    y_train,
    epochs=27,
    batch_size=72,
    validation_data=(X_test, y_test),
    callbacks=[early_stop],
    verbose=0,
    shuffle=False,
)

# Loss curves for this second fit.
loss_labels = {'value':'Loss', 'variable':'Type', 'index':'Epoch'}
fig = px.line(
    history.history,
    y=['loss', 'val_loss'],
    labels=loss_labels,
    title='Training and Validation Losses',
)
fig.update_xaxes(title='Epoch')
fig.update_yaxes(title='Loss')
fig.show()

In [103]:
# Predictions of the fitted LSTM `model` for the hold-out inputs.
predicted_vals = model.predict(X_test)



In [104]:
import plotly.graph_objs as go

# Overlay the actual and the predicted series on one time axis.
# NOTE(review): `real_y_test` is not defined in any visible cell, and the
# x-axis uses the timestamps of `test` (the XGBoost submission file) while
# the predictions come from `X_test` - confirm that these line up.
actual_trace = go.Scatter(
    x=test['Tarih'],
    y=real_y_test.flatten(),
    mode='lines',
    name='Gerçek Değerler',
)
predicted_trace = go.Scatter(
    x=test['Tarih'],
    y=predicted_vals.flatten(),
    mode='lines',
    name='Tahminler',
)

fig = go.Figure(data=[actual_trace, predicted_trace])
fig.update_layout(
    title='Vanilla LSTM Tahminleri',
    xaxis_title='Tarih',
    yaxis_title='Dağıtılan Enerji (MWh)',
)

fig.show()


In [99]:
# MAPE / RMSE of `preds_combined` against the target column of `test`.
# NOTE(review): `preds_combined` is not defined in any visible cell, and the
# recorded output of this cell is a ValueError about a differing number of
# outputs (1 != 24), so `preds_combined` presumably has 24 columns - reduce
# it to one value per row before scoring. `test` is read from the XGBoost
# submission file, so this column holds XGBoost predictions, not ground truth.
mape = mean_absolute_percentage_error(test['Dağıtılan Enerji (MWh)'].values, preds_combined)
rmse = np.sqrt(mean_squared_error(test['Dağıtılan Enerji (MWh)'].values, preds_combined))
f"MAPE: {mape:.2f} RMSE: {rmse:.2f}"

ValueError: y_true and y_pred have different number of output (1!=24)

In [None]:
# Inverse-transform the predictions back to the original scale.
# NOTE(review): `y_pred` is not defined in any visible cell. In the visible
# cells `scaler` was fitted on the feature matrix and `target_scaler` on the
# target - confirm which scaler is meant here.
y_pred = scaler.inverse_transform(y_pred)

# Inverse-transform the actual values.
# NOTE(review): this overwrites `y_test`, so running the cell twice applies
# the inverse transform twice.
y_test = scaler.inverse_transform(y_test)

# Combine the actual and predicted values into one frame.
results = pd.DataFrame({'Actual': y_test.ravel(),
                        'Predicted': y_pred.ravel()})