In [39]:
import pandas as pd 
import numpy as np 

train = pd.read_csv('../Datasets/train.csv')
weather = pd.read_csv('../Datasets/weather.csv')

train['Tarih'] = pd.to_datetime(train['Tarih'])
weather['date'] = pd.to_datetime(weather['date'])

Feature extraction function

In [40]:
def extractor(df, date_col, weather, mapping=False):
    # dont show warnings
    import warnings
    warnings.filterwarnings('ignore')
    
    df[date_col] = pd.to_datetime(df[date_col])
    
    # Extracting date features
    df['Saat'] = df[date_col].dt.hour
    df['Aylık Gün'] = df[date_col].dt.day
    df['Yıllık Gün'] = df[date_col].dt.dayofyear
    df['Haftanın günü'] = df[date_col].dt.day_name()
    df['Hafta'] = df[date_col].dt.week
    df['Ay'] = df[date_col].dt.month
    df['Çeyreklik'] = df[date_col].dt.quarter
    df['Yıl'] = df[date_col].dt.year
    
    # Extracting holiday features
    import holidays
    tr_holidays = holidays.Turkey()
    df['Özel Gün'] = df[date_col].apply(lambda x: x in tr_holidays)
    
    # Extracting seasonality features
    def get_season(month):
        if month >= 3 and month <= 5:
            return 'Spring'
        elif month >= 6 and month <= 8:
            return 'Summer'
        elif month >= 9 and month <= 11:
            return 'Autumn'
        else:
            return 'Winter'
    
    df['Mevsim'] = df[date_col].dt.month.apply(get_season)
    
    # Adding weather features
    weather['date'] = pd.to_datetime(weather['date']).dt.date
    df['date_no_time'] = df[date_col].dt.date
    df = pd.merge(df, weather, left_on='date_no_time', right_on='date', how='left')
    df.drop(['date_no_time','date'], axis=1, inplace=True)
    
    # Extracting weekend features
    import datetime
    def is_weekend(date_str):
        date_obj = datetime.datetime.strptime(date_str, '%Y-%m-%d').date()
        return 1 if date_obj.weekday() >= 5 else 0
    
    df['Hafta Sonu'] = df[date_col].dt.strftime('%Y-%m-%d').apply(is_weekend)
    
    if mapping == True:
        ozel_gun = {
            True:1,
            False:0
        }

        mevsim = {
            "Winter":1,
            "Spring":2,
            "Summer":3,
            "Autumn":4
        }

        hafta_gunleri = {
            "Monday":1,
            "Tuesday":2,
            "Wednesday":3,
            "Thursday":4,
            "Friday":5,
            "Saturday":6,
            "Sunday":7
        }
        
        df['Haftanın günü'] = df['Haftanın günü'].map(hafta_gunleri)
        df['Özel Gün'] = df['Özel Gün'].map(ozel_gun)
        df['Mevsim'] = df['Mevsim'].map(mevsim)

        return df
    
    else:
        return df

In [41]:
df = extractor(train, 'Tarih', weather, mapping=True);df.head()

Unnamed: 0,Tarih,Dağıtılan Enerji (MWh),Saat,Aylık Gün,Yıllık Gün,Haftanın günü,Hafta,Ay,Çeyreklik,Yıl,Özel Gün,Mevsim,Min_temperature,Max_temperature,Hafta Sonu
0,2018-01-01 00:00:00,1593.944216,0,1,1,1,1,1,1,2018,1,1,4,12,0
1,2018-01-01 01:00:00,1513.933887,1,1,1,1,1,1,1,2018,1,1,4,12,0
2,2018-01-01 02:00:00,1402.612637,2,1,1,1,1,1,1,2018,1,1,4,12,0
3,2018-01-01 03:00:00,1278.527266,3,1,1,1,1,1,1,2018,1,1,4,12,0
4,2018-01-01 04:00:00,1220.697701,4,1,1,1,1,1,1,2018,1,1,4,12,0


Prepare the dataset for Stacked LSTM

In [49]:
train = df.iloc[:int(len(df)*.75),:]
test = df.iloc[int(len(df)*.75):,:]

X_train = train.drop(['Tarih','Dağıtılan Enerji (MWh)'], axis=1)
y_train = train['Dağıtılan Enerji (MWh)']

X_test = test.drop(['Tarih','Dağıtılan Enerji (MWh)'], axis=1)
y_test = test['Dağıtılan Enerji (MWh)']

# set train test to 3d
X_train = X_train.values.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test = X_test.values.reshape(X_test.shape[0], 1, X_test.shape[1])

In [50]:
X_train.shape, X_test.shape

((30114, 1, 13), (10038, 1, 13))

### Stacked LSTM

In [47]:
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dropout

In [56]:
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(1, 13)))
model.add(keras.layers.Dropout(0.2))
model.add(LSTM(50, return_sequences=True))
model.add(LSTM(50))
model.add(Dense(1))

model.compile(loss='mean_squared_error', optimizer='adam')

In [57]:
history = model.fit(X_train, y_train, epochs=50, batch_size=72, validation_data=(X_test, y_test), 
                    callbacks=[EarlyStopping(monitor='val_loss', patience=10)], verbose=0, shuffle=False)

In [58]:
stacked_preds = model.predict(X_test)



In [61]:
# mape, rmse
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error
mape = mean_absolute_percentage_error(y_test, stacked_preds)
rmse = np.sqrt(mean_squared_error(y_test, stacked_preds))
f"MAPE: {mape:.2f} RMSE: {rmse:.2f}"

'MAPE: 0.56 RMSE: 1227.97'

In [60]:
import plotly.express as px
fig = px.line(history.history, y=['loss', 'val_loss'], 
              labels={'value':'Loss', 'variable':'Type', 'index':'Epoch'},
              title='Training and Validation Losses')
fig.update_xaxes(title='Epoch')
fig.update_yaxes(title='Loss')
fig.show()

<hr>