In [1]:
#import libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.layers import Dense, Input, LSTM
from keras.models import Sequential
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from statsmodels.tsa.stattools import adfuller

In [2]:
# Read the training set and the test set from CSV files that are expected to
# sit in the notebook's working directory.
train_df, test_df = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'Test_Jan.csv'))

In [3]:
# Parse the raw 'datetime' strings (day-month-year hour:minute) into a real
# datetime column called 'Date'; both frames are updated in place.
for frame in (train_df, test_df):
    frame['Date'] = pd.to_datetime(frame['datetime'], format='%d-%m-%Y %H:%M')

In [4]:
# Narrow both frames down to the timestamp string, the target (nat_demand)
# and the three T2M_* columns, working on independent copies from here on.
# NOTE(review): the parsed 'Date' column is not part of this list, so it is
# dropped at this point — confirm that is intended.
columns = ['datetime','nat_demand','T2M_toc','T2M_san','T2M_dav']
train_df = train_df[columns].copy()
test_df = test_df[columns].copy()

In [5]:
# Standardize every numeric column.
# A separate scaler is fitted on the raw target first so predictions can later
# be mapped back to original units; this has to happen before the frames are
# overwritten with scaled values below.
# NOTE: the cell overwrites train_df/test_df, so running it twice would fit
# the scalers on already-scaled data.
numeric_cols = columns[1:]  # everything except the 'datetime' string column
target_scaler = StandardScaler().fit(train_df[['nat_demand']])
scaler = StandardScaler().fit(train_df[numeric_cols])
train_df[numeric_cols] = scaler.transform(train_df[numeric_cols])
test_df[numeric_cols] = scaler.transform(test_df[numeric_cols])  # statistics come from the training data only

In [6]:
# Project the three T2M_* columns onto principal components.
# The PCA is fitted on the training frame only and that same projection is
# applied to both frames. Two components are fitted, but only the first one
# is stored (as 'pca1').
clms = ['T2M_toc','T2M_san','T2M_dav']
pca = PCA(n_components=2).fit(train_df[clms])
for frame in (train_df, test_df):
    frame['pca1'] = pca.transform(frame[clms])[:, 0]

In [7]:
#setup the input data for model training
features = ['nat_demand','pca1']
window_size = 10


def make_windows(frame, feature_cols, target_col, window):
    """Slice a row-ordered frame into overlapping supervised-learning windows.

    Window k holds rows k .. k+window-1 of `feature_cols`; its label is the
    `target_col` value of the row right after the window (row k+window).
    Rows are addressed purely by position, so the result does not depend on
    the frame's index labels.

    Parameters
    ----------
    frame : pandas.DataFrame
        Source data, already in the order the windows should follow.
    feature_cols : list of str
        Columns that make up each time step of a window.
    target_col : str
        Column the labels are taken from.
    window : int
        Number of consecutive rows per window (must be >= 1).

    Returns
    -------
    X : numpy.ndarray, shape (n_windows, window, len(feature_cols))
    y : numpy.ndarray, shape (n_windows,)
        where n_windows = max(len(frame) - window, 0).

    Raises
    ------
    ValueError
        If `window` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be >= 1")

    # Pull the data out of pandas once instead of re-slicing the frame for
    # every single window.
    feature_values = frame[feature_cols].to_numpy()
    target_values = frame[target_col].to_numpy()

    n_windows = max(len(frame) - window, 0)
    if n_windows == 0:
        # Keep the 3-D / 1-D layout even when the frame is too short to
        # produce a single window.
        return (
            np.empty((0, window, len(feature_cols)), dtype=feature_values.dtype),
            np.empty((0,), dtype=target_values.dtype),
        )

    X = np.stack([feature_values[start:start + window] for start in range(n_windows)])
    y = target_values[window:].copy()  # label of window k is row k + window
    return X, y


X_train, y_train = make_windows(train_df, features, 'nat_demand', window_size)
X_test, y_test = make_windows(test_df, features, 'nat_demand', window_size)


In [8]:
# LSTM regressor: a 50-unit LSTM reads each input window and a single Dense
# unit produces the next-step prediction.
# The input shape is declared through an explicit Input layer rather than an
# `input_shape=` argument on the LSTM, which is the form Keras recommends for
# Sequential models. The feature dimension is derived from `features` so it
# cannot drift out of sync with the windowing cell.
model = Sequential([
    Input(shape=(window_size, len(features))),
    LSTM(50, activation='tanh'),
    Dense(1)
])
model.compile(optimizer='adam', loss='mse')
model.summary()

  super().__init__(**kwargs)


In [9]:
# Train for 10 epochs over the training windows in mini-batches of 16; the
# returned History object is left as the cell's displayed value.
model.fit(x=X_train, y=y_train, batch_size=16, epochs=10)

Epoch 1/10
[1m2736/2736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - loss: 0.1272
Epoch 2/10
[1m2736/2736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - loss: 0.0194
Epoch 3/10
[1m2736/2736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - loss: 0.0190
Epoch 4/10
[1m2736/2736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - loss: 0.0189
Epoch 5/10
[1m2736/2736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - loss: 0.0170
Epoch 6/10
[1m2736/2736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - loss: 0.0160
Epoch 7/10
[1m2736/2736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - loss: 0.0139
Epoch 8/10
[1m2736/2736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - loss: 0.0154
Epoch 9/10
[1m2736/2736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - loss: 0.0156
Epoch 10/10
[1m2736/2736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

<keras.src.callbacks.history.History at 0x17f53127050>

In [10]:
# Run the trained network on every test window. The outputs are still on the
# standardized scale the model was trained on.
predictions = model.predict(x=X_test)

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step


In [11]:
# Sanity check: one prediction row per test window, one output column.
np.shape(predictions)

(734, 1)

In [13]:
# Map the standardized predictions back to the original nat_demand units.
# The scaler works on 2-D input, so the predictions are first forced into a
# single-column layout.
reshaped_predictions = np.reshape(predictions, (-1, 1))
predicted_values = target_scaler.inverse_transform(reshaped_predictions)