In [None]:
#------------------------------------------------------------------------------#
# Imports
#------------------------------------------------------------------------------#
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout, Dense, LSTM, GRU

from sklearn.preprocessing import StandardScaler

# Print NumPy floats with six decimals and without scientific notation.
np.set_printoptions(precision=6, suppress=True)

# Suppress warnings.
import warnings
warnings.simplefilter('ignore')

# Load dataset.
df = pd.read_csv(filepath_or_buffer='./data.csv')

In [None]:
#------------------------------------------------------------------------------#
# Preprocessing
#------------------------------------------------------------------------------#

# Parse the 'time' column into datetimes.
train_set_dates = pd.to_datetime(df['time'])

# Feature columns are the ones at positions 1..54; cast them to float.
columns = df.columns[1:55].tolist()
df_train = df.loc[:, columns].astype(float)

# Standardize the features with a StandardScaler fitted on the full frame.
# `scalar` is kept as a second name for the fitted scaler because later
# cells call `scalar.inverse_transform`.
standard_scaler = StandardScaler()
scaled_training_df = standard_scaler.fit_transform(df_train)
scalar = standard_scaler

In [None]:
#------------------------------------------------------------------------------#
# Create sliding windows for training.
#------------------------------------------------------------------------------#
future_num = 900  # number of future rows predicted per window
past_num = 900    # number of past rows each prediction is computed from

# One input window per end position `end`: rows [end - past_num, end) over
# every feature column. The last window stops early enough that
# `future_num` rows still follow it.
n_features = df_train.shape[1]
last_window_end = len(scaled_training_df) - future_num + 1

trainX = np.array([
    scaled_training_df[end - past_num:end, 0:n_features]
    for end in range(past_num, last_window_end)
])

print(f'trainX shape: {trainX.shape}')

trainX shape: (11200, 900, 54)


In [None]:
#------------------------------------------------------------------------------#
# Create model.
#------------------------------------------------------------------------------#

# Two stacked GRU layers followed by dropout and a dense head that emits
# `future_num` values (one per forecast step) for a single target column.
# NOTE(review): both GRU layers override the default activation with
# 'relu' -- confirm this is intentional.
window_shape = (trainX.shape[1], trainX.shape[2])  # (time steps, features)

model = Sequential([
    GRU(64, activation='relu', input_shape=window_shape, return_sequences=True),
    GRU(32, activation='relu', return_sequences=False),
    Dropout(0.2),
    Dense(future_num),
])
model.compile(loss='mse', optimizer='adam')
model.summary()

In [None]:
def create_target_sequences(df, past_num, future_num):
    """Build the per-column forecast targets for every sliding window.

    For each window start ``i`` in ``range(past_num, len(df) - future_num + 1)``
    and each column ``c``, the target is the slice ``df[i:i + future_num, c]``,
    i.e. the ``future_num`` rows that follow the ``past_num`` input rows.

    Args:
        df: 2-D array-like of shape ``(rows, columns)``. NumPy arrays and
            pandas DataFrames are both accepted (converted via ``np.asarray``).
        past_num: Number of leading rows reserved for the first input window;
            the first target starts at this row. Must be >= 0.
        future_num: Length of every target sequence. Must be >= 1.

    Returns:
        A new ``np.ndarray`` of shape ``(columns, windows, future_num)`` with
        ``windows = rows - past_num - future_num + 1``. When the data is too
        short for a single window the result has shape
        ``(columns, 0, future_num)``.

    Raises:
        ValueError: If ``df`` is not 2-D, ``past_num`` is negative, or
            ``future_num`` is smaller than 1.
    """
    values = np.asarray(df)
    if values.ndim != 2:
        raise ValueError(f'expected 2-D data, got {values.ndim} dimension(s)')
    if past_num < 0:
        raise ValueError('past_num must be non-negative')
    if future_num < 1:
        raise ValueError('future_num must be at least 1')

    num_rows, num_cols = values.shape

    # Not enough rows for even one (past, future) pair: keep the trailing
    # axis so callers can still reshape/index the result consistently.
    if num_rows < past_num + future_num:
        return np.empty((num_cols, 0, future_num), dtype=values.dtype)

    # windows[k, c, j] == values[k + j, c]; this is a strided view, no copy.
    windows = np.lib.stride_tricks.sliding_window_view(values, future_num, axis=0)

    # Drop starts that fall inside the first input window, move the column
    # axis to the front, and materialize an independent, writable array.
    return np.array(windows[past_num:].transpose(1, 0, 2), order='C')

In [None]:
#------------------------------------------------------------------------------#
# Generate predictions.
#------------------------------------------------------------------------------#

# Create target sequences for each column.
trainYs = create_target_sequences(scaled_training_df, past_num, future_num)

import time

# Model input for forecasting: the most recent window plus a batch axis.
input_sequence = np.expand_dims(trainX[-1], axis=0)

# Dedicated buffer for the scaled forecasts: one row per predicted step,
# one column per target. Predictions must NOT be written into `trainX[-1]`:
# that slice (and `input_sequence`) is a view of `trainX`, so writing there
# would overwrite the last training sample and the forecast input used for
# every subsequent column.
predicted_sequence = np.empty((future_num, trainYs.shape[0]), dtype=trainX.dtype)

# NOTE(review): `model` is created once in an earlier cell and fitted again
# for every column, so each fit starts from the weights left by the previous
# column -- confirm this warm start is intended.
for target_col_index in range(trainYs.shape[0]):  # Iterate over target columns
    column_name = columns[target_col_index]
    trainY = trainYs[target_col_index].reshape(-1, future_num)

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments during a long fit.
    start_time = time.perf_counter()
    history = model.fit(trainX,
                        trainY,
                        epochs=5,
                        batch_size=32,
                        validation_split=0.1,
                        verbose=1)
    end_time = time.perf_counter()
    print(f'\n\nTraining time {target_col_index}:', end_time - start_time)

    # Loss curves for this column; shown immediately so each figure appears
    # next to its training log instead of piling up until the cell ends.
    plt.figure(figsize=(4,1.5))
    plt.plot(history.history['loss'], label='Training loss')
    plt.plot(history.history['val_loss'], label='Validation loss')
    plt.title(column_name)
    plt.legend()
    plt.show()

    # Predict values for the last window and store them as this column's forecast.
    predicted_values = model.predict(input_sequence)
    predicted_sequence[:, target_col_index] = predicted_values[0]

# Label the scaled forecast with the training columns, then undo the standardization.
new_df = pd.DataFrame(data=predicted_sequence, columns=df_train.columns)
prediction_rescaled = scalar.inverse_transform(new_df)

# Forecast in original units, stamped with the last `future_num` timestamps of the dataset.
forecast_times = pd.to_datetime(df['time'].iloc[-future_num:].values)
df_forecast = pd.DataFrame(prediction_rescaled, columns=columns).assign(time=forecast_times)

# Plot against a copy with parsed timestamps; `original = df` would only
# alias the raw frame, and assigning the column would then modify `df` itself.
original = df.assign(time=pd.to_datetime(df['time']))

# One figure per feature column: full history with the forecast overlaid.
for column_name in columns:
    fig, ax = plt.subplots(figsize=(16, 6))
    sns.lineplot(data=original, x='time', y=column_name, label='Original', color='blue', ax=ax)
    sns.lineplot(data=df_forecast, x='time', y=column_name, label='Forecast', color='orange', ax=ax)
    ax.legend()
    ax.set_title(f'{column_name} Forecast')
    plt.show()
    # Release the figure so looping over many columns does not accumulate open figures.
    plt.close(fig)



Epoch 1/5
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 154ms/step - loss: 0.5144 - val_loss: 3.5224
Epoch 2/5
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 151ms/step - loss: 0.0768 - val_loss: 3.5170
Epoch 3/5
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 151ms/step - loss: 0.0529 - val_loss: 2.9044
Epoch 4/5
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 151ms/step - loss: 0.0480 - val_loss: 3.0505
Epoch 5/5
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 151ms/step - loss: 0.0454 - val_loss: 3.2208


Training time 0: 248.53663063049316
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 760ms/step
Epoch 1/5
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 150ms/step - loss: 0.9766 - val_loss: 1.9893
Epoch 2/5
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 150ms/step - loss: 0.6425 - val_loss: 2.1815
Epoch 3/5
[1m315/315[0m [32m━━━━

In [None]:
# Save the rescaled predictions (one column per feature, no index column) as CSV.
df_predictions = pd.DataFrame(data=prediction_rescaled, columns=columns)
df_predictions.to_csv(path_or_buf='./predictions.csv', index=False)