In [332]:
import os
import sys
parent_dir = os.path.dirname(os.getcwd())
grandparent_dir = os.path.dirname(parent_dir)
sys.path.append(parent_dir)
utils_dir = os.path.join(parent_dir, grandparent_dir, "src", "utils")
sys.path.append(utils_dir)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import seaborn as sns

import tensorflow as tf
from lag_data import create_lagged_features

In [333]:
X_path= os.path.join("..","..", "data","enriched_input", "X_train.csv")
X = pd.read_csv(X_path, delimiter=',')

y_path= os.path.join("..", "..", "data","enriched_input", "y_train.csv")
y = pd.read_csv(y_path, delimiter=',')

In [334]:
X.set_index("DELIVERY_START", inplace=True)
y.set_index("DELIVERY_START", inplace=True)
X.index = pd.to_datetime(X.index, utc=True)
y.index = pd.to_datetime(y.index, utc=True)
X.shape

(10605, 16)

In [335]:
n_lags = 4  # Number of lagged observations
X_lagged, y_lagged = create_lagged_features(X, y, n_lags)

In [336]:
X_lagged.shape, y_lagged.shape

((10601, 80), (10601, 1))

In [337]:
# Assuming you want to reserve the last 20% of your data for testing
test_size = 0.2
split_idx = int(len(X_lagged) * (1 - test_size))

X_train, X_test = X_lagged[:split_idx], X_lagged[split_idx:]
y_train, y_test = y_lagged[:split_idx], y_lagged[split_idx:]


In [338]:
from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X_train, y_train)


In [339]:
from sklearn.metrics import mean_squared_error

predictions = model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
print(f"Test MSE: {mse}")

Test MSE: 9142.534720470892


In [340]:
test_size = 0.2
val_size = 0.1

# Calculate the number of examples that will be in the test set
num_test = int(len(X_lagged) * test_size)

# Calculate the number of examples that will be in the validation set
num_val = int(len(X_lagged) * val_size)

# Calculate the index where the training set ends and the validation set begins
train_end_idx = len(X_lagged) - num_val - num_test

# Calculate the index where the validation set ends and the test set begins
val_end_idx = train_end_idx + num_val

# Split the data into training, validation, and test sets
X_train = X_lagged[:train_end_idx]
y_train = y_lagged[:train_end_idx]

X_val = X_lagged[train_end_idx:val_end_idx]
y_val = y_lagged[train_end_idx:val_end_idx]

X_test = X_lagged[val_end_idx:]
y_test = y_lagged[val_end_idx:]

X_train.shape, X_val.shape

((7421, 80), (1060, 80))

In [341]:
def df_to_X_y(df, window_size = 5) :
    df_as_np = df.to_numpy()
    X = []
    y = []
    for i in range(len(df_as_np)-window_size) :
        row = [[a] for a in df_as_np[i:i+5]]
        X.append(row)
        label = df_as_np[i+5]
        y.append(label)
    return np.array(X), np.array(y)

In [342]:
"""window_size = 5
X, y = df_to_X_y(X.join(y), window_size)"""

'window_size = 5\nX, y = df_to_X_y(X.join(y), window_size)'

In [343]:
X.shape, y.shape

((10605, 16), (10605, 1))

In [344]:
y

Unnamed: 0_level_0,spot_id_delta
DELIVERY_START,Unnamed: 1_level_1
2022-01-01 01:00:00+00:00,-36.874770
2022-01-01 02:00:00+00:00,-12.643588
2022-01-01 03:00:00+00:00,-1.950193
2022-01-01 04:00:00+00:00,1.938272
2022-01-01 05:00:00+00:00,0.199907
...,...
2023-03-29 17:00:00+00:00,6.029303
2023-03-29 18:00:00+00:00,13.576177
2023-03-29 19:00:00+00:00,17.478945
2023-03-29 20:00:00+00:00,17.559407


In [345]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam

model1 = Sequential()
model1.add(InputLayer((80, 1)))
model1.add(LSTM(64, return_sequences=True))  # Set return_sequences=True
model1.add(LSTM(128, return_sequences=True))  # Set return_sequences=True for the second LSTM as well
model1.add(LSTM(32))
model1.add(Dense(8, "relu"))
model1.add(Dense(1, "linear"))

model1.summary()


Model: "sequential_22"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_50 (LSTM)              (None, 80, 64)            16896     
                                                                 
 lstm_51 (LSTM)              (None, 80, 128)           98816     
                                                                 
 lstm_52 (LSTM)              (None, 32)                20608     
                                                                 
 dense_40 (Dense)            (None, 8)                 264       
                                                                 
 dense_41 (Dense)            (None, 1)                 9         
                                                                 
Total params: 136593 (533.57 KB)
Trainable params: 136593 (533.57 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [346]:
cp = ModelCheckpoint("model1/", save_best_only = True)
model1.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=0.0001), metrics=[RootMeanSquaredError()])

In [347]:
"""model1.fit(X_train, y_train, validation_data=(X_val, y_val), epochs = 10, callbacks=[cp])"""

'model1.fit(X_train, y_train, validation_data=(X_val, y_val), epochs = 10, callbacks=[cp])'

In [348]:
from tensorflow.keras.models import load_model

model1 = load_model("model1/")



In [349]:
y_lagged['spot_id_delta'] = y_lagged['spot_id_delta'].apply(lambda x: -1 if x < 0 else 1)

"""from sklearn.preprocessing import LabelBinarizer
lb = LabelBinarizer()
y_lagged['spot_id_delta'] = lb.fit_transform(y_lagged['spot_id_delta'])  # This will convert [-1, 1] to [0, 1] and then to [[1, 0], [0, 1]]"""

"from sklearn.preprocessing import LabelBinarizer\nlb = LabelBinarizer()\ny_lagged['spot_id_delta'] = lb.fit_transform(y_lagged['spot_id_delta'])  # This will convert [-1, 1] to [0, 1] and then to [[1, 0], [0, 1]]"

In [350]:
y_lagged

Unnamed: 0_level_0,spot_id_delta
DELIVERY_START,Unnamed: 1_level_1
2022-01-01 05:00:00+00:00,1
2022-01-01 06:00:00+00:00,-1
2022-01-01 07:00:00+00:00,-1
2022-01-01 08:00:00+00:00,-1
2022-01-01 09:00:00+00:00,-1
...,...
2023-03-29 17:00:00+00:00,1
2023-03-29 18:00:00+00:00,1
2023-03-29 19:00:00+00:00,1
2023-03-29 20:00:00+00:00,1


In [351]:
test_size = 0.2
val_size = 0.1

# Calculate the number of examples that will be in the test set
num_test = int(len(X_lagged) * test_size)

# Calculate the number of examples that will be in the validation set
num_val = int(len(X_lagged) * val_size)

# Calculate the index where the training set ends and the validation set begins
train_end_idx = len(X_lagged) - num_val - num_test

# Calculate the index where the validation set ends and the test set begins
val_end_idx = train_end_idx + num_val

# Split the data into training, validation, and test sets
X_train = X_lagged[:train_end_idx]
y_train = y_lagged[:train_end_idx]

X_val = X_lagged[train_end_idx:val_end_idx]
y_val = y_lagged[train_end_idx:val_end_idx]

X_test = X_lagged[val_end_idx:]
y_test = y_lagged[val_end_idx:]

X_train.shape, X_val.shape

((7421, 80), (1060, 80))

In [352]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, LSTM, Dense
from tensorflow.keras.optimizers import Adam

num_classes = 1  # Adjust based on your dataset

model2 = Sequential()
model2.add(InputLayer(input_shape=(80, 1)))
model2.add(LSTM(64, return_sequences=True))
model2.add(LSTM(128, return_sequences=True))
model2.add(LSTM(128))
model2.add(Dense(64, activation='relu'))  # An additional Dense layer
model2.add(Dense(8, activation='relu'))
model2.add(Dense(num_classes, activation='softmax'))  # Output layer for multi-class classification

cp = ModelCheckpoint("model2/", save_best_only=True)
model2.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0001), metrics=['accuracy'])

In [353]:
y_lagged['spot_id_delta'].unique()

array([ 1, -1], dtype=int64)

In [354]:
history = model2.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    callbacks=[cp]
)

Epoch 1/10


INFO:tensorflow:Assets written to: model2\assets


Epoch 2/10


INFO:tensorflow:Assets written to: model2\assets


Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
