# LSTM Modelling

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, BatchNormalization, Input, Bidirectional
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [None]:
# Load dataset
# NOTE(review): the CSV path is blank in this notebook, so this cell raises
# until DATA_PATH points at the real file. The later cells read a
# `total_amount` column and `total_amount_lag_1..10` columns from it.
DATA_PATH = ""
data = pd.read_csv(DATA_PATH)

In [None]:
# Standardize the target column (total_amount)
#
# The fitted scaler is kept in `target_scaler` so that predictions made on the
# standardized scale can be mapped back to original units with
# `target_scaler.inverse_transform(...)`.
#
# The scaler is fitted on the leading 80% of rows only (the same chronological
# fraction used for the train/test split further down) so that the mean and
# standard deviation of the test period do not leak into the training target.
target_scaler = StandardScaler()
target_train_rows = int(len(data) * 0.8)
target_scaler.fit(data[['total_amount']].iloc[:target_train_rows])

# transform() returns an (n, 1) array; ravel() turns it into a single column.
data['norm_amount'] = target_scaler.transform(data[['total_amount']]).ravel()

In [None]:
# Name of the raw target column; the lag-feature column names are derived
# from it when the feature matrix is built.
target_col = "total_amount"

In [None]:
# Prepare features (X) and target (y)
# The model input is the last N_LAGS observations of the target, read from the
# pre-computed `<target_col>_lag_<i>` columns.
N_LAGS = 10  # Example using 10 lags
lag_cols = [f'{target_col}_lag_{i}' for i in range(1, N_LAGS + 1)]

# Placeholder for additional (e.g. seasonal) feature columns. It is empty in
# this notebook. NOTE(review): anything added here is treated as an extra time
# step by the later reshape to (samples, time_steps, 1).
extra_feature_cols = []

X = data[lag_cols + extra_feature_cols]

# NOTE(review): the target selection was blank (`data[]`, a syntax error) in
# the notebook. The standardized column created above is assumed here; the
# recorded metrics (MSE ~0.01 with R² ~0.99) are consistent with a
# unit-variance target. Confirm against the original analysis.
y = data['norm_amount'].to_numpy()

In [None]:
# Normalize the data
# Fit the MinMaxScaler on the training portion only, then apply it to every
# row. Fitting on the full matrix would let the min/max of the test period
# influence the training features (data leakage). Test-period values may
# therefore fall slightly outside [0, 1], which is expected.
train_rows = int(len(X) * 0.8)  # same 80% chronological cut as the train/test split below
scaler = MinMaxScaler()
scaler.fit(X[:train_rows])
X = scaler.transform(X)

In [None]:
# Reshape data for LSTM: (samples, time_steps) -> (samples, time_steps, features)
# Each lag column becomes one time step carrying a single feature.
X = X.reshape(X.shape[:2] + (1,))

In [None]:
# Chronological 80/20 split: the first 80% of rows train the model and the
# remaining 20% are held out for testing (no shuffling).
train_size = int(0.8 * len(X))
train_rows, test_rows = slice(None, train_size), slice(train_size, None)

X_train, y_train = X[train_rows], y[train_rows]
X_test, y_test = X[test_rows], y[test_rows]

Long Short-Term Memory (LSTM) layers are essential for time series data analysis due to their specific design to capture temporal dependencies. These layers possess the capability to learn patterns over time, retaining pertinent information from preceding time steps while discarding irrelevant data. This attribute is paramount in time series forecasting, where past observations significantly influence future outcomes. By stacking multiple LSTM layers, the model can simultaneously learn both low-level and high-level temporal features. The initial LSTM layer captures fundamental temporal patterns, while the subsequent layer abstracts these patterns into more intricate features. Dropout and batch normalization techniques are employed to mitigate overfitting and enhance the training process. Time series models are particularly susceptible to overfitting due to the sequential nature of the data, and dropout ensures that the model does not excessively rely on specific time steps or features. Batch normalization stabilizes the learning process, which is particularly crucial in deep learning models such as LSTMs. Subsequently, the Dense layers refine the learned features and culminate in the final prediction. In time series forecasting, it is imperative to process time-dependent features prior to making predictions. This architectural choice is justified because LSTMs are well-suited to handle time-dependent data, and the combination of additional layers and regularization techniques ensures that the model strikes a balance between power and robustness.

In [None]:
# Build the stacked LSTM model
n_timesteps = X_train.shape[1]

model = Sequential([
    # Input: one feature per time step
    Input(shape=(n_timesteps, 1)),

    # First LSTM block: returns the full sequence so the next LSTM can consume it
    LSTM(64, activation='tanh', return_sequences=True),
    Dropout(0.3),
    BatchNormalization(),

    # Second LSTM block: returns only the final state
    LSTM(128, activation='tanh', return_sequences=False),
    Dropout(0.3),
    BatchNormalization(),

    # Dense head with L2 weight regularization
    Dense(64, activation='relu', kernel_regularizer='l2'),
    Dropout(0.3),

    # Single linear output for regression
    Dense(1, activation='linear'),
])

In [None]:
# Compile the model: Adam optimizer, MSE as the training loss, MAE tracked as
# an additional metric.
optimizer = Adam(learning_rate=1e-3)
model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

In [None]:
# Train the model
#
# Validation data is carved from the tail of the training window rather than
# taken from the test set. EarlyStopping (with restore_best_weights=True) and
# ReduceLROnPlateau both act on val_loss, so whatever is passed as
# validation_data influences which weights are kept. Using the test set there
# would make the later test metrics optimistically biased.
val_start = int(len(X_train) * 0.8)
X_fit, X_val = X_train[:val_start], X_train[val_start:]
y_fit, y_val = y_train[:val_start], y_train[val_start:]

# Stop after 15 epochs without val_loss improvement and roll back to the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True, verbose=1)
# Halve the learning rate after 5 stagnant epochs, never going below 1e-6.
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

history = model.fit(
    X_fit,
    y_fit,
    epochs=50,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[lr_scheduler, early_stopping],
)

Epoch 1/50
[1m2497/2497[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 11ms/step - loss: 1.4192 - mae: 0.6969 - val_loss: 1.2906 - val_mae: 0.8463 - learning_rate: 0.0010
Epoch 2/50
[1m2497/2497[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 10ms/step - loss: 0.4137 - mae: 0.4219 - val_loss: 0.7346 - val_mae: 0.6471 - learning_rate: 0.0010
Epoch 3/50
[1m2497/2497[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 11ms/step - loss: 0.2116 - mae: 0.3056 - val_loss: 0.8640 - val_mae: 0.7194 - learning_rate: 0.0010
Epoch 4/50
[1m2497/2497[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 11ms/step - loss: 0.1597 - mae: 0.2685 - val_loss: 0.5602 - val_mae: 0.5807 - learning_rate: 0.0010
Epoch 5/50
[1m2497/2497[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 10ms/step - loss: 0.1341 - mae: 0.2472 - val_loss: 0.2672 - val_mae: 0.3819 - learning_rate: 0.0010
Epoch 6/50
[1m2497/2497[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 11ms/step - loss: 0.

In [None]:
# Evaluate the model on the held-out test split.
# predict() returns shape (n, 1); collapse it to 1-D to line up with y_test.
y_pred = model.predict(X_test).reshape(-1)

mae, mse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)
rmse = np.sqrt(mse)

[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step


The next cell prints the evaluation metrics computed above (MAE, MSE, RMSE and R²), each formatted to two decimal places.

In [None]:
# Assemble the report first, then emit it in a single print call.
report_lines = [
    "Model Evaluation Metrics:",
    f"Mean Absolute Error (MAE): {mae:.2f}",
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"Root Mean Squared Error (RMSE): {rmse:.2f}",
    f"R-squared (R²): {r2:.2f}",
]
print("\n".join(report_lines))

Model Evaluation Metrics:
Mean Absolute Error (MAE): 0.07
Mean Squared Error (MSE): 0.01
Root Mean Squared Error (RMSE): 0.10
R-squared (R²): 0.99
