In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Bidirectional, LSTM, Dropout, BatchNormalization
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error, r2_score
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv("sales_5000000.csv")

In [3]:
df = df[['Order Date', 'Total Profit']] 

In [4]:
df['Order Date'] = pd.to_datetime(df['Order Date'])

In [5]:
df = df.set_index('Order Date').sort_index()

In [6]:
df

Unnamed: 0_level_0,Total Profit
Order Date,Unnamed: 1_level_1
2014-01-01,273270.24
2014-01-01,12185.94
2014-01-01,495008.80
2014-01-01,13457.44
2014-01-01,252651.48
...,...
2024-09-10,12054.82
2024-09-10,241155.20
2024-09-10,162433.49
2024-09-10,315571.12


In [7]:
daily_df = df.groupby('Order Date')['Total Profit'].sum().reset_index()

In [8]:
monthly_df = daily_df.resample('ME', on='Order Date')['Total Profit'].sum().reset_index()
monthly_df.columns = ['Order Date', 'Total Profit']

In [9]:
monthly_df

Unnamed: 0,Order Date,Total Profit
0,2014-01-31,1.556656e+10
1,2014-02-28,1.402973e+10
2,2014-03-31,1.556248e+10
3,2014-04-30,1.510544e+10
4,2014-05-31,1.559264e+10
...,...,...
124,2024-05-31,1.558589e+10
125,2024-06-30,1.506300e+10
126,2024-07-31,1.553073e+10
127,2024-08-31,1.558169e+10


In [10]:
train_data, test_data = train_test_split(monthly_df, test_size=0.2, shuffle=False)

In [11]:
scaler = MinMaxScaler()
train_data["Total Profit"] = scaler.fit_transform(train_data[["Total Profit"]])
test_data["Total Profit"] = scaler.transform(test_data[["Total Profit"]])

In [12]:
train_data

Unnamed: 0,Order Date,Total Profit
0,2014-01-31,0.921351
1,2014-02-28,0.048936
2,2014-03-31,0.919033
3,2014-04-30,0.659589
4,2014-05-31,0.936155
...,...,...
98,2022-03-31,0.926551
99,2022-04-30,0.649426
100,2022-05-31,0.971595
101,2022-06-30,0.636426


In [13]:
test_data

Unnamed: 0,Order Date,Total Profit
103,2022-08-31,0.951341
104,2022-09-30,0.64198
105,2022-10-31,0.949891
106,2022-11-30,0.656276
107,2022-12-31,0.951815
108,2023-01-31,0.936125
109,2023-02-28,0.072866
110,2023-03-31,0.916363
111,2023-04-30,0.667901
112,2023-05-31,0.90818


In [14]:
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length])
    return np.array(X), np.array(y)

# Define sequence length
seq_length = 12

# Prepare data for model
values_train = train_data["Total Profit"].values
values_test = test_data["Total Profit"].values

X_train, y_train = create_sequences(values_train, seq_length)
X_test, y_test = create_sequences(values_test, seq_length)

X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))


In [15]:
X_train.shape

(91, 12, 1)

In [16]:
y_train.shape

(91,)

In [1]:

# Build Improved BiLSTM model
model = Sequential([
    Bidirectional(LSTM(100, return_sequences=True), input_shape=(seq_length, 1)),
    BatchNormalization(),
    Dropout(0.3),
    Bidirectional(LSTM(100, return_sequences=True)),
    BatchNormalization(),
    Dropout(0.3),
    Bidirectional(LSTM(50)),
    BatchNormalization(),
    Dropout(0.3),
    Dense(50, activation='relu'),
    Dense(1)
])

def rmse(y_true, y_pred):
    return tf.sqrt(tf.reduce_mean(tf.square(y_true - y_pred)))

model.compile(optimizer='adam', loss=rmse, metrics=['mae', rmse])

early_stopping = EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True)

history = model.fit(X_train, y_train, epochs=100, batch_size=16, validation_data=(X_test, y_test), callbacks=[early_stopping])

NameError: name 'Sequential' is not defined