In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Bidirectional, LSTM, Dropout, BatchNormalization
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error, r2_score
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import EarlyStopping

# Load dataset (Replace with actual file path if needed)
data = pd.read_csv("sales_5000000.csv")

# Use only the first 10,000 rows
data = data.head(100000)

# Convert Order Date to datetime and sort data
data["Order Date"] = pd.to_datetime(data["Order Date"])
data = data.sort_values(by="Order Date")

# Extract year and add as a feature
data["Year"] = data["Order Date"].dt.year

# Select relevant columns
time_series = data[["Order Date", "Year", "Total Profit"]].set_index("Order Date")

# Split data into training and testing sets
year_split = data["Year"].quantile(0.8)  # 80% of data for training
train_data = time_series[time_series["Year"] <= year_split]
test_data = time_series[time_series["Year"] > year_split]

# Normalize the profit values
scaler = MinMaxScaler()
train_data["Total Profit"] = scaler.fit_transform(train_data[["Total Profit"]])
test_data["Total Profit"] = scaler.transform(test_data[["Total Profit"]])

# Create sequences
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length])
    return np.array(X), np.array(y)

# Define sequence length
seq_length = 20

# Prepare data for model
values_train = train_data["Total Profit"].values
values_test = test_data["Total Profit"].values

X_train, y_train = create_sequences(values_train, seq_length)
X_test, y_test = create_sequences(values_test, seq_length)

X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Build Improved BiLSTM model
model = Sequential([
    Bidirectional(LSTM(100, return_sequences=True), input_shape=(seq_length, 1)),
    BatchNormalization(),
    Dropout(0.3),
    Bidirectional(LSTM(100, return_sequences=True)),
    BatchNormalization(),
    Dropout(0.3),
    Bidirectional(LSTM(50)),
    BatchNormalization(),
    Dropout(0.3),
    Dense(50, activation='relu'),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse', metrics=['mae', 'mse'])

# Early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train model
history = model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test), callbacks=[early_stopping])

# Predictions
y_pred = model.predict(X_test)

# Inverse transform predictions
y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))
y_pred_inv = scaler.inverse_transform(y_pred)

# Compute evaluation metrics (adding small constant to avoid division errors)
mape = mean_absolute_percentage_error(y_test_inv + 1e-9, y_pred_inv + 1e-9) * 100
r2 = r2_score(y_test_inv, y_pred_inv)
accuracy = 100 - mape  # Approximate accuracy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data["Total Profit"] = scaler.fit_transform(train_data[["Total Profit"]])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data["Total Profit"] = scaler.transform(test_data[["Total Profit"]])


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
 561/5248 [==>...........................] - ETA: 2:06 - loss: 0.0475 - mae: 0.1711 - mse: 0.0475

KeyboardInterrupt: 