In [None]:
import sys
import os
sys.path.append(os.path.abspath('../src'))

from data_preprocessing_utils import data_for_training
from utils import compute_error
from models import get_mlp_model

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from tabulate import tabulate

import optuna
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint

from sklearn.neural_network import MLPRegressor

from tensorflow.keras import backend as K
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import ModelCheckpoint

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
import tensorflow as tf

# Ask TensorFlow which GPU devices it can see and display the resulting list.
gpu_devices = tf.config.list_physical_devices('GPU')
gpu_devices

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

# We start by loading the pre-processed data

In [None]:
# Read the pre-processed sales / purchase-price table and preview its first rows.
sales_csv_path = '../data/prepocessing/sales_and_purchase_prices.csv'
sales_and_purchase_prices = pd.read_csv(sales_csv_path)
sales_and_purchase_prices.head()

Unnamed: 0,InventoryId,Store,Brand,Description,Size,SalesQuantity,SalesDollars,SalesPrice,SalesDate,Volume,Classification,ExciseTax,VendorNo,VendorName,Amount,PurchasePrice
0,1_HARDERSFIELD_1004,1,1004,Jim Beam w/2 Rocks Glasses,750.0,1,16.49,16.49,2016-01-01,750,1,0.79,12546,JIM BEAM BRANDS COMPANY,17.28,10.65
1,1_HARDERSFIELD_1004,1,1004,Jim Beam w/2 Rocks Glasses,750.0,2,32.98,16.49,2016-01-02,750,1,1.57,12546,JIM BEAM BRANDS COMPANY,34.55,10.65
2,1_HARDERSFIELD_1004,1,1004,Jim Beam w/2 Rocks Glasses,750.0,1,16.49,16.49,2016-01-03,750,1,0.79,12546,JIM BEAM BRANDS COMPANY,17.28,10.65
3,1_HARDERSFIELD_1004,1,1004,Jim Beam w/2 Rocks Glasses,750.0,1,14.49,14.49,2016-01-08,750,1,0.79,12546,JIM BEAM BRANDS COMPANY,15.28,10.65
4,1_HARDERSFIELD_1005,1,1005,Maker's Mark Combo Pack,750.0,2,69.98,34.99,2016-01-09,375,1,0.79,12546,JIM BEAM BRANDS COMPANY,70.77,27.34


# We aggregate sales by month

In [None]:
# Drop the columns that are not used for prediction.
# "VendorName" is deliberately kept: the monthly aggregation that follows still reads it.
columns_to_drop = ["InventoryId", "Brand", "Volume", "VendorNo", "Amount", "SalesDollars"]

# errors="ignore" keeps this cell idempotent: re-running it after the columns
# have already been removed no longer raises a KeyError.
sales_and_purchase_prices = sales_and_purchase_prices.drop(columns=columns_to_drop, errors="ignore")
sales_and_purchase_prices.head()

Unnamed: 0,Store,Description,Size,SalesQuantity,SalesPrice,SalesDate,Classification,ExciseTax,PurchasePrice
0,1,Jim Beam w/2 Rocks Glasses,750.0,1,16.49,2016-01-01,1,0.79,10.65
1,1,Jim Beam w/2 Rocks Glasses,750.0,2,16.49,2016-01-02,1,1.57,10.65
2,1,Jim Beam w/2 Rocks Glasses,750.0,1,16.49,2016-01-03,1,0.79,10.65
3,1,Jim Beam w/2 Rocks Glasses,750.0,1,14.49,2016-01-08,1,0.79,10.65
4,1,Maker's Mark Combo Pack,750.0,2,34.99,2016-01-09,1,0.79,27.34


In [None]:

# Parse the sale dates so that the calendar year and month can be extracted
sale_dates = pd.to_datetime(sales_and_purchase_prices['SalesDate'])
sales_and_purchase_prices['SalesDate'] = sale_dates

# Add the year and the month as separate columns; they serve as grouping keys
sales_and_purchase_prices['Year'] = sale_dates.dt.year
sales_and_purchase_prices['Month'] = sale_dates.dt.month

# One output row per (year, month, product description, store, classification)
group_keys = ['Year', 'Month', 'Description', 'Store', 'Classification']

# Quantities are summed and sizes averaged over the group.
# NOTE(review): ExciseTax, PurchasePrice, SalesPrice and VendorName only keep the
# value of the first row of each group -- confirm this is the intended aggregate.
monthly_aggregations = {
    'SalesQuantity': 'sum',
    'ExciseTax': 'first',
    'Size': 'mean',
    'PurchasePrice': 'first',
    'SalesPrice': 'first',
    "VendorName": 'first',
}

sales_month_aggregated = (
    sales_and_purchase_prices
    .groupby(group_keys)
    .agg(monthly_aggregations)
    .reset_index()
)

sales_month_aggregated.head()

Unnamed: 0,Year,Month,Description,Store,Classification,SalesQuantity,ExciseTax,Size,PurchasePrice,SalesPrice
0,2016,1,(RI) 1,1,1,1,0.79,750.0,26.92,36.99
1,2016,1,(RI) 1,22,1,1,0.79,750.0,26.92,36.99
2,2016,1,(RI) 1,33,1,1,0.79,750.0,26.92,36.99
3,2016,1,(RI) 1,34,1,3,0.79,750.0,26.92,36.99
4,2016,1,(RI) 1,35,1,1,0.79,750.0,26.92,36.99


# Deep learning model optimization

In [None]:
# Work on a copy so the monthly aggregate itself is left untouched
data = sales_month_aggregated.copy()

# data_for_training returns the model inputs and targets in this order:
# train / test / validation features, then train / test / validation targets.
(X_train, X_test, X_val,
 y_train, y_test, y_val) = data_for_training(data)

We're going to use a multilayer perceptron (MLP). To start with, we'll create an MLP with minimal parameters to get an idea of performance without model optimization. Then we'll optimize our model using Bayesian optimization.

In [None]:

# Baseline scikit-learn MLP with a single hidden layer of 50 units.
# - hidden_layer_sizes is written as a real one-element tuple: `(50)` is just the int 50.
# - random_state is fixed so that the scores printed below are reproducible across runs.
best_mlp = MLPRegressor(
    hidden_layer_sizes=(50,),
    activation='relu',
    alpha=0.008724528119026307,
    learning_rate='constant',
    random_state=42,
)

# Train the model on the training data
best_mlp.fit(X_train, y_train)

# Predict and score on the validation set
y_val_pred = best_mlp.predict(X_val)
mse_val = mean_squared_error(y_val, y_val_pred)
r2_val = best_mlp.score(X_val, y_val)

# Predict and score on the test set
y_test_pred = best_mlp.predict(X_test)
mse_test = mean_squared_error(y_test, y_test_pred)
r2_test = best_mlp.score(X_test, y_test)

print("Validation Set:")
print("MSE (Mean Squared Error) on the validation set:", mse_val)
print("R-squared (Coefficient of Determination) on the validation set:", r2_val)

print("\nTest Set:")
print("MSE (Mean Squared Error) on the test set:", mse_test)
print("R-squared (Coefficient of Determination) on the test set:", r2_test)



Validation Set:
MSE (Mean Squared Error) on the validation set: 488.12988236970637
R-squared (Coefficient of Determination) on the validation set: 0.7197601648857404

Test Set:
MSE (Mean Squared Error) on the test set: 368.26603716408187
R-squared (Coefficient of Determination) on the test set: 0.7670622714625968


Using Bayesian optimization to find the best hyperparameters

In [None]:

# Objective function for Optuna optimization
def objective(trial):
    """Train one MLP with hyperparameters sampled from ``trial`` and score it.

    The model is built with ``get_mlp_model``, trained on the training split and
    checkpointed on the lowest ``val_loss``. The best checkpoint is then reloaded
    and scored.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the layer sizes, dropout rates, batch size
        and number of epochs.

    Returns
    -------
    float
        Mean squared error of the best checkpoint on the validation split.
        The validation split (not the test split) is used so that the test set
        stays untouched by the hyperparameter search and remains an unbiased
        estimate of the final model's performance.
    """
    # Drop the Keras state left over from the previous trial; otherwise every
    # trial adds to the global state and memory use grows over the study.
    K.clear_session()

    # Hyperparameters to be optimized
    hidden_layer_one = trial.suggest_categorical('hidden_layer_one', [32, 64, 128, 256, 512])
    hidden_layer_two = trial.suggest_categorical('hidden_layer_two', [32, 64, 128, 256, 512])
    # suggest_float replaces the deprecated suggest_uniform
    dropout_one = trial.suggest_float('dropout_one', 0, 0.5)
    dropout_two = trial.suggest_float('dropout_two', 0, 0.5)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128, 256, 512])
    # Single-valued choice: the epoch count is fixed but still recorded in the trial parameters
    epochs = trial.suggest_categorical('epochs', [200])

    model = get_mlp_model(
        input_shape=(X_train.shape[1],),
        hidden_layer_one=hidden_layer_one,
        dropout_one=dropout_one,
        hidden_layer_two=hidden_layer_two,
        dropout_two=dropout_two,
    )

    # Keep only the weights of the epoch with the lowest validation loss
    checkpoint_filepath = 'model_checkpoint.h5'
    model_checkpoint = ModelCheckpoint(
        filepath=checkpoint_filepath,
        save_best_only=True,
        monitor='val_loss',
        mode='min',
        verbose=0
    )

    # Train on the training data with validation data and the checkpoint callback
    model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        verbose=0,
        validation_data=(X_val, y_val),
        callbacks=[model_checkpoint],
    )

    # Load the best weights from the saved checkpoint
    model.load_weights(checkpoint_filepath)

    # Score the restored model on the validation set
    y_val_pred = model.predict(X_val)
    mse_val = mean_squared_error(y_val, y_val_pred)

    return mse_val

# Create a study that minimizes the objective and run 100 trials sequentially
# (n_jobs=1 avoids parallelism problems on the GPU).
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100, n_jobs=1)

# Report the best hyperparameters found and the corresponding objective value
print("Best hyperparameters:", study.best_params, sep="\n")
print("Best MSE:", study.best_value)


Using Grid Search

In [None]:

# Best validation R2 seen so far; a configuration is reported only when it beats this value.
r2 = 0

# Candidate values shared by both hidden layers and both dropout layers.
# (The original lists contained typos: 325 instead of 25, and 350 twice instead of 250.)
hidden_layer_sizes = [25, 50, 75, 100, 125, 150, 175, 200, 225, 250, 300, 350, 400]
dropout_rates = [0, 0.1, 0.2, 0.3, 0.4, 0.5]

for hidden_layer_one in hidden_layer_sizes:
    for hidden_layer_two in hidden_layer_sizes:
        for dropout_one in dropout_rates:
            for dropout_two in dropout_rates:
                # Drop the Keras state of the previous configuration so memory
                # does not grow over the thousands of models built by this loop.
                K.clear_session()

                model = get_mlp_model(
                    input_shape=(X_train.shape[1],),
                    hidden_layer_one=hidden_layer_one,
                    dropout_one=dropout_one,
                    hidden_layer_two=hidden_layer_two,
                    dropout_two=dropout_two,
                )

                # Keep only the weights of the epoch with the lowest validation loss
                checkpoint_filepath = 'model_checkpoint.h5'
                model_checkpoint = ModelCheckpoint(
                    filepath=checkpoint_filepath,
                    save_best_only=True,
                    monitor='val_loss',
                    mode='min',
                    verbose=0
                )

                # Train on the training data with validation data and the checkpoint callback
                model.fit(
                    X_train, y_train,
                    epochs=300,
                    batch_size=256,
                    verbose=0,
                    validation_data=(X_val, y_val),
                    callbacks=[model_checkpoint],
                )

                # Load the best weights from the saved checkpoint
                model.load_weights(checkpoint_filepath)

                # Model selection is done on the validation set so that the test
                # set is not used to choose hyperparameters.
                # compute_error returns (rmse, corr, mae, rae, rrse, mape, r2); only r2 is needed here.
                y_val_pred = model.predict(X_val)
                r2_val = compute_error(np.asarray(y_val), y_val_pred.reshape(y_val_pred.shape[0]))[-1]

                if r2_val > r2:
                    r2 = r2_val

                    # Test-set metrics are computed for reporting only
                    y_test_pred = model.predict(X_test)
                    mse_test = mean_squared_error(y_test, y_test_pred)
                    rmse2, corr2, mae2, rae2, rrse2, mape2, r2_2 = compute_error(y_test.values, y_test_pred.reshape(y_test_pred.shape[0]))

                    print("{},{},{},{}".format(hidden_layer_one, hidden_layer_two, dropout_one, dropout_two))
                    print("RMSE:", rmse2)
                    print("Corrélation:", corr2)
                    print("MAE:", mae2)
                    print("RAE:", rae2)
                    print("RRSE:", rrse2)
                    print("MAPE:", mape2)
                    print("R2:", r2_2)
                    print("MSE:", mse_test)
                    print("Validation R2:", r2_val)
                    print("----------------------------------")



25,325,0,0
RMSE: 17.18827596116255
Corrélation: 0.9060056717895975
MAE: 7.735881570305495
RAE: 0.4351773700181274
RRSE: 0.432286229822222
MAPE: 112.90243583534834
R2: 0.8131286155060891
MSE: 295.43683051707836
----------------------------------
25,325,0,0.1
RMSE: 16.864753297199485
Corrélation: 0.9082206342405372
MAE: 7.400556704503353
RAE: 0.41631387115568363
RRSE: 0.42414961431857057
MAPE: 113.621389726263
R2: 0.8200971046734078
MSE: 284.41990377540094
----------------------------------
25,50,0,0.2
RMSE: 16.443648404369615
Corrélation: 0.9125594102361978
MAE: 7.513842387390534
RAE: 0.4226866891844379
RRSE: 0.4135587995741229
MAPE: 118.3443511332354
R2: 0.8289691192948104
MSE: 270.39357284652743
----------------------------------
25,100,0,0
RMSE: 16.32324740181189
Corrélation: 0.9138805543279876
MAE: 7.357724214488608
RAE: 0.4139043551636739
RRSE: 0.41053070672873815
MAPE: 114.3944490704389
R2: 0.8314645388328028
MSE: 266.44840574075863
----------------------------------


KeyboardInterrupt: 

Training the model with the best hyperparameters

In [None]:
# Hyperparameters retained for the final model
hidden_layer_one, hidden_layer_two = 50, 100
dropout_one, dropout_two = 0, 0

model = get_mlp_model(
    input_shape=(X_train.shape[1],),
    hidden_layer_one=hidden_layer_one,
    dropout_one=dropout_one,
    hidden_layer_two=hidden_layer_two,
    dropout_two=dropout_two,
)

# Checkpoint callback: the file is overwritten only when val_loss improves
checkpoint_filepath = 'model_checkpoint.h5'
model_checkpoint = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=0,
)

# Fit on the training split, validating on the validation split after each epoch
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=300,
    batch_size=512,
    callbacks=[model_checkpoint],
    verbose=2,
)

# Restore the best checkpoint before scoring
model.load_weights(checkpoint_filepath)

# Predict on the test set and compute the error metrics
y_test_pred = model.predict(X_test)
rmse2, corr2, mae2, rae2, rrse2, mape2, r2_2 = compute_error(
    y_test.values, y_test_pred.reshape(y_test_pred.shape[0])
)

# Print each metric next to its label, in the same order as before
metric_report = [
    ("RMSE:", rmse2),
    ("Corrélation:", corr2),
    ("MAE:", mae2),
    ("RAE:", rae2),
    ("RRSE:", rrse2),
    ("MAPE:", mape2),
    ("R2:", r2_2),
]
for label, value in metric_report:
    print(label, value)
print("----------------------------------")

Epoch 1/300
193/193 - 10s - loss: 1845.6409 - val_loss: 1741.7413 - 10s/epoch - 51ms/step
Epoch 2/300
193/193 - 2s - loss: 1799.6522 - val_loss: 1654.5475 - 2s/epoch - 10ms/step
Epoch 3/300
193/193 - 2s - loss: 1728.2849 - val_loss: 1684.0879 - 2s/epoch - 10ms/step
Epoch 4/300
193/193 - 2s - loss: 1617.7590 - val_loss: 1446.0284 - 2s/epoch - 10ms/step
Epoch 5/300
193/193 - 2s - loss: 1535.6127 - val_loss: 1352.7496 - 2s/epoch - 10ms/step
Epoch 6/300
193/193 - 2s - loss: 1416.3770 - val_loss: 1244.1658 - 2s/epoch - 10ms/step
Epoch 7/300
193/193 - 2s - loss: 1312.1503 - val_loss: 1548.3198 - 2s/epoch - 12ms/step
Epoch 8/300
193/193 - 2s - loss: 1221.2966 - val_loss: 1051.3567 - 2s/epoch - 11ms/step
Epoch 9/300
193/193 - 2s - loss: 1056.1085 - val_loss: 905.0511 - 2s/epoch - 10ms/step
Epoch 10/300
193/193 - 2s - loss: 951.3734 - val_loss: 1097.1295 - 2s/epoch - 10ms/step
Epoch 11/300
193/193 - 2s - loss: 797.8675 - val_loss: 892.8505 - 2s/epoch - 10ms/step
Epoch 12/300
193/193 - 2s - loss

Save the weights of the best model for later loading

In [None]:
# Save the best model in checkpoint only if it is better
# model.save_weights('../checkpoint/base_model_checkpoint.h5')