In [1]:
# Initial imports
import pandas as pd
from pathlib import Path

from sklearn import tree
from sklearn import linear_model
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

import tensorflow as tf

2023-08-17 16:04:27.170117: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Loading data
file_path = Path("Resources/retail_price_cleaned.csv")
df_sales = pd.read_csv(file_path)
df_sales.head()

Unnamed: 0,product_id,product_category_name,month_year,qty_sold,total_price,freight_price,unit_price,product_rating,no_customers,month,...,comp1_price,comp1_prod_rating,comp1_freight_price,comp2_price,comp2_prod_rating,comp2_freight_price,comp3_price,comp3_prod_rating,comp3_freight_price,lag_price
0,bed1,bed_bath_table,01-05-2017,1,45.95,15.1,45.95,4.0,57,5,...,89.9,3.9,15.011897,215.0,4.4,8.76,45.95,4.0,15.1,45.9
1,bed1,bed_bath_table,01-06-2017,3,137.85,12.933333,45.95,4.0,61,6,...,89.9,3.9,14.769216,209.0,4.4,21.322,45.95,4.0,12.933333,45.95
2,bed1,bed_bath_table,01-07-2017,6,275.7,14.84,45.95,4.0,123,7,...,89.9,3.9,13.993833,205.0,4.4,22.195932,45.95,4.0,14.84,45.95
3,bed1,bed_bath_table,01-08-2017,4,183.8,14.2875,45.95,4.0,90,8,...,89.9,3.9,14.656757,199.509804,4.4,19.412885,45.95,4.0,14.2875,45.95
4,bed1,bed_bath_table,01-09-2017,2,91.9,15.1,45.95,4.0,54,9,...,89.9,3.9,18.776522,163.39871,4.4,24.324687,45.95,4.0,15.1,45.95


In [3]:
# Turn dates into datetime for "year","month","month_year"
df_sales['year'] = pd.to_datetime(df_sales['month_year'], format='%d-%m-%Y').dt.year 
df_sales['month'] = pd.to_datetime(df_sales['month_year'], format='%d-%m-%Y').dt.month
df_sales['month_year'] = pd.to_datetime(df_sales['month_year'], format='%d-%m-%Y').dt.strftime('%Y-%m') #.d

In [4]:
df_sales_copy = df_sales.copy()

In [15]:
# Define the model - deep neural net
number_input_features = 25
hidden_nodes_layer1 =  25
hidden_nodes_layer2 = 10

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="relu"))

# Check the structure of the model
nn.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_15 (Dense)            (None, 25)                525       
                                                                 
 dense_16 (Dense)            (None, 10)                260       
                                                                 
 dense_17 (Dense)            (None, 1)                 11        
                                                                 
Total params: 796
Trainable params: 796
Non-trainable params: 0
_________________________________________________________________


In [13]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [16]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import numpy as np

# Define the window size
window_size = 5

grouped = df_sales.groupby('product_id')

table_data = []
mse_scores = []
predicted_unit_prices = []

for group_key, group_data in grouped:
    # set features and target variable
    features = group_data[['qty_sold', 'total_price', 'freight_price', 'product_rating',
                           'no_customers', 'seasonality', 'volume', 'comp1_price',
                           'comp1_prod_rating', 'comp1_freight_price', 'comp2_price',
                           'comp2_prod_rating', 'comp2_freight_price', 'comp3_price',
                           'comp3_prod_rating', 'comp3_freight_price', 'lag_price']]

    target = group_data['unit_price']
    
    num_samples = len(features)
    predicted_unit_prices = []
    
    # sliding window training and testing
    for i in range(window_size, num_samples):
        # Define the periods for training and testing
        train_start = i - window_size
        train_end = i
        test_start = i
        test_end = i + 1
        
        # Split data into training and testing sets
        features_train = features.iloc[train_start:train_end]
        target_train = target.iloc[train_start:train_end]
        features_test = features.iloc[test_start:test_end]
        target_test = target.iloc[test_start:test_end]
        
        # Train the model
        nn.fit(features_train,target_train,epochs=100)
        # Make predictions
        y_pred = nn.predict(features_test)
        
        # Evaluate using Mean Squared Error
        mse = mean_squared_error(target_test, y_pred)
        mse_scores.append(mse)
        
        # Calculate and store the predicted unit prices
        predicted_unit_price = y_pred[0]
        predicted_unit_prices.append(predicted_unit_price)
        
        table_data.append([group_key, i+1, predicted_unit_price, mse])

# Print table
table_headers = ["Product ID", "Sample", "Predicted Price", "MSE"]
print(tabulate(table_data, headers=table_headers, floatfmt=(".0f", ".0f", ".2f", ".2f")))

avg_mse = np.mean(mse_scores)
avg_predicted_unit_price = np.mean(predicted_unit_prices)
print(f"Average MSE: {avg_mse:.2f}")
print(f"Average Predicted Unit Price: {avg_predicted_unit_price:.2f}")

RuntimeError: You must compile your model before training/testing. Use `model.compile(optimizer, loss)`.

## ML modeling

In [None]:
# Seperate the features, X,  from the target variable, y
y = df_sales_copy['quantity']
X = df_sales_copy.drop(columns='quantity').values

In [None]:
# Splitting into Train and Test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [None]:
#Scale

# Instantiate a StandardScaler instance
scaler = StandardScaler()

# Fit the training data to the standard scaler
X_scaler = scaler.fit(X_train)

# Transform the training data using the scaler
X_train_scaled = X_scaler.transform(X_train)

# Transform the testing data using the scaler
X_test_scaled = X_scaler.transform(X_test)

In [None]:
X_train_scaled

In [None]:
# Define the model - deep neural net
number_input_features = len(X_train[0])
hidden_nodes_layer1 =  50
hidden_nodes_layer2 = 25

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="relu"))

# Check the structure of the model
nn.summary()

In [None]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Train the model
fit_model = nn.fit(X_train_scaled,y_train,epochs=100)