# Part 3: Model Comparison (BP, BP-F, MLR-F)

In this notebook, we will compare three different models:
1. BP: Neural Network with Back-Propagation (from Part 2, implemented from scratch)
2. BP-F: Neural Network with Back-Propagation from a library (using scikit-learn)
3. MLR-F: Multiple Linear Regression from scikit-learn

We will evaluate these models using three error metrics (mean squared error (MSE), mean absolute error (MAE), and mean absolute percentage error (MAPE)) and visualize the results.

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import sys
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
# Load the dataset
# The path is relative to the directory the notebook is run from.
dataset_path = "../dataset/shopping_behavior.csv"
df = pd.read_csv(dataset_path)

print(f"Dataset loaded successfully with shape: {df.shape}")
print("Dataset columns:", df.columns.tolist())
print("First few rows:")
# Keep the preview as the LAST expression of this cell: the notebook then renders
# it as a rich table instead of the wrapped/truncated text that print() produces.
df.head()

In [None]:
# Data preprocessing for the library models trained below (MLR-F and BP-F)
# Numerical features are used as-is; categorical features are one-hot encoded.
# Produces: X_train / X_test / y_train / y_test and the standardized
# X_train_scaled / X_test_scaled arrays consumed by the model cells.

target_col = 'Purchase Amount (USD)'

# Candidate features are every column except the target. Dropping the target
# up front (rather than only removing it from the numerical list) guarantees it
# can never end up among the features, whatever dtype it was parsed as.
feature_df = df.drop(columns=[target_col])

# Identify numerical and categorical columns.
# Text columns may be stored as `object` or as a dedicated string dtype depending
# on the pandas version/configuration, so both are accepted, plus `category`.
numerical_cols = feature_df.select_dtypes(include=[np.number]).columns.tolist()
categorical_cols = feature_df.select_dtypes(
    include=['object', 'string', 'category']
).columns.tolist()

# NOTE(review): every numeric column is kept as a feature, including any
# identifier-like column (e.g. a row/customer id) - confirm this is intended.

print(f"Numerical columns: {numerical_cols}")
print(f"Categorical columns: {categorical_cols}")

# Make silently-dropped columns visible (e.g. bool or datetime dtypes match
# neither selection above and would otherwise vanish from the feature set).
ignored_cols = [
    col for col in feature_df.columns
    if col not in numerical_cols and col not in categorical_cols
]
if ignored_cols:
    print(f"Warning: columns with unsupported dtypes are not used as features: {ignored_cols}")

# Separate features (X) and target variable (y)
X = feature_df[numerical_cols + categorical_cols]
y = df[target_col]

print(f"Features shape: {X.shape}")
print(f"Target shape: {y.shape}")

# One-hot encode categorical variables (numerical columns pass through unchanged)
X_encoded = pd.get_dummies(X, columns=categorical_cols, prefix=categorical_cols)

print(f"Encoded features shape: {X_encoded.shape}")
print(f"Encoded features columns: {X_encoded.columns.tolist()[:10]}...")  # Show first 10 columns

# Split the dataset into training (80%) and testing (20%) sets
# random_state is fixed so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

print(f"Training set - X: {X_train.shape}, y: {y_train.shape}")
print(f"Test set - X: {X_test.shape}, y: {y_test.shape}")

# Standardize the features
# The scaler is fitted on the training split only and then applied to the test
# split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Features have been standardized.")

In [None]:
# MLR-F: ordinary least-squares Multiple Linear Regression from scikit-learn,
# fitted on the standardized training features.
mlr_model = LinearRegression()

print("Training the MLR model...")
mlr_model = mlr_model.fit(X_train_scaled, y_train)  # fit() returns the estimator itself

# Predict on the training split first, then on the held-out test split
mlr_predictions = []
for features in (X_train_scaled, X_test_scaled):
    mlr_predictions.append(mlr_model.predict(features))
y_train_pred_mlr, y_test_pred_mlr = mlr_predictions

print("MLR model predictions completed.")
print(f"Training predictions shape: {y_train_pred_mlr.shape}")
print(f"Test predictions shape: {y_test_pred_mlr.shape}")

In [None]:
# BP-F: library neural network trained with back-propagation.
# scikit-learn's MLPRegressor is configured through an explicit hyper-parameter
# dictionary so that every setting used for the comparison is visible in one place.
bp_f_params = {
    # Architecture
    "hidden_layer_sizes": (100,),   # one hidden layer of 100 units
    "activation": 'relu',
    # Optimisation
    "solver": 'adam',
    "alpha": 0.0001,                # L2 penalty strength
    "batch_size": 'auto',
    "learning_rate": 'constant',    # learning-rate schedule
    "learning_rate_init": 0.001,
    "max_iter": 200,                # upper bound on training iterations
    "shuffle": True,
    "random_state": 42,             # fixed seed for reproducibility
    # Early stopping on a slice held out from the training data
    "early_stopping": True,
    "validation_fraction": 0.1,
    "n_iter_no_change": 10,
}
bp_f_model = MLPRegressor(**bp_f_params)

# Fit on the standardized training features
print("Training the BP-F model (MLP with scikit-learn)...")
bp_f_model.fit(X_train_scaled, y_train)

# Predictions for the training split and the held-out test split
y_train_pred_bp_f = bp_f_model.predict(X_train_scaled)
y_test_pred_bp_f = bp_f_model.predict(X_test_scaled)

print("BP-F model predictions completed.")
for split_label, split_pred in (("Training", y_train_pred_bp_f), ("Test", y_test_pred_bp_f)):
    print(f"{split_label} predictions shape: {split_pred.shape}")
print(f"Number of iterations: {bp_f_model.n_iter_}")