# Part 3: Model Comparison (BP, BP-F, MLR-F)

In this notebook, we will compare three different models:
1. BP: Neural Network with Back-Propagation (from Part 2, implemented from scratch)
2. BP-F: Neural Network with Back-Propagation from a library (using scikit-learn)
3. MLR-F: Multiple Linear Regression from scikit-learn

We will evaluate these models using MSE, MAE, and MAPE metrics and visualize the results.

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import sys
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Add path to access the custom BP implementation from Part 2
sys.path.append('../part2_bp_implementation/')
from NeuralNet import NeuralNet

In [None]:
# Read the shopping-behaviour CSV from a path relative to the working directory
dataset_path = "../dataset/shopping_behavior.csv"
df = pd.read_csv(dataset_path)

# Sanity report: dimensions, column names, and a preview of the first rows
print(f"Dataset loaded successfully with shape: {df.shape}")
print("Dataset columns:", df.columns.tolist())
print("First few rows:", df.head(), sep="\n")

In [None]:
# Data preprocessing for models
# The regression target is the purchase amount; every other column becomes a feature.
target_col = 'Purchase Amount (USD)'

# Numeric columns are used as-is (minus the target); object-dtype columns are
# treated as categorical and one-hot encoded further down.
# NOTE(review): any numeric identifier column (e.g. a customer id) would be kept
# as a feature here — confirm that is intended.
numerical_cols = [
    col
    for col in df.select_dtypes(include=[np.number]).columns.tolist()
    if col != target_col
]
categorical_cols = df.select_dtypes(include=['object']).columns.tolist()

print(f"Numerical columns: {numerical_cols}")
print(f"Categorical columns: {categorical_cols}")

# Feature frame (numeric columns first, then categorical) and target series
X, y = df[numerical_cols + categorical_cols], df[target_col]

print(f"Features shape: {X.shape}")
print(f"Target shape: {y.shape}")

# Expand each categorical column into indicator columns named "<column>_<value>"
X_encoded = pd.get_dummies(
    X,
    columns=categorical_cols,
    prefix=categorical_cols,
)

print(f"Encoded features shape: {X_encoded.shape}")
print(f"Encoded features columns: {X_encoded.columns.tolist()[:10]}...")  # Show first 10 columns

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
)

print(f"Training set - X: {X_train.shape}, y: {y_train.shape}")
print(f"Test set - X: {X_test.shape}, y: {y_test.shape}")

# Learn the scaling statistics from the training rows only, then apply the
# same transform to both splits so no test information leaks into the scaler
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Features have been standardized.")

In [None]:
# MLR-F: multiple linear regression from scikit-learn, fitted on the scaled training features
print("Training the MLR model...")
mlr_model = LinearRegression().fit(X_train_scaled, y_train)

# Predict on the training split and on the held-out test split
y_train_pred_mlr, y_test_pred_mlr = (
    mlr_model.predict(features) for features in (X_train_scaled, X_test_scaled)
)

print("MLR model predictions completed.")
print(f"Training predictions shape: {y_train_pred_mlr.shape}")
print(f"Test predictions shape: {y_test_pred_mlr.shape}")

In [None]:
# BP-F: library neural network trained with back-propagation (scikit-learn MLPRegressor)
bp_f_model = MLPRegressor(
    # --- Architecture ---
    hidden_layer_sizes=(100,),  # a single hidden layer with 100 neurons
    activation='relu',          # hidden-layer activation function
    # --- Optimisation ---
    solver='adam',
    learning_rate='constant',   # learning-rate schedule
    learning_rate_init=0.001,   # initial learning rate
    batch_size='auto',
    max_iter=200,               # upper bound on the number of iterations
    shuffle=True,               # shuffle samples in each iteration
    alpha=0.0001,               # L2 regularization strength
    # --- Early stopping on an internal validation split ---
    early_stopping=True,
    validation_fraction=0.1,    # share of the training data held out for validation
    n_iter_no_change=10,        # iterations without improvement to wait before stopping
    # --- Reproducibility ---
    random_state=42,
)

# Fit on the scaled training data
print("Training the BP-F model (MLP with scikit-learn)...")
bp_f_model.fit(X_train_scaled, y_train)

# Predict on the training split and on the held-out test split
y_train_pred_bp_f, y_test_pred_bp_f = (
    bp_f_model.predict(features) for features in (X_train_scaled, X_test_scaled)
)

print("BP-F model predictions completed.")
print(f"Training predictions shape: {y_train_pred_bp_f.shape}")
print(f"Test predictions shape: {y_test_pred_bp_f.shape}")
print(f"Number of iterations: {bp_f_model.n_iter_}")

In [None]:
# Define evaluation metrics functions
def calculate_mse(y_true, y_pred):
    """Return the mean squared error between targets and predictions.

    Thin wrapper: both arguments are forwarded unchanged to scikit-learn's
    ``mean_squared_error`` and its result is returned as-is.
    """
    mse = mean_squared_error(y_true, y_pred)
    return mse

def calculate_mae(y_true, y_pred):
    """Return the mean absolute error between targets and predictions.

    Thin wrapper: both arguments are forwarded unchanged to scikit-learn's
    ``mean_absolute_error`` and its result is returned as-is.
    """
    mae = mean_absolute_error(y_true, y_pred)
    return mae

def calculate_mape(y_true, y_pred):
    """Calculate Mean Absolute Percentage Error, expressed in percent.

    Both inputs are flattened to 1-D before comparison. Without this, a 1-D
    ``y_true`` combined with a column-vector ``y_pred`` of shape (n, 1) would
    broadcast into an (n, n) matrix and silently yield a wrong score.

    Samples whose true value is exactly zero are excluded, since the
    percentage error is undefined for them.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted values; must contain the same number of elements as ``y_true``.

    Returns
    -------
    float
        MAPE in percent over the samples with a non-zero true value, or NaN
        when no such sample exists.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` contain a different number of elements.
    """
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()

    if y_true.size != y_pred.size:
        raise ValueError(
            f"y_true and y_pred must have the same number of elements "
            f"(got {y_true.size} and {y_pred.size})"
        )

    # Avoid division by zero by only calculating MAPE for non-zero true values
    non_zero_true = y_true != 0
    if not non_zero_true.any():
        # Nothing to average over: report "undefined" explicitly instead of
        # relying on the mean of an empty slice (NaN plus a RuntimeWarning).
        return np.nan

    relative_errors = np.abs(
        (y_true[non_zero_true] - y_pred[non_zero_true]) / y_true[non_zero_true]
    )
    return np.mean(relative_errors) * 100

# Score the two library models on both splits with the three metric helpers
print("Calculating evaluation metrics...")

# --- MLR-F: (train, test) pair for each metric ---
mlr_train_mse, mlr_test_mse = (
    calculate_mse(y_train, y_train_pred_mlr),
    calculate_mse(y_test, y_test_pred_mlr),
)
mlr_train_mae, mlr_test_mae = (
    calculate_mae(y_train, y_train_pred_mlr),
    calculate_mae(y_test, y_test_pred_mlr),
)
mlr_train_mape, mlr_test_mape = (
    calculate_mape(y_train, y_train_pred_mlr),
    calculate_mape(y_test, y_test_pred_mlr),
)

print(f"MLR-F Training - MSE: {mlr_train_mse:.4f}, MAE: {mlr_train_mae:.4f}, MAPE: {mlr_train_mape:.4f}%")
print(f"MLR-F Test - MSE: {mlr_test_mse:.4f}, MAE: {mlr_test_mae:.4f}, MAPE: {mlr_test_mape:.4f}%")

# --- BP-F: (train, test) pair for each metric ---
bp_f_train_mse, bp_f_test_mse = (
    calculate_mse(y_train, y_train_pred_bp_f),
    calculate_mse(y_test, y_test_pred_bp_f),
)
bp_f_train_mae, bp_f_test_mae = (
    calculate_mae(y_train, y_train_pred_bp_f),
    calculate_mae(y_test, y_test_pred_bp_f),
)
bp_f_train_mape, bp_f_test_mape = (
    calculate_mape(y_train, y_train_pred_bp_f),
    calculate_mape(y_test, y_test_pred_bp_f),
)

print(f"BP-F Training - MSE: {bp_f_train_mse:.4f}, MAE: {bp_f_train_mae:.4f}, MAPE: {bp_f_train_mape:.4f}%")
print(f"BP-F Test - MSE: {bp_f_test_mse:.4f}, MAE: {bp_f_test_mae:.4f}, MAPE: {bp_f_test_mape:.4f}%")

In [None]:
# Hyperparameter tuning for custom BP model (from Part 2)
# At least 10 hyperparameter combinations are required; 12 are defined below.

# Cast features and targets to float32 arrays for the custom BP model
X_train_bp, X_test_bp = (
    features.astype(np.float32) for features in (X_train_scaled, X_test_scaled)
)
y_train_bp, y_test_bp = (
    target.values.astype(np.float32) for target in (y_train, y_test)
)

# Each row below is one combination:
#   (hidden layer sizes, epochs, learning rate, momentum, activation)
# The input layer size (number of features) and the single output unit are
# added around the hidden sizes when the "layers" entry is built.
hyperparameter_combinations = [
    {
        "layers": [X_train_bp.shape[1], *hidden_sizes, 1],
        "epochs": n_epochs,
        "learning_rate": lr,
        "momentum": mom,
        "activation": act,
    }
    for hidden_sizes, n_epochs, lr, mom, act in (
        ((10,),    100, 0.01,  0.5, "sigmoid"),  # Combination 1
        ((20,),    100, 0.01,  0.5, "sigmoid"),  # Combination 2
        ((10,),    200, 0.01,  0.5, "sigmoid"),  # Combination 3
        ((10,),    100, 0.001, 0.5, "sigmoid"),  # Combination 4
        ((10,),    100, 0.01,  0.9, "sigmoid"),  # Combination 5
        ((10,),    100, 0.01,  0.5, "relu"),     # Combination 6
        ((15,),    150, 0.005, 0.7, "tanh"),     # Combination 7
        ((30, 15), 100, 0.01,  0.5, "sigmoid"),  # Combination 8
        ((25, 10), 200, 0.001, 0.8, "relu"),     # Combination 9
        ((20,),    150, 0.005, 0.6, "tanh"),     # Combination 10
        ((12, 6),  120, 0.01,  0.4, "relu"),     # Combination 11 (additional)
        ((8,),     180, 0.001, 0.9, "sigmoid"),  # Combination 12 (additional)
    )
]

print(f"Testing {len(hyperparameter_combinations)} hyperparameter combinations for custom BP model...")

# One dict of settings + metrics per combination that trained and scored successfully
results_bp = []

# Train and evaluate the custom network once per hyperparameter combination
for i, params in enumerate(hyperparameter_combinations):
    print(f"Testing combination {i+1}/{len(hyperparameter_combinations)}: Layers {params['layers']}, Epochs {params['epochs']}, LR {params['learning_rate']}, Momentum {params['momentum']}, Activation {params['activation']}")

    try:
        # Build the network from Part 2.
        # NOTE(review): `fact` presumably selects the activation function —
        # confirm against the Part 2 NeuralNet signature.
        nn = NeuralNet(
            layers=params["layers"],
            learning_rate=params["learning_rate"],
            momentum=params["momentum"],
            fact=params["activation"]
        )

        # Train the model.
        # NOTE(review): validation_split=0.2 presumably holds out part of the
        # training rows inside fit — confirm against Part 2.
        nn.fit(X_train_bp, y_train_bp, epochs=params["epochs"], validation_split=0.2)

        # Flatten predictions to 1-D so they line up element-wise with the 1-D
        # targets. If predict returns a column vector of shape (n, 1), masked
        # element-wise arithmetic against an (n,) target would broadcast to a
        # matrix and distort MAPE; for 1-D output this is a no-op.
        y_train_pred = np.asarray(nn.predict(X_train_bp)).ravel()
        y_test_pred = np.asarray(nn.predict(X_test_bp)).ravel()

        # Calculate metrics on both splits
        train_mse = calculate_mse(y_train_bp, y_train_pred)
        test_mse = calculate_mse(y_test_bp, y_test_pred)

        train_mae = calculate_mae(y_train_bp, y_train_pred)
        test_mae = calculate_mae(y_test_bp, y_test_pred)

        train_mape = calculate_mape(y_train_bp, y_train_pred)
        test_mape = calculate_mape(y_test_bp, y_test_pred)

        # Store settings and metrics together so they can be tabulated later
        results_bp.append({
            "combination": i+1,
            "layers": params["layers"],
            "epochs": params["epochs"],
            "learning_rate": params["learning_rate"],
            "momentum": params["momentum"],
            "activation": params["activation"],
            "train_mse": train_mse,
            "test_mse": test_mse,
            "train_mae": train_mae,
            "test_mae": test_mae,
            "train_mape": train_mape,
            "test_mape": test_mape
        })

        print(f"  -> Train MSE: {train_mse:.4f}, Test MSE: {test_mse:.4f}")
        print(f"  -> Train MAE: {train_mae:.4f}, Test MAE: {test_mae:.4f}")
        print(f"  -> Train MAPE: {train_mape:.4f}%, Test MAPE: {test_mape:.4f}%")

    except Exception as e:
        # Deliberate best-effort: a failing combination is reported and skipped
        # so the remaining combinations still run. It is simply absent from
        # results_bp.
        print(f"  -> Error with combination {i+1}: {str(e)}")

print(f"\nHyperparameter tuning completed. Tested {len(results_bp)} valid combinations.")

# Find the best combination based on test MSE
# NOTE(review): choosing hyperparameters by their test-set score makes the
# reported test metrics optimistically biased; selecting on a validation split
# (or cross-validation) and reporting test metrics only for the chosen model
# would give an unbiased estimate.
if results_bp:
    best_result = min(results_bp, key=lambda x: x['test_mse'])
    print("\nBest combination based on test MSE:")
    print(f"  Combination {best_result['combination']}: Layers {best_result['layers']}, "
          f"Epochs {best_result['epochs']}, LR {best_result['learning_rate']}, "
          f"Momentum {best_result['momentum']}, Activation {best_result['activation']}")
    print(f"  Test MSE: {best_result['test_mse']:.4f}, Test MAE: {best_result['test_mae']:.4f}, Test MAPE: {best_result['test_mape']:.4f}%")

    # Tabulate every successful combination; pandas is already imported in the
    # notebook's import cell, so no local import is needed here.
    results_df = pd.DataFrame(results_bp)
    print("\nResults Summary Table:")
    print(results_df[["combination", "layers", "epochs", "learning_rate", "momentum", "activation", "test_mse", "test_mae", "test_mape"]].round(4))
else:
    # Every combination failed: say so explicitly rather than finishing silently
    print("\nNo hyperparameter combination completed successfully; nothing to summarize.")