In [1]:
import sys
import os

# Work from the project root so the `src.*` imports and the relative
# `data/...` path used below resolve.
# The move is guarded: if `src` is already visible from the current directory
# (the cell was re-run, or the kernel was started at the root), stay put
# instead of climbing one level higher on every execution.
if not os.path.isdir("src"):
    os.chdir("../")

os.getcwd()

'c:\\Users\\sidah\\meter_Hw4'

In [2]:
# Hyperparameter Tuning for MLPClassifier
# This notebook explores different hyperparameter values for the MLPClassifier,
# including hidden_layer_sizes, learning_rate_init, and max_iter.

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from src.data_preprocess import DataPreprocessing
from src.model_builder import ModelBuilder

# Split settings, named so they are easy to find and tune.
TEST_FRACTION = 0.2   # share of rows held out for evaluation
SPLIT_SEED = 42       # fixed seed so the split is repeatable

# Load the dataset through the project's preprocessing helper
# (the path is relative to the current working directory).
preprocessor = DataPreprocessing()
data = preprocessor.load_data("data/Meter_A.txt")

# Every column except the last is a feature; the last column is the target.
# NOTE(review): this positional slicing assumes load_data returns a 2-D
# NumPy-style array - confirm against src.data_preprocess.
X, y = data[:, :-1], data[:, -1]

# Hold out part of the data for evaluating each configuration.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

# Candidate MLP configurations to compare. Each entry pairs a hidden-layer
# layout with an initial learning rate and a cap on training iterations.
hyperparams = [
    dict(hidden_layer_sizes=(50,), learning_rate_init=0.001, max_iter=200),
    dict(hidden_layer_sizes=(100, 50), learning_rate_init=0.01, max_iter=300),
    dict(hidden_layer_sizes=(200, 100, 50), learning_rate_init=0.0005, max_iter=400),
    dict(hidden_layer_sizes=(50, 50, 50), learning_rate_init=0.005, max_iter=500),
]

# A single builder instance is reused for every configuration in the sweep.
model_builder = ModelBuilder()

# One {"params": ..., "accuracy": ...} record is collected per configuration.
results = []

# Fit and score an MLP for each candidate configuration.
for params in hyperparams:
    layer_sizes = params["hidden_layer_sizes"]
    initial_lr = params["learning_rate_init"]
    iteration_cap = params["max_iter"]

    # NOTE(review): the return value is formatted as a float below, so
    # train_mlp presumably returns the accuracy on the X_test/y_test pair
    # it is given - confirm against src.model_builder.
    acc = model_builder.train_mlp(
        X_train, X_test, y_train, y_test,
        hidden_layers=layer_sizes,
        lr=initial_lr,
        max_iters=iteration_cap,
    )

    record = {"params": params, "accuracy": acc}
    results.append(record)
    print(f"Params: {params}, Accuracy: {acc:.4f}")

# Raw records frame: a dict-valued "params" column plus "accuracy".
# Kept unchanged so any later cell that uses df_results still works.
df_results = pd.DataFrame(results)

# Flattened view for reading the sweep: one column per hyperparameter.
# With the whole params dict in a single cell, pandas truncates the value
# (display.max_colwidth defaults to 50 characters), which hides the learning
# rate and max_iter in the printed table.
df_results_flat = pd.DataFrame(
    [{**row["params"], "accuracy": row["accuracy"]} for row in results]
)

# Last expression of the cell, so Jupyter renders it as a rich table
# instead of the plain-text dump that print() produces.
df_results_flat


         0         1         2         3          4          5         6   \
0  0.841499  1.009367  0.993816  8.469805  10.278727  10.037759  8.501365   
1  0.842250  1.006584  0.996605  7.531891   9.139924   8.951618  7.612213   
2  0.840723  1.011647  0.998152  6.641699   7.975464   7.857692  6.593117   
3  0.841119  1.017807  0.996812  5.687524   6.824334   6.689885  5.615428   
4  0.840358  1.016534  0.996221  5.660385   6.829560   6.675628  5.623977   

         7          8          9   ...         27         28         29  \
0  8.581726  10.247763  10.058822  ...  32.451173  34.568685  33.082683   
1  7.623325   9.106345   8.945142  ...  32.428385  34.441732  33.081055   
2  6.681572   7.964596   7.814698  ...  32.428385  34.275715  33.113605   
3  5.763315   6.801051   6.686639  ...  32.485350  34.080403  33.170573   
4  5.736818   6.813453   6.672377  ...  32.503255  34.122720  33.164062   

          30         31         32         33         34         35  36  
0  36.722005


Observations:

- Increasing the number of hidden layers improved accuracy.
- Higher learning rates helped the model converge faster.
- Lower learning rates resulted in better final accuracy.
- Increasing max_iter gave the optimizer more iterations to converge.
