Terry Hood Jupyter Notebook

In [1]:
# Mount Google Drive at /content/drive so that later cells can read the
# dataset stored under /content/drive/MyDrive/...
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Multi-Layer Perceptron (MLP) model - Optimized

Import Required Libraries and Dependencies

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

Prepare the Data

In [4]:
# Read the cleaned dataset from the mounted Drive folder and preview it.
# Alternative input (currently disabled): clean_df.csv in the same folder,
# which would be loaded into a frame named clean_df instead.
file_path = '/content/drive/MyDrive/DU_AI_Bootcamp/23_FinalProject/House_of_Hope/cleaned_df(old).csv'
cleaned_df = pd.read_csv(filepath_or_buffer=file_path)

# Show the first five rows as the cell output.
cleaned_df.head(n=5)

Unnamed: 0,EDUC,MARSTAT,SERVICES,LOS,PSOURCE,NOPRIOR,ARRESTS,EMPLOY,METHUSE,PSYPROB,...,TRNQFLG,BARBFLG,SEDHPFLG,INHFLG,OTCFLG,OTHERFLG,DIVISION,REGION,IDU,ALCDRUG
0,4,1,7,8,1,0,0,2,2,1,...,0,0,0,0,0,0,9,4,0,1
1,3,1,7,6,1,0,0,1,2,1,...,0,0,0,0,0,0,9,4,0,3
2,2,1,7,6,1,0,0,4,2,1,...,0,0,0,0,0,0,9,4,0,3
3,3,1,7,8,1,1,0,3,2,1,...,0,0,0,0,0,0,9,4,0,3
4,3,3,7,8,1,1,0,4,2,1,...,0,0,0,0,0,0,9,4,0,1


In [7]:
# Separate the target column from the feature columns.
y = cleaned_df["REASON"]

# 'Unnamed: 0' is the leftover row counter visible in the preview of cleaned_df
# (0, 1, 2, ...), not an attribute of a record, so it is excluded from the
# features. errors='ignore' keeps this cell working if the CSV is re-exported
# without that column; a missing 'REASON' still fails loudly on the line above.
X = cleaned_df.drop(columns=["REASON", "Unnamed: 0"], errors="ignore")

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

Data Scaling - Standardizing Features

In [8]:
# Standardization: learn each feature's mean and standard deviation from the
# training split only, then apply that same transformation to both splits so
# the test data never influences the scaling statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

Model Architecture Design

In [9]:
from sklearn.neural_network import MLPClassifier

# Baseline network: two hidden layers (100 units, then 50) with ReLU
# activations, trained for at most 1000 iterations.
mlp = MLPClassifier(
    max_iter=1000,
    activation='relu',
    hidden_layer_sizes=(100, 50),
)

# Alternative configurations kept for reference (not active):
#
#   L2 regularization (weight decay) - adds a penalty to the loss based on the
#   size of the weights:
#       MLPClassifier(hidden_layer_sizes=(100,), alpha=0.0001, max_iter=1000)
#
#   Early stopping - stops training when performance on a held-out validation
#   fraction stops improving:
#       MLPClassifier(hidden_layer_sizes=(100,), early_stopping=True,
#                     validation_fraction=0.1, n_iter_no_change=10)



Hyperparameter Tuning

Hyperparameters to Tune
- Hidden Layer Sizes: Number of neurons and layers.
- Activation Function: Type of activation function.
- Solver: Optimization algorithm (e.g., 'adam', 'sgd').
- Learning Rate: Schedule for the step size used in weight updates; it can be constant or adaptive (scikit-learn applies this setting only when the solver is 'sgd').
- Batch Size: Number of samples per gradient update.
- Alpha: Regularization term for weight decay.

Random Search

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform, loguniform

# Search space sampled by the randomized search.
# - Lists are sampled uniformly; `alpha` is drawn log-uniformly from [1e-4, 1e-2].
# - Deeper layouts such as (50, 50) and (100, 50) are currently disabled.
# - 'learning_rate' only has an effect for candidates that use the 'sgd' solver.
param_distributions = {
    'hidden_layer_sizes': [(50,), (100,)],
    'activation': ['tanh', 'relu'],
    'solver': ['adam', 'sgd'],
    'alpha': loguniform(1e-4, 1e-2),
    'learning_rate': ['constant', 'adaptive']
}

# Seed both the estimator (weight initialization / batch shuffling) and the
# search itself (which 20 candidates are drawn) so that re-running the notebook
# reports the same best parameters. 42 matches the seed used for the data split.
random_search = RandomizedSearchCV(
    estimator=MLPClassifier(max_iter=1000, random_state=42),
    param_distributions=param_distributions,
    n_iter=20,    # number of sampled candidates
    cv=5,         # 5-fold cross-validation per candidate
    n_jobs=-1,    # use all available cores
    random_state=42,
)
random_search.fit(X_train_scaled, y_train)

print('Best parameters found:\n', random_search.best_params_)


Performance Evaluation

In [None]:
# Cross-Validation
# Evaluate the model's performance using 5-fold cross-validation on the
# training split to check that it generalizes well.

from sklearn.model_selection import cross_val_score

# A fixed random_state makes the weight initialization, and therefore the
# reported scores, repeatable across runs (42 matches the data-split seed).
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=42)

# cross_val_score fits a fresh copy of `mlp` on each fold; `mlp` itself is left
# unfitted after this call.
scores = cross_val_score(mlp, X_train_scaled, y_train, cv=5)

print('Cross-validation scores:', scores)
print('Average cross-validation score:', scores.mean())


In [None]:
# Model Metrics
# Evaluate the final model on a test set using appropriate metrics.

from sklearn.metrics import classification_report, confusion_matrix

# `mlp` has not been trained at this point: cross_val_score only fits internal
# copies of the estimator. Fit it on the full training split first; calling
# predict() on the unfitted estimator raises NotFittedError.
mlp.fit(X_train_scaled, y_train)

# Predict on the held-out test split, scaled with the training-set statistics.
y_pred = mlp.predict(X_test_scaled)

print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))
print('Classification Report:\n', classification_report(y_test, y_pred))

