Terry Hood Jupyter Notebook 

## Multi-Layer Perceptron (MLP) model

Step 1: Install Required Libraries

In [9]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris

Step 2: Prepare Your Data

In [10]:
# Load the dataset
cleaned_df = pd.read_csv('clean_df.csv')
cleaned_df.head(5)

Unnamed: 0,EDUC,MARSTAT,SERVICES,LOS,PSOURCE,NOPRIOR,ARRESTS,EMPLOY,METHUSE,PSYPROB,...,TRNQFLG,BARBFLG,SEDHPFLG,INHFLG,OTCFLG,OTHERFLG,DIVISION,REGION,IDU,ALCDRUG
0,3,1,7,7,6,1,0,2,2,1,...,0,0,0,0,0,0,9,4,0,1
1,3,4,7,8,1,1,0,2,2,1,...,0,0,0,0,0,1,9,4,0,3
2,3,4,7,7,3,1,0,1,2,2,...,0,0,0,0,0,0,9,4,0,2
3,5,1,7,4,7,0,0,1,2,2,...,0,0,0,0,0,0,9,4,0,1
4,3,4,7,3,7,1,1,4,2,1,...,0,0,0,0,0,0,9,4,0,3


In [11]:
# Set the features variable 
X = cleaned_df.drop(columns='REASON')
# Set the target variable
y = cleaned_df["REASON"]

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [12]:
# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

Step 3: Define Define and Train the MLP Model

In [13]:
from sklearn.neural_network import MLPClassifier

# Define the MLP model
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=500, random_state=42)

# Train the model
mlp.fit(X_train, y_train)


Predict and Evaluate the MLP Model

In [17]:
from sklearn.metrics import classification_report

# Predict on the test set
y_pred = mlp.predict(X_test)
print(f'Accuracy: {np.mean(y_pred == y_test):.2f}')
print(classification_report(y_test, y_pred))

# Save the model
import joblib
joblib.dump(mlp, 'mlp_model.pkl')


Accuracy: 0.81
              precision    recall  f1-score   support

           0       0.77      0.68      0.72    708990
           1       0.83      0.88      0.85   1223451

    accuracy                           0.81   1932441
   macro avg       0.80      0.78      0.79   1932441
weighted avg       0.81      0.81      0.81   1932441



['mlp_model.pkl']

Predictive Output of the MLP Model

In [20]:
# # Load the model
# mlp = joblib.load('mlp_model.pkl')

# # Make predictions on new data
# # new_data = np.array([[5.1, 3.5, 1.4, 0.2]])  # Example new data
# new_data_scaled = np.random.rand(1, 68)
# new_data_scaled = scaler.transform(new_data_scaled)
# prediction = mlp.predict(new_data_scaled)
# print(f'Predicted class: {prediction[0]}')

import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load the model and scaler
mlp = joblib.load('mlp_model.pkl')
scaler = joblib.load('mlp_model.pkl')

# Example feature names (replace these with actual feature names used during training)
feature_names = [f'feature_{i}' for i in range(1, 69)]

# Your actual new data with 68 features (replace with actual values)
new_data = np.random.rand(1, 68)  # This should be replaced with your actual new data

# Convert the new data to a DataFrame with the same feature names
new_data_df = pd.DataFrame(new_data, columns=feature_names)

# Scale the new data
new_data_scaled = scaler.transform(new_data_df)

# Make predictions
prediction = mlp.predict(new_data_scaled)
print(f'Predicted class: {prediction[0]}')


AttributeError: 'MLPClassifier' object has no attribute 'transform'

Step 3: Using TensorFlow/Keras:

In [22]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Set input nodes to the number of features
input_nodes = len(X.columns)

# Define the MLP model
model = Sequential([
    Dense(100, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(50, activation='relu'),
    Dense(3, activation='softmax')  # Adjust the number of neurons to match the number of classes in your dataset
])

# Check the structure of the Sequential model
model.summary()

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Accuracy: {accuracy:.2f}')

# Save the model
model.save('mlp_model.h5')

Epoch 1/50
[1m112726/112726[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m175s[0m 2ms/step - accuracy: 0.7791 - loss: 0.4459 - val_accuracy: 0.8006 - val_loss: 0.4092
Epoch 2/50
[1m112726/112726[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m212s[0m 2ms/step - accuracy: 0.8048 - loss: 0.4027 - val_accuracy: 0.8085 - val_loss: 0.3974
Epoch 3/50
[1m112726/112726[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m189s[0m 2ms/step - accuracy: 0.8095 - loss: 0.3956 - val_accuracy: 0.8112 - val_loss: 0.3929
Epoch 4/50
[1m112726/112726[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m292s[0m 2ms/step - accuracy: 0.8117 - loss: 0.3923 - val_accuracy: 0.8127 - val_loss: 0.3905
Epoch 5/50
[1m112726/112726[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m287s[0m 3ms/step - accuracy: 0.8136 - loss: 0.3897 - val_accuracy: 0.8117 - val_loss: 0.3924
Epoch 6/50
[1m112726/112726[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m277s[0m 2ms/step - accuracy: 0.8141 - loss: 0.3885 - val_accuracy: 0.8134



Accuracy: 0.82


In [25]:
# Assuming `model.history` is a History object returned by the `fit` method
# and `cleaned_df` contains the data you want to plot.

# Access the `history` attribute of the `History` object to get the metrics dictionary
metrics_dict = model.history.history

# Now, you can safely access the "loss" list within the metrics dictionary
history_df = pd.DataFrame(cleaned_df, index=range(1, len(metrics_dict["loss"]) + 1))

# Assuming "accuracy" is a key in your `cleaned_df` DataFrame, plot the accuracy
history_df.plot(y="accuracy")

KeyError: 'loss'

In [24]:
# Create a DataFrame containing training history
history_df = pd.DataFrame(cleaned_df, index=range(1,len(model.history["loss"])+1))

# Plot the accuracy
history_df.plot(y="accuracy")

TypeError: 'History' object is not subscriptable

Predict and Evaluate the TensorFlow/Keras Model

In [None]:
from sklearn.metrics import classification_report

# Predict on the test set
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
print(classification_report(y_test, y_pred_classes))

Predictive Output of the TensorFlow/Keras Model

In [None]:
# Load the model
model = tf.keras.models.load_model('mlp_model.h5')

# Make predictions on new data
new_data = np.array([[5.1, 3.5, 1.4, 0.2]])  # Example new data
new_data_scaled = scaler.transform(new_data)
prediction = model.predict(new_data_scaled)
predicted_class = np.argmax(prediction, axis=1)
print(f'Predicted class: {predicted_class[0]}')


This code covers the entire process from data preparation to making predictions on new data for both Scikit-learn and TensorFlow/Keras implementations of an MLP model. Adjust the example new data (new_data) to suit your actual use case.

<!-- Step 4: Evaluate and Save the Model
After training, evaluate the model’s performance and save it if necessary. -->