In [1]:
import streamlit as st
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve, auc
import tensorflow as tf
from sklearn.preprocessing import StandardScaler, label_binarize

In [2]:
# Load the datasets: train/test feature tables for the binary task first,
# then for the multi-class task (same read order as before).
X_train_binary, X_test_binary = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Binary/Model/Binary_Train_Data.csv',
        'Binary/Model/Binary_Test_Data.csv',
    )
)
X_train_multi, X_test_multi = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Multi/Model/Multi_Train_Data.csv',
        'Multi/Model/Multi_Test_Data.csv',
    )
)

In [3]:
# Standardize the datasets. Each task gets its own scaler, fitted on that
# task's training split only and then applied to both of its splits.
# NOTE(review): the scalers are refit here instead of being loaded from disk —
# confirm this matches the preprocessing the saved models were trained with.
binary_scaler = StandardScaler().fit(X_train_binary)
multi_scaler = StandardScaler().fit(X_train_multi)

X_train_binary_scaled = binary_scaler.transform(X_train_binary)
X_test_binary_scaled = binary_scaler.transform(X_test_binary)

X_train_multi_scaled = multi_scaler.transform(X_train_multi)
X_test_multi_scaled = multi_scaler.transform(X_test_multi)

In [4]:
# Ground-truth test labels for each task, pulled out of their CSV column as a
# plain array of values.
y_test_binary = (
    pd.read_csv('Binary/Model/y_test_binary.csv')
    .loc[:, 'at_risk_binary_encoded']
    .values
)
y_test_multi = (
    pd.read_csv('Multi/Model/y_test_multi.csv')
    .loc[:, 'at_risk_multi_encoded']
    .values
)

In [5]:
# Load the binary models.
# `binary_model_files` and `binary_model_names` are parallel lists: entry i of
# one describes entry i of the other.
binary_model_files = [
    'Binary/Model/Base/lr_model_binary.pkl',
    'Binary/Model/Tuned/best_dt_model_binary.pkl',
    'Binary/Model/Tuned/best_rf_model_binary.pkl',
    'Binary/Model/Base/gb_model_binary.pkl',
    'Binary/Model/Tuned/best_xgb_model_binary.pkl',
]
binary_model_names = [
    'Base Logistic Regression',
    'Tuned Decision Tree',
    'Tuned Random Forest',
    'Base Gradient Boosting',
    'Tuned XGBoost',
]

# joblib.load unpickles the stored objects, so these files must come from a
# trusted source.
binary_models = list(map(joblib.load, binary_model_files))

# The neural network is stored in Keras format and loaded separately.
ann_model_binary = tf.keras.models.load_model('Binary/Model/Tuned/best_ann_model_binary.keras')


In [6]:
# Load the multi-class models.
# `multi_model_files` and `multi_model_names` are parallel lists: entry i of
# one describes entry i of the other.
multi_model_files = [
    'Multi/Model/Tuned/best_lr_model_multi.pkl',
    'Multi/Model/Tuned/best_dt_model_multi.pkl',
    'Multi/Model/Tuned/best_rf_model_multi.pkl',
    'Multi/Model/Tuned/best_gb_model_multi.pkl',
    'Multi/Model/Tuned/best_xgb_model_multi.pkl',
]
multi_model_names = [
    'Tuned Logistic Regression',
    'Tuned Decision Tree',
    'Tuned Random Forest',
    'Tuned Gradient Boosting',
    'Tuned XGBoost',
]

# joblib.load unpickles the stored objects, so these files must come from a
# trusted source.
multi_models = list(map(joblib.load, multi_model_files))

# The neural network is stored in Keras format and loaded separately.
ann_model_multi = tf.keras.models.load_model('Multi/Model/Tuned/best_ann_model_multi.keras')

In [7]:
# Define evaluation functions
def evaluate_model(model, X_test, y_test, is_binary=True):
    """Score a fitted classifier that exposes ``predict`` and ``predict_proba``.

    Parameters
    ----------
    model : object
        Estimator providing ``predict(X)`` and ``predict_proba(X)``.
    X_test : array-like
        Features, passed unchanged to both model methods.
    y_test : array-like
        True labels. Class ids 0 and 1 (and 2 in multi-class mode) are
        hard-coded in the metric computations below.
    is_binary : bool, default True
        Selects binary scoring (``pos_label`` based) or per-class scoring
        against classes ``[0, 1, 2]``.

    Returns
    -------
    tuple
        ``(metrics, y_pred_prob, y_pred)``. ``metrics`` maps metric names to
        values, with ``'-'`` marking entries that are not computed for the
        chosen mode. In binary mode ``y_pred_prob`` is column 1 of
        ``predict_proba``; otherwise it is the full probability matrix.
    """
    y_pred = model.predict(X_test)
    y_pred_prob = model.predict_proba(X_test)

    if is_binary:
        # Keep only the class-1 probability column.
        y_pred_prob = y_pred_prob[:, 1]
        one_hot_truth = None
        third_class_present = False
    else:
        one_hot_truth = label_binarize(y_test, classes=[0, 1, 2])
        # Class-2 metrics are only reported when y_test holds more than two labels.
        third_class_present = len(np.unique(y_test)) > 2

    def class_score(scorer, cls):
        # Precision/recall/F1 for one class, or '-' when not applicable.
        if is_binary:
            if cls == 2:
                return '-'
            return scorer(y_test, y_pred, pos_label=cls, average='binary')
        if cls == 2 and not third_class_present:
            return '-'
        return scorer(y_test, y_pred, labels=[cls], average=None)[0]

    def class_auc(cls):
        # Binary mode reports a single AUC under the class-0 key; multi-class
        # mode reports one-vs-rest AUC per class.
        if is_binary:
            return roc_auc_score(y_test, y_pred_prob) if cls == 0 else '-'
        if cls == 2 and not third_class_present:
            return '-'
        return roc_auc_score(one_hot_truth[:, cls], y_pred_prob[:, cls])

    metrics = {
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision (Class 0)': class_score(precision_score, 0),
        'Precision (Class 1)': class_score(precision_score, 1),
        'Precision (Class 2)': class_score(precision_score, 2),
        'Recall (Class 0)': class_score(recall_score, 0),
        'Recall (Class 1)': class_score(recall_score, 1),
        'Recall (Class 2)': class_score(recall_score, 2),
        'F1 Score (Class 0)': class_score(f1_score, 0),
        'F1 Score (Class 1)': class_score(f1_score, 1),
        'F1 Score (Class 2)': class_score(f1_score, 2),
        'ROC AUC (Class 0)': class_auc(0),
        'ROC AUC (Class 1)': class_auc(1),
        'ROC AUC (Class 2)': class_auc(2),
    }
    return metrics, y_pred_prob, y_pred

In [8]:
def evaluate_ann_model(model, X_test, y_test, is_binary=True):
    """Score a network whose ``predict`` returns class probabilities.

    Parameters
    ----------
    model : object
        Model whose ``predict(X)`` returns a 2-D probability array.
    X_test : array-like
        Features, passed unchanged to ``model.predict``.
    y_test : array-like
        True labels. Class ids 0 and 1 (and 2 in multi-class mode) are
        hard-coded in the metric computations below.
    is_binary : bool, default True
        In binary mode a single-column output is treated as P(class 1) and
        labels are obtained by thresholding at 0.5; otherwise labels are the
        arg-max over columns.

    Returns
    -------
    tuple
        ``(metrics, y_pred_prob, y_pred)``. ``metrics`` maps metric names to
        values, with ``'-'`` marking entries that are not computed for the
        chosen mode. In binary mode ``y_pred_prob`` is the class-1 probability
        vector; otherwise it is the full probability matrix.
    """
    y_pred_prob = model.predict(X_test)

    if is_binary:
        if y_pred_prob.shape[1] == 1:
            # Single output column: prepend its complement so column 1 is
            # always the class-1 probability.
            y_pred_prob = np.concatenate([1 - y_pred_prob, y_pred_prob], axis=1)
        y_pred_prob = y_pred_prob[:, 1]
        y_pred = (y_pred_prob > 0.5).astype(int)
        one_hot_truth = None
        third_class_present = False
    else:
        y_pred = y_pred_prob.argmax(axis=1)
        one_hot_truth = label_binarize(y_test, classes=[0, 1, 2])
        # Class-2 metrics are only reported when y_test holds more than two labels.
        third_class_present = len(np.unique(y_test)) > 2

    def class_score(scorer, cls):
        # Precision/recall/F1 for one class, or '-' when not applicable.
        if is_binary:
            if cls == 2:
                return '-'
            return scorer(y_test, y_pred, pos_label=cls, average='binary')
        if cls == 2 and not third_class_present:
            return '-'
        return scorer(y_test, y_pred, labels=[cls], average=None)[0]

    def class_auc(cls):
        # Binary mode reports a single AUC under the class-0 key; multi-class
        # mode reports one-vs-rest AUC per class.
        if is_binary:
            return roc_auc_score(y_test, y_pred_prob) if cls == 0 else '-'
        if cls == 2 and not third_class_present:
            return '-'
        return roc_auc_score(one_hot_truth[:, cls], y_pred_prob[:, cls])

    metrics = {
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision (Class 0)': class_score(precision_score, 0),
        'Precision (Class 1)': class_score(precision_score, 1),
        'Precision (Class 2)': class_score(precision_score, 2),
        'Recall (Class 0)': class_score(recall_score, 0),
        'Recall (Class 1)': class_score(recall_score, 1),
        'Recall (Class 2)': class_score(recall_score, 2),
        'F1 Score (Class 0)': class_score(f1_score, 0),
        'F1 Score (Class 1)': class_score(f1_score, 1),
        'F1 Score (Class 2)': class_score(f1_score, 2),
        'ROC AUC (Class 0)': class_auc(0),
        'ROC AUC (Class 1)': class_auc(1),
        'ROC AUC (Class 2)': class_auc(2),
    }
    return metrics, y_pred_prob, y_pred

In [9]:
# Evaluate binary models.
# Three parallel collections are filled: one metrics dict per model, plus
# (name, probabilities) and (name, predicted labels) pairs.
binary_results, binary_pred_probs, binary_preds = [], [], []

for name, model in zip(binary_model_names, binary_models):
    metrics, y_pred_prob, y_pred = evaluate_model(
        model, X_test_binary_scaled, y_test_binary, is_binary=True
    )
    metrics.update({'Model': name, 'Type': 'Binary'})
    binary_results.append(metrics)
    binary_pred_probs.append((name, y_pred_prob))
    binary_preds.append((name, y_pred))

# The neural network goes through its own evaluator and is appended last.
ann_metrics, y_pred_prob_ann, y_pred_ann = evaluate_ann_model(
    ann_model_binary, X_test_binary_scaled, y_test_binary, is_binary=True
)
ann_metrics.update({'Model': 'Tuned ANN', 'Type': 'Binary'})
binary_results.append(ann_metrics)
binary_pred_probs.append(('Tuned ANN', y_pred_prob_ann))
binary_preds.append(('Tuned ANN', y_pred_ann))

[1m204/204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 889us/step


In [10]:
# Evaluate multi-class models.
# Three parallel collections are filled: one metrics dict per model, plus
# (name, probabilities) and (name, predicted labels) pairs.
multi_results, multi_pred_probs, multi_preds = [], [], []

for name, model in zip(multi_model_names, multi_models):
    metrics, y_pred_prob, y_pred = evaluate_model(
        model, X_test_multi_scaled, y_test_multi, is_binary=False
    )
    metrics.update({'Model': name, 'Type': 'Multi-Class'})
    multi_results.append(metrics)
    multi_pred_probs.append((name, y_pred_prob))
    multi_preds.append((name, y_pred))

# The neural network goes through its own evaluator and is appended last.
ann_metrics, y_pred_prob_ann, y_pred_ann = evaluate_ann_model(
    ann_model_multi, X_test_multi_scaled, y_test_multi, is_binary=False
)
ann_metrics.update({'Model': 'Tuned ANN', 'Type': 'Multi-Class'})
multi_results.append(ann_metrics)
multi_pred_probs.append(('Tuned ANN', y_pred_prob_ann))
multi_preds.append(('Tuned ANN', y_pred_ann))

[1m204/204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [11]:
# Combine results: one row per evaluated model, binary models first and
# multi-class models after them.
combined_results = pd.DataFrame([*binary_results, *multi_results])

In [12]:
# Row order of the final table (metrics) ...
desired_order = [
    'Accuracy',
    'Precision (Class 0)', 'Precision (Class 1)', 'Precision (Class 2)',
    'Recall (Class 0)', 'Recall (Class 1)', 'Recall (Class 2)',
    'F1 Score (Class 0)', 'F1 Score (Class 1)', 'F1 Score (Class 2)',
    'ROC AUC (Class 0)', 'ROC AUC (Class 1)', 'ROC AUC (Class 2)'
]

# ... and column order (models) within each task type.
binary_model_order = ['Base Logistic Regression', 'Tuned Decision Tree', 'Tuned Random Forest', 'Base Gradient Boosting', 'Tuned XGBoost', 'Tuned ANN']
multi_model_order = ['Tuned Logistic Regression', 'Tuned Decision Tree', 'Tuned Random Forest', 'Tuned Gradient Boosting', 'Tuned XGBoost', 'Tuned ANN']

# Transform the DataFrame to have metrics as rows and (Type, Model) as columns
combined_results_pivot = combined_results.melt(id_vars=['Type', 'Model'], var_name='Metric', value_name='Value')
combined_results_pivot = combined_results_pivot.pivot_table(index='Metric', columns=['Type', 'Model'], values='Value', aggfunc='first')

# Put the metric rows in the desired order
combined_results_pivot = combined_results_pivot.reindex(desired_order)

# Keep only the models that are actually present, in the desired order
binary_columns = [col for col in binary_model_order if col in combined_results_pivot['Binary'].columns]
multi_columns = [col for col in multi_model_order if col in combined_results_pivot['Multi-Class'].columns]

ordered_columns = [('Binary', col) for col in binary_columns] + [('Multi-Class', col) for col in multi_columns]
combined_results_pivot = combined_results_pivot.reindex(columns=pd.MultiIndex.from_tuples(ordered_columns))

# Position of the last Binary column among a row's cells. The row-heading <th>
# is the first child of every body row, so data column k is child k + 1.
type_separator_child = len(binary_columns) + 1

# All table-level CSS is applied in ONE set_table_styles call. The method
# replaces previously set table styles unless overwrite=False is passed, so
# splitting this across calls silently drops the earlier rules.
table_styles = [
    # Full thin border, centered text, bold headers
    {'selector': 'th', 'props': [('border', '1px solid black'), ('text-align', 'center'), ('font-weight', 'bold')]},
    {'selector': 'td', 'props': [('border', '1px solid black'), ('text-align', 'center')]},
    # Thick border between the Binary and Multi-Class groups
    {'selector': f'td:nth-child({type_separator_child})', 'props': [('border-right', '3px solid black')]},
    {'selector': f'th:nth-child({type_separator_child})', 'props': [('border-right', '3px solid black')]},
]

styled_results = (
    combined_results_pivot.style
    .set_properties(**{'text-align': 'center'})
    .set_table_styles(table_styles)
)

# Display the results
print("Model Performance Metrics:")
display(styled_results)

# Imported at the point of use: if dataframe_image is unavailable, the table
# above has already been displayed before the import fails.
import dataframe_image as dfi
# Save the styled DataFrame as an image
dfi.export(styled_results, 'model_performance_metrics.png')

print("Model Performance Metrics have been saved as an image.")

Model Performance Metrics:


Unnamed: 0_level_0,Binary,Binary,Binary,Binary,Binary,Binary,Multi-Class,Multi-Class,Multi-Class,Multi-Class,Multi-Class,Multi-Class
Unnamed: 0_level_1,Base Logistic Regression,Tuned Decision Tree,Tuned Random Forest,Base Gradient Boosting,Tuned XGBoost,Tuned ANN,Tuned Logistic Regression,Tuned Decision Tree,Tuned Random Forest,Tuned Gradient Boosting,Tuned XGBoost,Tuned ANN
Metric,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Accuracy,0.868692,0.909189,0.918086,0.917012,0.918086,0.915171,0.720969,0.805031,0.812241,0.816383,0.815156,0.807793
Precision (Class 0),0.873112,0.867356,0.879737,0.879940,0.878382,0.871471,0.868664,0.866882,0.874227,0.869591,0.871157,0.873132
Precision (Class 1),0.864972,0.954371,0.958649,0.955961,0.960393,0.962809,0.448669,0.6,0.645995,0.635149,0.653894,0.627764
Precision (Class 2),-,-,-,-,-,-,0.634101,0.799627,0.777447,0.813735,0.788671,0.774433
Recall (Class 0),0.844755,0.953556,0.957454,0.954531,0.959402,0.962325,0.857096,0.958103,0.963949,0.965898,0.966223,0.967847
Recall (Class 1),0.890116,0.869477,0.882849,0.883430,0.881105,0.872965,0.255043,0.419308,0.360231,0.445245,0.381124,0.368156
Recall (Class 2),-,-,-,-,-,-,0.831871,0.836257,0.890351,0.84308,0.882066,0.86501
F1 Score (Class 0),0.858699,0.908416,0.916952,0.915719,0.917106,0.914647,0.862841,0.910213,0.916898,0.915218,0.91623,0.918053
F1 Score (Class 1),0.877364,0.909948,0.919189,0.918266,0.919042,0.915688,0.325218,0.493639,0.462535,0.523507,0.481566,0.464124
F1 Score (Class 2),-,-,-,-,-,-,0.719646,0.817532,0.830077,0.828147,0.832758,0.817219


Model Performance Metrics have been saved as an image.
