In [None]:
# Title: Model Selection and Performance Evaluation for Prediction of Perinatal Asphyxia
# Author: Cephas Ekow Biney
# Institution: Kwame Nkrumah University of Science and Technology (KNUST)
# Date: 6th October, 2025.
# Description: 
#               This notebook shows the models that were used and the evaluation of their
#               performance on the cleaned neonatal dataset.

In [None]:
# Libraries Used
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, IsolationForest
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score, roc_curve, auc

In [None]:
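# Use the top predictors for the model development.
# NOTE(review): X_train, X_test, y_train and y_test are used by the cells below but are
# not defined in the visible part of this notebook. TODO: load the cleaned neonatal
# dataset, select the top predictors and create the train/test split in this cell so the
# notebook survives "Restart Kernel -> Run All".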

In [None]:
# Models Selected
# Candidate classifiers, keyed by the short names that are also used as keys
# of `param_grids`.
# Every estimator is seeded so repeated runs give the same fitted models:
# the tree ensembles, SVC's internal probability calibration and the
# saga/liblinear logistic-regression solvers all use randomness. 42 matches
# the seed already used by the randomized search.
models = {
    'lr': LogisticRegression(random_state=42),
    # probability=True is required so that predict_proba is available for ROC-AUC.
    'svm': SVC(probability=True, random_state=42),
    'dt': DecisionTreeClassifier(random_state=42),
    'rf': RandomForestClassifier(random_state=42),
    'xgb': XGBClassifier(random_state=42)
}


In [None]:
# Defined Parameters
# Hyperparameter search spaces, keyed by the same short names as `models`.
# An entry is either a single dict or a list of dicts. With a list, the search
# treats every dict as its own sub-space, which keeps parameters that only
# work together (or only matter together) from being crossed with each other.
param_grids = {
    # LogisticRegression: not every solver supports every penalty, so the
    # valid solver/penalty pairs are listed explicitly instead of taking the
    # full cross product (which mostly produces fits that fail).
    'lr': [
        {
            # lbfgs only supports the l2 penalty.
            'solver': ['lbfgs'],
            'penalty': ['l2'],
            'C': [0.01, 0.1, 1.0, 10.0, 100.0],
            'max_iter': [100, 200, 500, 1000]
        },
        {
            # liblinear and saga both support l1 and l2.
            'solver': ['liblinear', 'saga'],
            'penalty': ['l1', 'l2'],
            'C': [0.01, 0.1, 1.0, 10.0, 100.0],
            'max_iter': [100, 200, 500, 1000]
        },
        {
            # elasticnet is only available with saga and needs an l1_ratio.
            'solver': ['saga'],
            'penalty': ['elasticnet'],
            'l1_ratio': [0.25, 0.5, 0.75],
            'C': [0.01, 0.1, 1.0, 10.0, 100.0],
            'max_iter': [100, 200, 500, 1000]
        }
    ],

    # SVC: `degree` (poly only) and `coef0` (poly/sigmoid only) have no effect
    # on the linear and rbf kernels searched here, and `gamma` has no effect on
    # the linear kernel. Crossing them only repeated identical fits, so each
    # kernel now gets just the parameters it actually uses.
    'svm': [
        {
            'kernel': ['linear'],
            'C': [0.01, 0.1, 1.0, 10.0, 100]
        },
        {
            'kernel': ['rbf'],
            'C': [0.01, 0.1, 1.0, 10.0, 100],
            'gamma': ['scale', 'auto', 0.01, 0.1, 1.0, 10.0]
        }
    ],

    'dt': {
        'criterion': ['gini', 'entropy'],
        'max_depth': [None, 5, 10, 20, 30],
        'min_samples_split': [2, 5, 8, 10],
        'max_features': ['sqrt', 'log2']
    },

    'rf': {
        'n_estimators': [50, 100, 200, 300, 500],
        'criterion': ['gini', 'entropy'],
        'max_depth': [None, 10, 20, 30, 40],
        'min_samples_split': [2, 5, 8, 10]
    },

    'xgb': {
        'n_estimators': [50, 100, 200, 300, 500],
        'max_depth': [3, 6, 9],  # Removed None
        # The learning rate (eta) is documented with range [0, 1]; the former
        # value 10.0 was outside that range and has been dropped.
        'learning_rate': [0.01, 0.1, 1.0]
    }
}


In [None]:
# GridSearchCV and RandomizedSearchCV
# Randomized Search
def random_search(model, param_distributions, X_train, y_train, n_iter=10,
                  cv=10, scoring=None, random_state=42):
    """Tune `model` with a cross-validated randomized hyperparameter search.

    Parameters
    ----------
    model : estimator
        Unfitted estimator to tune.
    param_distributions : dict or list of dict
        Search space handed to RandomizedSearchCV.
    X_train, y_train : array-like
        Training features and labels the search is fitted on.
    n_iter : int, default=10
        Number of parameter settings that are sampled.
    cv : int or CV splitter, default=10
        Cross-validation strategy (previously hard-coded to 10 folds).
    scoring : str or callable, default=None
        Metric used to rank candidates; None keeps the estimator's default
        scorer, which is what the search used before this argument existed.
    random_state : int, default=42
        Seed for the parameter sampling (previously hard-coded to 42).

    Returns
    -------
    estimator
        The `best_estimator_` of the fitted search.
    """
    random_search_cv = RandomizedSearchCV(
        model,
        param_distributions,
        n_iter=n_iter,
        scoring=scoring,
        cv=cv,
        random_state=random_state,
        n_jobs=-1,  # use every available core
    )
    random_search_cv.fit(X_train, y_train)
    return random_search_cv.best_estimator_

# Grid Search
def grid_search(model, param_grids, X_train, y_train, cv=10, scoring=None):
    """Tune `model` with an exhaustive cross-validated grid search.

    Parameters
    ----------
    model : estimator
        Unfitted estimator to tune.
    param_grids : dict or list of dict
        Parameter grid for this one model, handed to GridSearchCV. (Inside this
        function the name shadows the notebook-level `param_grids` mapping.)
    X_train, y_train : array-like
        Training features and labels the search is fitted on.
    cv : int or CV splitter, default=10
        Cross-validation strategy (previously hard-coded to 10 folds).
    scoring : str or callable, default=None
        Metric used to rank candidates; None keeps the estimator's default
        scorer, which is what the search used before this argument existed.

    Returns
    -------
    estimator
        The `best_estimator_` of the fitted search.
    """
    grid_search_cv = GridSearchCV(
        model,
        param_grids,
        scoring=scoring,
        cv=cv,
        n_jobs=-1,  # use every available core
    )
    grid_search_cv.fit(X_train, y_train)
    return grid_search_cv.best_estimator_

In [None]:
# Confusion Matrices

def plot_confusion_matrix(y_true, y_pred, model_name, labels=None):
    """Plot the confusion matrix of a set of predictions as an annotated heatmap.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels.
    model_name : str
        Name shown in the plot title.
    labels : list, default=None
        Optional label order passed to `confusion_matrix`. Passing both classes
        explicitly forces a 2x2 matrix even when one class is absent from the
        data. None keeps the labels that occur in `y_true` / `y_pred`.
    """
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    # 'Negative'/'Positive' only fit a 2x2 matrix. If a class is missing from
    # both y_true and y_pred the matrix has a different shape, and a two-entry
    # tick-label list would no longer match the axes, so fall back to the
    # given labels or to automatic tick labels.
    if cm.shape == (2, 2):
        tick_labels = ['Negative', 'Positive']
    elif labels is not None:
        tick_labels = list(labels)
    else:
        tick_labels = 'auto'

    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False,
                xticklabels=tick_labels, yticklabels=tick_labels)

    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.title(f'Confusion Matrix for {model_name}')
    plt.show()


In [None]:
# Model Performance Evaluation
def evaluate_model(model, X_test, y_test, model_name):
    """Print classification metrics for a fitted model and plot its confusion matrix.

    Parameters
    ----------
    model : fitted estimator
        Must implement `predict`; `predict_proba` or `decision_function` is
        used for ROC-AUC when available.
    X_test, y_test : array-like
        Held-out features and true labels.
    model_name : str
        Name forwarded to `plot_confusion_matrix` for the plot title.
    """
    y_pred = model.predict(X_test)

    # ROC-AUC needs a continuous score. Prefer the positive-class probability
    # and fall back to the decision function for models without predict_proba.
    if hasattr(model, 'predict_proba'):
        y_score = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, 'decision_function'):
        y_score = model.decision_function(X_test)
    else:
        y_score = None

    # Calculate metrics. zero_division=0 reports 0 without emitting a warning
    # when a metric is undefined (e.g. the positive class is never predicted).
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    roc_auc = roc_auc_score(y_test, y_score) if y_score is not None else None

    # Print evaluation results (all metrics with the same 4-decimal format)
    roc_auc_text = f"{roc_auc:.4f}" if roc_auc is not None else 'Not applicable'
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")
    print(f"ROC-AUC: {roc_auc_text}")
    print(f"Confusion Matrix:\n{confusion_matrix(y_test, y_pred)}")
    print("-" * 50)

    plot_confusion_matrix(y_test, y_pred, model_name)

In [None]:
# Result
# Tune each candidate model with a randomized search and then with a full grid
# search over the same search space, reporting test-set metrics after each.
for model_name, estimator in models.items():
    tag = model_name.upper()
    search_space = param_grids[model_name]

    # Randomized search
    print(f"Performing Random Search on {tag}...")
    best_model_random = random_search(estimator, search_space, X_train, y_train)

    print(f"Evaluating {tag} after Random Search...")
    evaluate_model(best_model_random, X_test, y_test, tag)

    # Exhaustive grid search
    print(f"Performing Grid Search on {tag}...")
    best_model_grid = grid_search(estimator, search_space, X_train, y_train)

    print(f"Evaluating {tag} after Grid Search...")
    evaluate_model(best_model_grid, X_test, y_test, tag)

In [None]:
# Plotting the AUROC Curves

def _add_roc_curve(fitted_model, X, y, curve_label):
    """Add the ROC curve of `fitted_model` on (X, y) to the current figure.

    Models without `predict_proba` are skipped. The legend entry is
    `curve_label` followed by the area under the curve.
    """
    if not hasattr(fitted_model, 'predict_proba'):
        return
    y_proba = fitted_model.predict_proba(X)[:, 1]
    fpr, tpr, _ = roc_curve(y, y_proba)
    plt.plot(fpr, tpr, label=f"{curve_label} - AUC: {auc(fpr, tpr):.2f}")


plt.figure(figsize=(10, 8))

# NOTE: this cell runs both hyperparameter searches for every model, which is
# expensive; each model contributes two curves (random search and grid search).
for model_name in models:
    display_name = model_name.upper()

    # Random Search Evaluation
    print(f"Performing Random Search on {display_name}...")
    best_model_random = random_search(models[model_name], param_grids[model_name], X_train, y_train)

    print(f"Evaluating {display_name} after Random Search...")
    # The search type is part of the label so the two curves of one model can
    # be told apart in the legend.
    _add_roc_curve(best_model_random, X_test, y_test, f"{display_name} (Random Search)")

    # Grid Search Evaluation
    print(f"Performing Grid Search on {display_name}...")
    best_model_grid = grid_search(models[model_name], param_grids[model_name], X_train, y_train)

    print(f"Evaluating {display_name} after Grid Search...")
    _add_roc_curve(best_model_grid, X_test, y_test, f"{display_name} (Grid Search)")

# Finalizing the AUROC Plot
plt.plot([0, 1], [0, 1], 'k--', label="Random Classifier")  # chance-level diagonal
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("AUROC Curve for All Models")
plt.legend(loc="lower right")
plt.show()