In [None]:
# Title: Sensitivity Analysis for Prediction of Perinatal Asphyxia
# Author: Cephas Ekow Biney
# Institution: Kwame Nkrumah University of Science and Technology (KNUST)
# Date: 6th October, 2025.
# Description: 
#               This notebook shows the sensitivity analysis conducted to observe
#               the variations across the subsets of the dataset.

In [None]:
# Libraries Used
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score, roc_curve, auc


In [None]:
# Sensitivity-analysis setup
metrics: list = []   # collects one dict of scores per evaluated subset
n_splits: int = 10   # number of random subsets to draw and evaluate

In [None]:
# Decision threshold applied to the predicted probability of the positive class.
# It is set below the usual 0.5 to trade some precision for better recall.
DECISION_THRESHOLD = 0.4

# Start from an empty list so that re-running this cell does not append a
# second set of rows on top of the results of a previous run.
metrics = []

# NOTE(review): X and y are not defined in any visible cell — presumably the
# feature matrix and a binary target prepared in an earlier step; confirm that
# they are loaded before this cell so the notebook survives "Restart & Run All".
for i in range(n_splits):
    # Drawing a random, stratified subset (10% of data). Using the loop index as
    # the seed gives a different but reproducible subset on every iteration.
    X_subset, _, y_subset, _ = train_test_split(X, y, test_size=0.9, stratify=y, random_state=i)

    # Splitting the subset into train-test (80/20). The split is stratified like
    # the one above so both parts keep the subset's class ratio; without it the
    # small test part can be left with very few (or no) minority-class samples,
    # which distorts recall/precision and can make roc_auc_score fail.
    X_train, X_test, y_train, y_test = train_test_split(
        X_subset, y_subset, test_size=0.2, stratify=y_subset, random_state=i
    )

    # Using Random Forest configured towards recall
    model = RandomForestClassifier(
        n_estimators=500,            # Increase number of trees for stability
        criterion="gini",
        max_depth=None,               # Allow deep trees
        min_samples_split=2,          # Allow more splits
        min_samples_leaf=1,           # Allow small leaves
        class_weight="balanced",      # Adjust for class imbalance
        random_state=i
    )

    # Training the model
    model.fit(X_train, y_train)

    # Making predictions
    y_prob = model.predict_proba(X_test)[:, 1]  # Probability scores (second class column)
    y_pred = (y_prob >= DECISION_THRESHOLD).astype(int)  # Lowered threshold for better recall

    # Computing metrics.
    # zero_division=0 yields the same 0.0 score the default would, but without
    # emitting a warning when a subset has no positive predictions or labels.
    metrics.append({
        "Subset": i + 1,
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred, zero_division=0),
        "Recall": recall_score(y_test, y_pred, zero_division=0),
        "F1-score": f1_score(y_test, y_pred, zero_division=0),
        "AUC": roc_auc_score(y_test, y_prob)
    })


In [None]:
# Converting results to a DataFrame (one row per evaluated subset)
results_df = pd.DataFrame(metrics)
print(results_df)

# Computing mean metrics.
# "Subset" is only the run label written by the loop (i + 1), not a score, so
# it is dropped before averaging; otherwise its mean would show up as a
# meaningless row in the summary. errors="ignore" keeps this cell working even
# when `metrics` is empty and the column does not exist.
mean_metrics = results_df.drop(columns="Subset", errors="ignore").mean(numeric_only=True)

print("\nMean of Metrics:")
print(mean_metrics)