In [1]:
import pandas as pd
from pandas import DataFrame
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report, roc_auc_score, \
    accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler


def drop_empty_items(data_frame: DataFrame) -> DataFrame:
    data_frame = data_frame.drop(['Insulin'], axis=1)
    return data_frame[(data_frame['Glucose'] != 0) & (data_frame['BloodPressure'] != 0) & (data_frame['BMI'] != 0)]


def scale_features(X_train, X_test):
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test


def print_score(y_test, y_pred) -> None:
    # Calculate other evaluation metrics for test set
    print(f"SIMPLE XGBoost")
    print("===========================================================================")
    print("\nClassification Report:\n",
          classification_report(y_test, y_pred, target_names=['non-diabetic', 'diabetic']))
    print("XGBoost Model Evaluation:")

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    print("Accuracy: ", accuracy)
    print(f"Precision (Weighted): {precision:.6f}")
    print(f"Recall (Weighted): {recall:.6f}")
    print(f"F1-Score (Weighted): {f1:.6f}")

    print("Class distribution before SMOTE:", y_train.value_counts().to_dict())
    print("Class distribution after SMOTE:", pd.Series(y_train_smote).value_counts().to_dict())

    roc_auc = roc_auc_score(y_test, y_pred)
    print(f"ROC-AUC: {roc_auc:.6f}")

    # Print confusion matrix with class labels
    conf_matrix = confusion_matrix(y_test, y_pred)
    class_names = ['non-diabetic', 'diabetic']
    print("\nConfusion Matrix with Class Labels:")
    print(pd.DataFrame(conf_matrix, index=class_names, columns=class_names))

    tn, fp, fn, tp = conf_matrix.ravel()
    specificity = tn / (tn + fp)
    print("Specificity: ", specificity)
    sensitivity = tp / (tp + fn)
    print("Sensitivity: ", sensitivity)

In [4]:
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split, GridSearchCV

# Load the PIMA dataset
df = pd.read_csv('data/diabetes.csv')

# Step 1: Remove rows with zero values in 'Glucose', 'BloodPressure', or 'BMI'
df = drop_empty_items(df)

# Define features
features = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'BMI',
            'DiabetesPedigreeFunction', 'Age']

# Splitting dataset into 80-20 split
X = df.drop('Outcome', axis=1)
y = df['Outcome']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features after split (Z-scores, deviation from paper to avoid leakage)
X_train, X_test = scale_features(X_train, X_test)

smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Define individual models and their hyperparameter grids (from Table 3)
# Random Forest
rf = RandomForestClassifier(random_state=42)
rf_param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [10, 20],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
    'bootstrap': [True, False]
}

# Gradient Boosting
gb = GradientBoostingClassifier(random_state=42)
gb_param_grid = {
    'n_estimators': [50, 100],
    'learning_rate': [0.01, 0.1],
    'max_depth': [3, 5],
    'subsample': [0.8, 1]
}

# MLP
mlp = MLPClassifier(random_state=42, max_iter=2000, solver='adam')
mlp_param_grid = {
    'hidden_layer_sizes': [(50, 50)],
    'alpha': [0.001, 0.01],
    'learning_rate_init': [0.001, 0.01]
}

# XGBoost
xgb = XGBClassifier(random_state=42, eval_metric='logloss')
xgb_param_grid = {
    'n_estimators': [50, 100],
    'learning_rate': [0.01, 0.1],
    'max_depth': [3, 5],
    'colsample_bytree': [0.8, 1.0]
}

# Perform grid search for each model
models = [
    ('rf', rf, rf_param_grid),
    ('xgb', xgb, xgb_param_grid),
    ('mlp', mlp, mlp_param_grid),
    ('gb', gb, gb_param_grid)
]

best_estimators = {}
for name, model, param_grid in models:
    print(f"\nTuning {name}...")
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        scoring='f1_weighted',
        cv=5,
        n_jobs=-1,
        verbose=1
    )
    grid_search.fit(X_train_smote, y_train_smote)
    best_estimators[name] = grid_search.best_estimator_
    print(f"Best {name} Hyperparameters: {grid_search.best_params_}")
    print(f"Best {name} F1-Weighted Score: {grid_search.best_score_:.4f}")

# Create ensemble model with soft voting
ensemble = VotingClassifier(
    estimators=[
        ('rf', best_estimators['rf']),
        ('xgb', best_estimators['xgb']),
        ('mlp', best_estimators['mlp']),
        ('gb', best_estimators['gb'])
    ],
    voting='soft',
    weights=[2, 1, 1, 1]
)

# Train ensemble on SMOTE-balanced training data
print("\nTraining ensemble model...")
ensemble.fit(X_train_smote, y_train_smote)

# Evaluate ensemble on test set
y_pred = ensemble.predict(X_test)
print_score(y_test, y_pred)


Tuning rf...
Fitting 5 folds for each of 32 candidates, totalling 160 fits
Best rf Hyperparameters: {'bootstrap': False, 'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 200}
Best rf F1-Weighted Score: 0.8094

Tuning xgb...
Fitting 5 folds for each of 16 candidates, totalling 80 fits
Best xgb Hyperparameters: {'colsample_bytree': 0.8, 'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 100}
Best xgb F1-Weighted Score: 0.7996

Tuning mlp...
Fitting 5 folds for each of 4 candidates, totalling 20 fits
Best mlp Hyperparameters: {'alpha': 0.01, 'hidden_layer_sizes': (50, 50), 'learning_rate_init': 0.001}
Best mlp F1-Weighted Score: 0.8204

Tuning gb...
Fitting 5 folds for each of 16 candidates, totalling 80 fits
Best gb Hyperparameters: {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 100, 'subsample': 0.8}
Best gb F1-Weighted Score: 0.7978

Training ensemble model...
SIMPLE XGBoost

Classification Report:
               precision    recall  f1-score 