**DATASET PREPARATION**

In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from catboost import CatBoostClassifier, Pool
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix, roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
from joblib import dump, load

In [2]:
df = pd.read_csv(r"D:\BINUS\... SEMESTER 4\Research Methodology in Computer Science\AOL\Models\stroke_risk_dataset.csv")

In [3]:
df = df.drop(columns=['Stroke Risk (%)'])

In [5]:
X = df.drop(columns=['At Risk (Binary)'])
y = df['At Risk (Binary)']

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)

In [9]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

dump(scaler, 'scaler.pkl')



['scaler.pkl']

In [19]:
df.head()

Unnamed: 0,Chest Pain,Shortness of Breath,Irregular Heartbeat,Fatigue & Weakness,Dizziness,Swelling (Edema),Pain in Neck/Jaw/Shoulder/Back,Excessive Sweating,Persistent Cough,Nausea/Vomiting,High Blood Pressure,Chest Discomfort (Activity),Cold Hands/Feet,Snoring/Sleep Apnea,Anxiety/Feeling of Doom,Age,At Risk (Binary)
0,0,1,1,1,0,0,0,1,1,1,0,1,1,0,0,54,1
1,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,49,0
2,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,62,1
3,1,0,1,1,0,1,1,1,1,1,1,0,0,0,0,48,1
4,0,0,1,0,0,1,0,1,0,1,1,0,0,1,1,61,1


**LOADING MODELS**

In [14]:
model_paths = [
    r'CatBoost\model.ipynb',
    r'KNN\model.joblib',
    r'LightGBM\model.joblib',
    r'Naive Bayes Classifier\model.joblib',
    r'Random Forest\model.joblib',
    r'XGBoost\model.joblib'
]

In [15]:
model_names = [f'Model {i+1}' for i in range(len(model_paths))]

In [16]:
results = []

import joblib
for i, path in enumerate(model_paths):
    model = joblib.load(path)  # or use pickle.load(open(path, 'rb')) if you used pickle
    
    y_pred = model.predict(X_test)
    
    results.append({
        'Model': model_names[i],
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred, average='weighted'),
        'Recall': recall_score(y_test, y_pred, average='weighted'),
        'F1 Score': f1_score(y_test, y_pred, average='weighted')
    })

KeyError: 123

In [None]:
df_results = pd.DataFrame(results)