In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from xgboost import XGBClassifier
import joblib

In [4]:
df = pd.read_csv("diabetes_dataset.csv")

In [5]:
df['Diabetes_Status'] = np.where((df['Fasting_Blood_Glucose'] > 125) | (df['HbA1c'] > 6.5), 1, 0)
df.fillna(df.mean(numeric_only=True), inplace=True)

In [6]:
categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
df_encoded = df.copy()

In [7]:
le_dict = {}

for col in categorical_cols:
    le = LabelEncoder()
    df_encoded[col] = le.fit_transform(df_encoded[col])
    le_dict[col] = le

In [8]:

joblib.dump(le_dict, 'label_encoders.pkl')

['label_encoders.pkl']

In [9]:

X = df_encoded.drop('Diabetes_Status', axis=1)
y = df_encoded['Diabetes_Status']

In [10]:

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

In [11]:

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

In [12]:
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
xgb_model.fit(X_train, y_train)

xgb_pred = xgb_model.predict(X_test)
xgb_proba = xgb_model.predict_proba(X_test)[:, 1]

In [13]:
def evaluate_model(y_true, y_pred, y_proba, model_name):
    print(f"\n{model_name} Evaluation:")
    print(f"Accuracy: {accuracy_score(y_true, y_pred):.4f}")
    print(f"Precision: {precision_score(y_true, y_pred):.4f}")
    print(f"Recall: {recall_score(y_true, y_pred):.4f}")
    print(f"F1-Score: {f1_score(y_true, y_pred):.4f}")
    print(f"ROC-AUC: {roc_auc_score(y_true, y_proba):.4f}")

In [14]:
evaluate_model(y_test, xgb_pred, xgb_proba, "XGBoost")


XGBoost Evaluation:
Accuracy: 0.9995
Precision: 0.9994
Recall: 1.0000
F1-Score: 0.9997
ROC-AUC: 1.0000


In [15]:
joblib.dump(xgb_model, 'xgboost_model.pkl')
xgb_model.save_model('xgboost_model.json')