Diabetes Diagnostic Project


In [11]:
# Load and Clean the Dataset
#
# Reads diabetes.csv, verifies it has no NaN cells, splits it 80/20 into
# train/test sets, standardizes the features, and saves the fitted scaler so
# the serving code can apply the same transformation.

import joblib
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load dataset
df = pd.read_csv("diabetes.csv")

# Check for missing values: show the per-column NaN counts, then stop right
# here if any are present. StandardScaler skips NaNs while fitting and keeps
# them when transforming, so without this guard they would only surface later
# as a harder-to-diagnose failure during model training.
missing_counts = df.isnull().sum()
print(missing_counts)
if missing_counts.any():
    raise ValueError(
        "diabetes.csv contains missing values: "
        f"{missing_counts[missing_counts > 0].to_dict()}"
    )
# NOTE(review): isnull() only detects NaN. If this file is the Pima Indians
# diabetes dataset, zeros in Glucose / BloodPressure / SkinThickness / Insulin /
# BMI are presumably placeholders for missing measurements -- confirm, and
# consider imputing them before scaling.

# Separate features (X) and target (y)
X = df.drop(columns=['Outcome'])  # Assuming 'Outcome' is the target column
y = df['Outcome']

# Split data into training and test sets (fixed seed for reproducibility)
# NOTE(review): the split is not stratified; consider stratify=y so both
# partitions keep the same Outcome ratio. Left unchanged here because it would
# alter the recorded accuracy.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features: fit the scaler on the training partition only, then
# apply those same statistics to the test partition to avoid leakage.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Save the scaler for later use in Flask
joblib.dump(scaler, "scaler1.pkl")

print("Data preprocessing complete!")


Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64
Data preprocessing complete!


In [12]:
# Stacked ensemble: build, fit, persist, and score on the held-out split

import joblib
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from xgboost import XGBClassifier

# Named base estimators handed to the stacking classifier; each one is seeded
# with the same random_state.
base_learners = [
    (
        'xgb',
        XGBClassifier(
            n_estimators=200,
            max_depth=4,
            learning_rate=0.01,
            subsample=0.7,
            colsample_bytree=1.0,
            gamma=0.2,
            random_state=42,
        ),
    ),
    (
        'svc',
        SVC(kernel='linear', probability=True, random_state=42),
    ),
    (
        'lr',
        LogisticRegression(random_state=42),
    ),
]

# Final estimator that sits on top of the base learners (default settings)
meta_model = LogisticRegression()

# Assemble the stack and fit it on the standardized training features
stacking_model = StackingClassifier(
    estimators=base_learners,
    final_estimator=meta_model,
)
stacking_model.fit(X_train_scaled, y_train)

# Write the fitted ensemble to disk
joblib.dump(stacking_model, "stacking_model1.pkl")

# Score predictions on the standardized test features against the true labels
y_pred = stacking_model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.4f}")


Model Accuracy: 0.7792


In [14]:
import joblib

# Persist the fitted stacking ensemble to disk. The call is kept as the cell's
# final expression so the notebook echoes the list of file names joblib returns.
joblib.dump(value=stacking_model, filename="stacking_model1.pkl")


['stacking_model1.pkl']