In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder, StandardScaler

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib

In [4]:
# Baseline experiment: integer-encode the text columns, fit a logistic
# regression on the unscaled features, and report hold-out metrics.

# Read the raw records from disk.
df = pd.read_csv("diabetes_prediction_dataset.csv")

# Swap every object-dtype column for integer codes. The same encoder object
# is re-fitted for each column, so `le` ends up holding only the mapping of
# the last column processed.
le = LabelEncoder()
text_columns = df.select_dtypes(include=['object']).columns
for name in text_columns:
    df[name] = le.fit_transform(df[name])

# Separate the label from the predictors.
target_name = "diabetes"  # change this if the target column has another name
y = df[target_name]
X = df.drop(columns=[target_name])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the classifier (`fit` returns the estimator itself).
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Score the held-out rows.
y_pred = model.predict(X_test)

# Report accuracy, the per-class report and the confusion matrix.
print(f"Accuracy Score: {accuracy_score(y_test, y_pred):.4f}")
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Accuracy Score: 0.9587

Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.99      0.98     18292
           1       0.86      0.61      0.72      1708

    accuracy                           0.96     20000
   macro avg       0.91      0.80      0.85     20000
weighted avg       0.96      0.96      0.96     20000

Confusion Matrix:
 [[18127   165]
 [  661  1047]]


In [7]:
# Persist the classifier currently bound to `model` so it can be reloaded
# later with joblib.load.
model_path = "diabetes_model.pkl"
joblib.dump(model, model_path)
print("✅ Model saved as 'diabetes_model.pkl'")

✅ Model saved as 'diabetes_model.pkl'


In [10]:
# Train a logistic regression on label-encoded, standardized features and
# persist the model together with the preprocessing objects needed to
# reproduce the same transformation at prediction time.

# Reload the raw file so this cell does not depend on the already-encoded
# `df` left behind by an earlier cell.
df = pd.read_csv("diabetes_prediction_dataset.csv")

# Encode categorical variables with ONE encoder per column.
# Re-fitting a single shared LabelEncoder in the loop overwrites its
# `classes_` on every iteration, so only the last column's mapping would
# survive and the other text columns could not be encoded at inference time.
label_encoders = {}
le = LabelEncoder()  # stays unfitted only if the frame has no text columns
for col in df.select_dtypes(include='object').columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Split features and target
X = df.drop("diabetes", axis=1)
y = df["diabetes"]

# Train-test split happens BEFORE scaling so the scaler never sees the
# held-out rows. With a fixed random_state and the same number of rows the
# selected train/test rows are the same as when splitting after scaling.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale features: learn mean/std from the training rows only, then apply the
# same transformation to the test rows (avoids test-set leakage).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Train model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Save model, scaler, and label encoders.
joblib.dump(model, "diabetes_model.pkl")
joblib.dump(scaler, "scaler.pkl")
# Legacy artifact: a single LabelEncoder (the one fitted on the last text
# column), kept so existing loaders of this file still get the same type.
joblib.dump(le, "label_encoder.pkl")
# Complete artifact: {column name -> fitted LabelEncoder} for every text column.
joblib.dump(label_encoders, "label_encoders.pkl")

print("✅ Model, Scaler, and Encoder saved successfully.")

✅ Model, Scaler, and Encoder saved successfully.


In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib

# Train a logistic regression on one-hot-encoded, standardized features and
# save the model, the scaler and the column selector as separate artifacts.

# Load dataset
df = pd.read_csv("diabetes_prediction_dataset.csv")

# Define feature and target
X = df.drop("diabetes", axis=1)
y = df["diabetes"]

# Define column types
numerical_cols = ['age', 'bmi', 'HbA1c_level', 'blood_glucose_level']
categorical_cols = ['gender', 'smoking_history']
binary_cols = ['hypertension', 'heart_disease']

# Step 1: ColumnTransformer (Selector)
# Numeric and binary columns pass through untouched; categorical columns are
# one-hot encoded, and categories unseen during fit encode as all zeros.
# sparse_threshold=0 forces a dense result: StandardScaler (which centers by
# default) cannot be fitted on a sparse matrix, so relying on the default
# density heuristic would make the next step data-dependent.
selector = ColumnTransformer(
    transformers=[
        ("num", "passthrough", numerical_cols),
        ("cat", OneHotEncoder(handle_unknown='ignore'), categorical_cols),
        ("bin", "passthrough", binary_cols)
    ],
    sparse_threshold=0,
)

# Step 2: Train-test split on the RAW features, before any fitting, so the
# preprocessing steps below never see the held-out rows. With a fixed
# random_state and the same number of rows, the selected rows are the same as
# when splitting after preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 3: Fit the selector on the training rows only, then reuse it for test
X_train_selected = selector.fit_transform(X_train_raw)
X_test_selected = selector.transform(X_test_raw)

# Step 4: Scale the selected features using training-set statistics only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_selected)
X_test = scaler.transform(X_test_selected)

# Step 5: Train the model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Step 6: Save components separately
# At prediction time apply them in this order: selector -> scaler -> model.
joblib.dump(model, "diabetes_model.pkl")
joblib.dump(scaler, "scaler.pkl")
joblib.dump(selector, "selector.pkl")

print("✅ Model, scaler, and selector saved separately.")


✅ Model, scaler, and selector saved separately.
