In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load dataset (stable source)
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"

columns = [
    "Pregnancies", "Glucose", "BloodPressure", "SkinThickness",
    "Insulin", "BMI", "DiabetesPedigreeFunction", "Age", "Outcome"
]

df = pd.read_csv(url, names=columns)

# Features & target
X = df.drop(columns=["Outcome"])
y = df["Outcome"]

# Handle missing values (0 treated as missing in medical fields)
cols_with_zero = ["Glucose", "BloodPressure", "SkinThickness", "Insulin", "BMI"]
X[cols_with_zero] = X[cols_with_zero].replace(0, np.nan)

imputer = SimpleImputer(strategy="median")
X_imputed = imputer.fit_transform(X)

# Train-test split
X_tr, X_te, y_tr, y_te = train_test_split(
    X_imputed, y, test_size=0.25, random_state=42, stratify=y
)

labels = {0: "Non-Diabetic", 1: "Diabetic"}


In [2]:
from sklearn.naive_bayes import GaussianNB

nb = GaussianNB()
nb.fit(X_tr, y_tr)

y_pred = nb.predict(X_te)

print("=== Naive Bayes ===")
print("Accuracy:", accuracy_score(y_te, y_pred))
print("\nClassification Report:\n",
      classification_report(y_te, y_pred, target_names=labels.values()))
print("Confusion Matrix:\n", confusion_matrix(y_te, y_pred))


=== Naive Bayes ===
Accuracy: 0.7239583333333334

Classification Report:
               precision    recall  f1-score   support

Non-Diabetic       0.80      0.78      0.79       125
    Diabetic       0.60      0.63      0.61        67

    accuracy                           0.72       192
   macro avg       0.70      0.70      0.70       192
weighted avg       0.73      0.72      0.73       192

Confusion Matrix:
 [[97 28]
 [25 42]]


In [3]:
from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier(
    criterion="entropy",
    max_depth=5,          # avoids overfitting
    random_state=42
)

dt.fit(X_tr, y_tr)
y_pred = dt.predict(X_te)

print("=== Decision Tree (Entropy) ===")
print("Accuracy:", accuracy_score(y_te, y_pred))
print("\nClassification Report:\n",
      classification_report(y_te, y_pred, target_names=labels.values()))
print("Confusion Matrix:\n", confusion_matrix(y_te, y_pred))


=== Decision Tree (Entropy) ===
Accuracy: 0.7552083333333334

Classification Report:
               precision    recall  f1-score   support

Non-Diabetic       0.77      0.90      0.83       125
    Diabetic       0.72      0.49      0.58        67

    accuracy                           0.76       192
   macro avg       0.74      0.69      0.71       192
weighted avg       0.75      0.76      0.74       192

Confusion Matrix:
 [[112  13]
 [ 34  33]]


In [4]:
from sklearn.neural_network import MLPClassifier

# ANN needs scaling
scaler = StandardScaler()
X_tr_scaled = scaler.fit_transform(X_tr)
X_te_scaled = scaler.transform(X_te)

ann = MLPClassifier(
    hidden_layer_sizes=(16, 8),
    activation="relu",
    max_iter=1000,
    random_state=42
)

ann.fit(X_tr_scaled, y_tr)
y_pred = ann.predict(X_te_scaled)

print("=== Artificial Neural Network (ANN) ===")
print("Accuracy:", accuracy_score(y_te, y_pred))
print("\nClassification Report:\n",
      classification_report(y_te, y_pred, target_names=labels.values()))
print("Confusion Matrix:\n", confusion_matrix(y_te, y_pred))


=== Artificial Neural Network (ANN) ===
Accuracy: 0.7135416666666666

Classification Report:
               precision    recall  f1-score   support

Non-Diabetic       0.78      0.78      0.78       125
    Diabetic       0.59      0.60      0.59        67

    accuracy                           0.71       192
   macro avg       0.69      0.69      0.69       192
weighted avg       0.71      0.71      0.71       192

Confusion Matrix:
 [[97 28]
 [27 40]]


