Naive Bayes Classifier

In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset from the CSV file next to the notebook.
df = pd.read_csv('diabetes_data.csv')

# Separate the predictors from the 'Outcome' label column.
X = df.drop(columns='Outcome')
y = df['Outcome']

# Hold out 30% of the rows for evaluation. The fixed random_state keeps the
# split reproducible, so other cells that repeat this call with the same
# arguments on the same data get the same rows.
# NOTE(review): the recorded output reports a total support of 2, i.e. only
# two test rows -- metrics computed on so few samples are not meaningful.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit a Gaussian Naive Bayes classifier on the training portion.
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

# Hard class predictions for the held-out rows.
y_pred_nb = nb_model.predict(X_test)

# Evaluation: per-class precision/recall/F1 followed by overall accuracy.
# zero_division=0 matches the Decision Tree report and reports 0 (without an
# UndefinedMetricWarning) for any class that is never predicted.
print("🔍 Naive Bayes Classification Report:\n")
print(classification_report(y_test, y_pred_nb, zero_division=0))
accuracy_nb = accuracy_score(y_test, y_pred_nb)
print(f"Naive Bayes Accuracy: {accuracy_nb:.2f}")


🔍 Naive Bayes Classification Report:

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       1.0
           1       0.00      0.00      0.00       1.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0

Naive Bayes Accuracy: 0.00


Decision Tree Classifier (ID3-style, entropy criterion)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score

# --- Data -------------------------------------------------------------------
# Read the CSV and separate the 'Outcome' label from the predictor columns.
df = pd.read_csv('diabetes_data.csv')
y = df['Outcome']
X = df.drop(columns='Outcome')

# --- Hold-out split ---------------------------------------------------------
# 70% train / 30% test, with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# --- Model ------------------------------------------------------------------
# Decision tree that chooses splits by entropy; fit() returns the estimator
# itself, so construction and training are chained.
dt_model = DecisionTreeClassifier(criterion='entropy', random_state=42).fit(X_train, y_train)

# --- Predictions ------------------------------------------------------------
y_pred_dt = dt_model.predict(X_test)

# --- Evaluation -------------------------------------------------------------
# Per-class report first (zero_division=0 reports 0 for never-predicted
# classes instead of warning), then the overall accuracy.
print("\n🌳 Decision Tree (ID3) Classification Report:")
print(classification_report(y_test, y_pred_dt, zero_division=0))
accuracy_dt = accuracy_score(y_test, y_pred_dt)
print(f"Decision Tree Accuracy: {accuracy_dt:.2f}")



🌳 Decision Tree (ID3) Classification Report:
              precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       0.00      0.00      0.00         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2

Decision Tree Accuracy: 0.50


Model Comparison

In [13]:
from sklearn.metrics import confusion_matrix, roc_auc_score

# This cell relies on objects created by the two model cells above:
# nb_model, dt_model, X_test, y_test, y_pred_nb, y_pred_dt,
# accuracy_nb and accuracy_dt. Run those cells first.

# Confusion matrices are built from the hard class predictions.
cm_nb = confusion_matrix(y_test, y_pred_nb)
cm_dt = confusion_matrix(y_test, y_pred_dt)

# ROC AUC is a ranking metric: it needs a continuous score per sample, not
# thresholded 0/1 labels (hard labels collapse the ROC curve to one point).
# predict_proba returns one column per entry of classes_; for this binary
# 0/1 target, column 1 is the probability of the greater label, which is
# what roc_auc_score expects as y_score.
proba_nb = nb_model.predict_proba(X_test)[:, 1]
proba_dt = dt_model.predict_proba(X_test)[:, 1]
roc_nb = roc_auc_score(y_test, proba_nb)
roc_dt = roc_auc_score(y_test, proba_dt)

# Side-by-side summary of both models.
print("\n📊 Comparison of Naive Bayes vs Decision Tree:\n")
print(f"Naive Bayes Accuracy: {accuracy_nb:.2f}")
print(f"Decision Tree Accuracy: {accuracy_dt:.2f}")
print(f"Naive Bayes ROC AUC: {roc_nb:.2f}")
print(f"Decision Tree ROC AUC: {roc_dt:.2f}")
print("\nConfusion Matrix - Naive Bayes:\n", cm_nb)
print("\nConfusion Matrix - Decision Tree:\n", cm_dt)



📊 Comparison of Naive Bayes vs Decision Tree:

Naive Bayes Accuracy: 0.00
Decision Tree Accuracy: 0.50
Naive Bayes ROC AUC: 0.00
Decision Tree ROC AUC: 0.50

Confusion Matrix - Naive Bayes:
 [[0 1]
 [1 0]]

Confusion Matrix - Decision Tree:
 [[1 0]
 [1 0]]
