In [6]:
#1. Predict Diabetes using Naive Bayes Classification


In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report


In [19]:
# Step 1: Load the dataset
df = pd.read_csv('Diabetes.csv')
print("Initial Data:\n", df.head())

Initial Data:
    Pragnency  Glucose  Blod Pressure  Skin Thikness  Insulin   BMI    DFP  \
0          1       85             66             29        0  26.6  0.351   
1          8      183             64              0        0  23.3  0.672   
2          1       89             66             23       94  28.1  0.167   
3          0      137             40             35      168  43.1  2.288   
4          5      116             74              0        0  25.6  0.201   

   Age  Diabetes  
0   31         0  
1   32         1  
2   21         0  
3   33         1  
4   30         0  


In [20]:
# Step 2: Split the data into features and target
X = df.drop(columns='Diabetes')
y = df['Diabetes']

In [21]:
# Step 3: Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [22]:
# Step 4: Initialize the Naive Bayes classifier
nb_classifier = GaussianNB()

In [23]:
# Step 5: Train the model
nb_classifier.fit(X_train, y_train)

In [24]:
# Step 6: Make predictions
y_pred_nb = nb_classifier.predict(X_test)

In [25]:
# Step 7: Evaluate the model
accuracy_nb = accuracy_score(y_test, y_pred_nb)
print(f"Naive Bayes Accuracy: {accuracy_nb:.2f}")
print("\nClassification Report:\n", classification_report(y_test, y_pred_nb))

Naive Bayes Accuracy: 0.72

Classification Report:
               precision    recall  f1-score   support

           0       0.76      0.83      0.80       151
           1       0.62      0.51      0.56        80

    accuracy                           0.72       231
   macro avg       0.69      0.67      0.68       231
weighted avg       0.71      0.72      0.72       231



In [26]:
#2. Predict Diabetes using ID3 Decision Tree Classifier

In [27]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

In [28]:
# Step 1: Load the dataset
df = pd.read_csv('Diabetes.csv')
print("Initial Data:\n", df.head())

Initial Data:
    Pragnency  Glucose  Blod Pressure  Skin Thikness  Insulin   BMI    DFP  \
0          1       85             66             29        0  26.6  0.351   
1          8      183             64              0        0  23.3  0.672   
2          1       89             66             23       94  28.1  0.167   
3          0      137             40             35      168  43.1  2.288   
4          5      116             74              0        0  25.6  0.201   

   Age  Diabetes  
0   31         0  
1   32         1  
2   21         0  
3   33         1  
4   30         0  


In [29]:
# Step 2: Split the data into features and target
X = df.drop(columns='Diabetes')
y = df['Diabetes']

In [30]:
# Step 3: Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [31]:
# Step 4: Initialize the Decision Tree classifier
dt_classifier = DecisionTreeClassifier(criterion='entropy', random_state=42)

In [32]:
# Step 5: Train the model
dt_classifier.fit(X_train, y_train)

In [34]:
# Step 6: Make predictions
y_pred_dt = dt_classifier.predict(X_test)

In [35]:
# Step 7: Evaluate the model
accuracy_dt = accuracy_score(y_test, y_pred_dt)
print(f"Decision Tree Accuracy: {accuracy_dt:.2f}")
print("\nClassification Report:\n", classification_report(y_test, y_pred_dt))

Decision Tree Accuracy: 0.69

Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.73      0.75       151
           1       0.54      0.61      0.58        80

    accuracy                           0.69       231
   macro avg       0.66      0.67      0.66       231
weighted avg       0.70      0.69      0.69       231



In [36]:
#3. Compare Performance of Both Classifiers

In [37]:
from sklearn.metrics import confusion_matrix, roc_auc_score

In [38]:
# Calculate confusion matrices
conf_matrix_nb = confusion_matrix(y_test, y_pred_nb)
conf_matrix_dt = confusion_matrix(y_test, y_pred_dt)

In [39]:
# Calculate ROC AUC scores
roc_auc_nb = roc_auc_score(y_test, y_pred_nb)
roc_auc_dt = roc_auc_score(y_test, y_pred_dt)

In [40]:
# Print comparison resultsprint("\nNaive Bayes vs Decision Tree Classifier Performance:\n")
print(f"Naive Bayes Accuracy: {accuracy_nb:.2f}")
print(f"Decision Tree Accuracy: {accuracy_dt:.2f}")
print(f"Naive Bayes ROC AUC: {roc_auc_nb:.2f}")
print(f"Decision Tree ROC AUC: {roc_auc_dt:.2f}")

Naive Bayes Accuracy: 0.72
Decision Tree Accuracy: 0.69
Naive Bayes ROC AUC: 0.67
Decision Tree ROC AUC: 0.67


In [41]:
print("\nConfusion Matrix - Naive Bayes:\n", conf_matrix_nb)
print("\nConfusion Matrix - Decision Tree:\n", conf_matrix_dt)


Confusion Matrix - Naive Bayes:
 [[126  25]
 [ 39  41]]

Confusion Matrix - Decision Tree:
 [[110  41]
 [ 31  49]]
