In [4]:
# Student Info: identification banner printed at the top of the lab output.
print(
    "Name: Jiya Honaa",
    "Symbol No: 29097/078",
    "Lab: ID3 Decision Tree Classifier",
    sep="\n",
)

import pandas as pd
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.model_selection import train_test_split
from sklearn import metrics
import graphviz

# Load the diabetes dataset
TARGET_COLUMN = "Diabetes_ID3"       # label column inside the CSV
LABEL_CODES = {"No": 0, "Yes": 1}    # text label -> integer class
df = pd.read_csv("Diabetes_ID3.csv")

# Prepare features and labels.
# Every column except the target is a feature; get_dummies one-hot encodes
# any non-numeric feature columns.
X = pd.get_dummies(df.drop(columns=[TARGET_COLUMN]))

# Series.map yields NaN for any value missing from LABEL_CODES (e.g. "yes",
# " No", or an empty cell). Fail here with a clear message instead of letting
# the NaN reach model fitting.
encoded_labels = df[TARGET_COLUMN].map(LABEL_CODES)
unmapped_rows = encoded_labels.isna()
if unmapped_rows.any():
    unexpected = sorted(df.loc[unmapped_rows, TARGET_COLUMN].astype(str).unique())
    raise ValueError(
        f"Column {TARGET_COLUMN!r} contains labels outside "
        f"{sorted(LABEL_CODES)}: {unexpected}"
    )
y = encoded_labels.values

# Split data into training and test sets (80-20 split); the fixed
# random_state keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train the entropy-criterion decision tree used as this lab's ID3 classifier.
# NOTE(review): scikit-learn documents its trees as an optimized CART variant;
# criterion="entropy" gives information-gain splits, but the splits stay
# binary - confirm that is acceptable for an "ID3" lab.
tree_params = {
    "criterion": "entropy",
    "max_depth": 4,
    "min_samples_split": 3,
    "min_samples_leaf": 2,
    "random_state": 42,
}
model = DecisionTreeClassifier(**tree_params)
model.fit(X_train, y_train)

# Predict on training and test data (same fitted model for both splits)
y_train_pred, y_test_pred = (
    model.predict(features) for features in (X_train, X_test)
)

def _print_split_report(headings, y_true, y_pred):
    """Print confusion matrix, classification report and accuracy for one split.

    headings: (matrix_heading, report_heading, accuracy_label) triple; each
        string is printed verbatim ahead of the corresponding metric.
    y_true:   ground-truth labels for the split.
    y_pred:   labels predicted by the model for the same rows.
    """
    matrix_heading, report_heading, accuracy_label = headings
    print(matrix_heading)
    print(metrics.confusion_matrix(y_true, y_pred))
    print(report_heading)
    print(metrics.classification_report(y_true, y_pred))
    print(accuracy_label, metrics.accuracy_score(y_true, y_pred))


# Training evaluation
_print_split_report(
    (
        "Training Confusion Matrix:",
        "\nTraining Classification Report:",
        "Training Accuracy:",
    ),
    y_train,
    y_train_pred,
)

# Test evaluation
_print_split_report(
    (
        "\nTest Confusion Matrix:",
        "\nTest Classification Report:",
        "Test Accuracy:",
    ),
    y_test,
    y_test_pred,
)

# Feature importance: one "<column>: <score>" line per feature column of X,
# in column order, with the score shown to four decimals.
print("\nFeature Importance:")
importance_rows = [
    f"{column}: {score:.4f}"
    for column, score in zip(X.columns, model.feature_importances_)
]
for row in importance_rows:
    print(row)

# Visualize decision tree
dot_data = export_graphviz(
    model,
    out_file=None,  # return the DOT source as a string instead of writing a file
    feature_names=X.columns.tolist(),  # plain list of str rather than a pandas Index
    class_names=["No", "Yes"],  # listed in the order of the 0/1 label encoding
    filled=True,
    rounded=True,
    special_characters=True,
)
graph = graphviz.Source(dot_data)

# Render to diabetes_id3_tree.pdf and open it in the default viewer in one
# call. A separate graph.view() after render() would render the same file a
# second time. The returned path is the cell's displayed result.
graph.render("diabetes_id3_tree", format="pdf", view=True)


Name: Jiya Honaa
Symbol No: 29097/078
Lab: ID3 Decision Tree Classifier
Training Confusion Matrix:
[[34  2]
 [18 26]]

Training Classification Report:
              precision    recall  f1-score   support

           0       0.65      0.94      0.77        36
           1       0.93      0.59      0.72        44

    accuracy                           0.75        80
   macro avg       0.79      0.77      0.75        80
weighted avg       0.80      0.75      0.74        80

Training Accuracy: 0.75

Test Confusion Matrix:
[[10  1]
 [ 7  2]]

Test Classification Report:
              precision    recall  f1-score   support

           0       0.59      0.91      0.71        11
           1       0.67      0.22      0.33         9

    accuracy                           0.60        20
   macro avg       0.63      0.57      0.52        20
weighted avg       0.62      0.60      0.54        20

Test Accuracy: 0.6

Feature Importance:
Pregnancies: 0.0000
Glucose: 0.1779
BloodPressure: 0.0000
S

'diabetes_id3_tree.pdf'