# Decision Tree Exhibition

## Part 1: Decision Tree from Scratch

In [1]:
# Step 0: Import the decision-tree class, evaluation metrics, and data-loading helpers
from from_scratch.decision_tree import DecisionTree, information_gain
from from_scratch.evaluation_metrics import f1_measure, precision_and_recall, confusion_matrix, accuracy
from from_scratch.import_data import load_data, train_test_split

In [2]:
# Step 1: Import diabetes.csv with load_data
# load_data's three return values are unpacked as the features, the targets,
# and the attribute names (the latter are passed to DecisionTree in Step 2).
features, targets, attribute_names = load_data("diabetes.csv")
# Split the data into training and testing portions.
# NOTE(review): `fraction` is presumably the share of rows assigned to the
#   training set -- confirm against train_test_split.
# NOTE(review): no seed is passed here; if train_test_split shuffles, the
#   metrics printed in Step 4 will not be reproducible -- verify.
# NOTE(review): the saved confusion matrix in Step 4 sums to 154 test rows;
#   confirm that is consistent with fraction = 0.85 and re-run the notebook
#   from a fresh kernel if the stored output is stale.
train_features, train_targets, test_features, test_targets = train_test_split(features, targets, fraction = 0.85)

In [3]:
# Step 2: Fit a decision tree to the training data
# The learner is constructed with the attribute names; presumably these are
# the labels shown on the nodes of the printed tree (Glucose, Age, BMI, ...).
learner = DecisionTree(attribute_names)
learner.fit(train_features, train_targets)

learner.visualize() # print the fitted tree; the leading number on each output line appears to be the node's depth

0: Glucose == 124.0
1:  Age == 29.0
2:   BMI == 31.0
3:    Pregnancies == 8.0
4:     DiabetesPedigreeFunction == 0.678
5:      root == 0
5:      SkinThickness == 14.0
6:       Insulin == 182.0
7:        root == 1
7:        root == 0
6:       root == 0
4:     root == 1
3:    DiabetesPedigreeFunction == 0.496
4:     SkinThickness == 16.0
5:      Pregnancies == 5.0
6:       BloodPressure == 60.0
7:        root == 1
7:        Insulin == 0.0
8:         root == 1
8:         root == 0
6:       root == 1
5:      BloodPressure == 58.0
6:       Pregnancies == 3.0
7:        root == 0
7:        root == 1
6:       root == 0
4:     BloodPressure == 70.0
5:      SkinThickness == 30.0
6:       Insulin == 90.0
7:        root == 0
7:        Pregnancies == 2.0
8:         root == 0
8:         root == 1
6:       Pregnancies == 4.0
7:        Insulin == 48.0
8:         root == 1
8:         root == 0
7:        root == 1
5:      SkinThickness == 18.0
6:       Pregnancies == 3.0
7:        Insulin == 0.0
8:     

In [4]:
# Step 3: Predict labels of the testing set
# Uses the learner fitted in Step 2 on the held-out test_features from the
# Step 1 split; the result is compared against test_targets in Step 4.
predictions = learner.predict(test_features)

In [5]:
# Step 4: Evaluate the decision tree's performance
# Every metric compares the true test labels against the Step 3 predictions.
confusion_mat = confusion_matrix(test_targets, predictions)
accuracy_num = accuracy(test_targets, predictions)
precision, recall = precision_and_recall(test_targets, predictions)
f1_measure_num = f1_measure(test_targets, predictions)

# Emit the whole report with a single print call; sep="\n" plus the default
# line ending reproduces the blank line that follows each entry.
print(
    f"Confusion Matrix:\n{confusion_mat}\n",
    f"Accuracy: {accuracy_num}\n",
    f"Precision: {precision}; Recall: {recall}\n",
    f"F1_Measure: {f1_measure_num}\n",
    sep="\n",
)

Confusion Matrix:
[[88 15]
 [23 28]]

Accuracy: 0.7532467532467533

Precision: 0.6511627906976745; Recall: 0.5490196078431373

F1_Measure: 0.5957446808510639

