# Decision Tree Exhibition

## Part 1: Decision Tree from Scratch

In [None]:
# Step 0: Import the necessary packages
from from_scratch.decision_tree import DecisionTree, information_gain
from from_scratch.evaluation_metrics import f1_measure, precision_and_recall, confusion_matrix, accuracy
from from_scratch.import_data import load_data, train_test_split

In [None]:
# Step 1: Import diabetes.csv with load_data
# load_data returns the feature rows, the target labels, and the attribute names.
features, targets, attribute_names = load_data("diabetes.csv")

# NOTE: Part 2 re-imports `train_test_split` from scikit-learn under the same
# name; re-run the Part 1 import cell before re-running this cell afterwards.
# fraction=0.85 is presumably the share of rows kept for training -- confirm
# against from_scratch.import_data.
split = train_test_split(features, targets, fraction=0.85)
train_features, train_targets, test_features, test_targets = split

In [None]:
# Step 2: Fit a decision tree to the training data
# The tree is constructed with the attribute names returned by load_data
# (presumably used to label the nodes -- confirm in from_scratch.decision_tree).
learner = DecisionTree(attribute_names)
# Train on the training portion of the split only; the test rows stay unseen.
learner.fit(train_features, train_targets)

learner.visualize() # visualize tree

In [None]:
# Step 3: Predict labels of testing set and evaluate the decision tree's performance
# NOTE: Part 2 re-imports `confusion_matrix` from scikit-learn under the same
# name; re-run the Part 1 import cell before re-running this cell afterwards.
predictions = learner.predict(test_features)

# Score the predictions against the test targets with each from-scratch metric.
confusion_mat = confusion_matrix(test_targets, predictions)
accuracy_num = accuracy(test_targets, predictions)
precision, recall = precision_and_recall(test_targets, predictions)
f1_measure_num = f1_measure(test_targets, predictions)

# Single print call: each entry keeps its trailing "\n" and the separator adds
# the line break, so every metric is still followed by one blank line.
print(
    f"Confusion Matrix:\n{confusion_mat}\n",
    f"Accuracy: {accuracy_num}\n",
    f"Precision: {precision}; Recall: {recall}\n",
    f"F1_Measure: {f1_measure_num}\n",
    sep="\n",
)

## Part 2: Decision Tree with scikit-learn


In [None]:
# Step 0: Import the necessary packages
## For preparing the data and fitting the decision tree
import pandas as pd
import sklearn.tree
from sklearn.metrics import confusion_matrix, accuracy_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_text

In [None]:
# Step 1: Import and wrangle diabetes.csv
# The first row of the CSV supplies the column names.
pima = pd.read_csv("diabetes.csv", header=0)

# Eight predictor columns form the design matrix; "Outcome" is the label.
feature_columns = [
    "Pregnancies",
    "Glucose",
    "BloodPressure",
    "SkinThickness",
    "Insulin",
    "BMI",
    "DiabetesPedigreeFunction",
    "Age",
]
X = pima[feature_columns]
y = pima["Outcome"]

# Hold out 15% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=1
)

In [None]:
# Step 2: Fit decision tree classifier
# Entropy is the split criterion. The seed is fixed (matching the seeded
# train/test split) because scikit-learn permutes the features at every split,
# so an unseeded tree can differ between runs on identical training data.
clf = DecisionTreeClassifier(criterion="entropy", random_state=1)
# fit() trains in place and returns the estimator itself, so no rebinding is needed.
clf.fit(X_train, y_train)

# Visualize tree as text. Feature names come from the training frame, so they
# always match the columns the model was actually fitted on.
print(export_text(clf, feature_names=list(X_train.columns)))

In [None]:
# Step 3: Predict labels of testing set and evaluate the decision tree's performance
y_predictions = clf.predict(X_test)

confusion_matrix