In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [None]:
# Load the handwritten-digits dataset: X holds the feature matrix, y the digit labels.
dataset = load_digits()

X, y = dataset.data, dataset.target

# Turn the multi-class problem into a binary one: the digit 1 stays the positive
# class (label 1) and every other digit is relabeled as the negative class (label 0).
# Working on a copy keeps the original multi-class labels in `y` untouched.
y_binary_imbalanced = y.copy()
y_binary_imbalanced[y_binary_imbalanced != 1] = 0

# random_state=0 makes the train/test split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y_binary_imbalanced, random_state=0)

# creating decision tree with max depth of 2
# random_state is fixed as well: scikit-learn permutes the features at every split,
# so equally good splits could otherwise be picked differently from run to run and
# the metrics printed below would not be guaranteed to be reproducible.
dt = DecisionTreeClassifier(max_depth=2, random_state=0).fit(X_train, y_train)
tree_predicted = dt.predict(X_test)

# when evaluating a model, you need to see various types of metrics in order to get a good
# grasp on whether the model is performing well or not. Here we are looking at the accuracy,
# precision, recall, and f1 score of the model

# (TP = true positive, FP = false positive, TN = true negative, FN = false negative)

# accuracy = # of predictions correct / total predictions
#          = (TP + TN) / (TP + TN + FP + FN)
print("accuracy: {:.2f}".format(accuracy_score(y_test, tree_predicted)))
# precision = TP / (TP + FP)
# the fraction of positive predictions that are actually correct: the number of correct
# positive predictions divided by the total number of positive predictions made
print("precision: {:.2f}".format(precision_score(y_test, tree_predicted)))
# recall = TP / (TP + FN)
# the fraction of actual positive instances that the model found: the number of correct
# positive predictions divided by the total number of truly positive instances
print("recall: {:.2f}".format(recall_score(y_test, tree_predicted)))
# f1 = 2 x (precision x recall) / (precision + recall) or:
# f1 = 2TP / (2TP + FN + FP)
# the harmonic mean of precision and recall; it combines both into a single measure
# that is only high when both precision and recall are high.
print("f1: {:.2f}".format(f1_score(y_test, tree_predicted)))

In [None]:
# Rather than printing each metric separately, sklearn can build one text table with
# precision, recall and f1-score for every class.
# The support column is the number of test-set instances that carry the given label.
# target_names are the display names for label 0 and label 1, in that order.
class_labels = ['other digits', '1']
report = classification_report(y_test, tree_predicted, target_names=class_labels)
print(report)