## Import tools

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
from PIL import Image
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor, export_graphviz 
from subprocess import call

## Data Extraction

In [None]:
# Load the Guess Who board picture from disk and display it.
img = Image.open(fp=r"../pics/guess_who.jpg")
img.show()

In [None]:
# Read the character table; the first line of the CSV is used as the header.
data = pd.read_csv(
    "../csv/guess_who_dataset.csv",
    header=0,
)
# Preview the first ten rows.
data.head(n=10)

## Feature Extraction

In [None]:
# Columns read from the dataset: the character name followed by the traits.
colNames = [
    "Name",
    "Bold", "Hat", "Glasses",
    "Blue Eyes", "Moustache", "Beard",
    "Brown Hair", "Wide Nose", "White Hair",
]

# Every column except the leading "Name" column is used as a feature.
selectedFeatures = colNames[1:]
print(len(selectedFeatures))

# Feature matrix: one row per character, one column per trait.
X = data[selectedFeatures]

# Target: each character's position in classNames, so a predicted value can
# be mapped back to a name by indexing classNames.
classNames = data["Name"].to_list()
y = range(len(classNames))

## Create model

In [None]:
# Fixed seed so the fitted tree is reproducible; depth capped at 10.
regressor = DecisionTreeRegressor(
    max_depth=10,
    random_state=2022,
)
# Fit on the trait matrix against the row-index targets.
regressor.fit(X=X, y=y)

## Visualize the model

In [None]:
# fig = plt.figure(figsize=(25,20))
# _ = tree.plot_tree(regressor,
#                    feature_names = selectedFeatures,
#                    class_names = classNames,
#                    filled=True)

In [None]:
# Export the fitted tree as a Graphviz dot file and render it to a PNG.
dotFile = "../pics/tree.dot"
pngFile = "../pics/tree.png"

# class_names is not passed: export_graphviz only uses it for classification
# trees, so it has no effect on a DecisionTreeRegressor.
export_graphviz(regressor, out_file=dotFile,
                feature_names=selectedFeatures,
                rounded=True, proportion=True,
                precision=2, filled=True)

# Convert to png using system command (requires Graphviz)
exitCode = call(['dot', '-Tpng', dotFile, '-o', pngFile, '-Gdpi=500'])
if exitCode != 0:
    # Fail loudly here instead of opening a missing or stale image below.
    raise RuntimeError(
        "Graphviz 'dot' exited with code {}; {} was not rendered".format(
            exitCode, pngFile))

img = Image.open(pngFile)
img.show()

## Test the model

In [None]:
# Look up Alex's row and check that the tree maps it back to "Alex".
idx = classNames.index("Alex")
# X already holds only the selected feature columns; slicing keeps a one-row
# DataFrame, i.e. the 2-D input that predict() expects.
alex = X[idx:idx+1]
# predict()'s second positional parameter is check_input, not a feature list,
# so only the sample is passed. The prediction is a row index into classNames.
pred = round(regressor.predict(alex)[0])
print(classNames[pred])

In [None]:
# Look up Bill's row and check that the tree maps it back to "Bill".
idx = classNames.index("Bill")
# X already holds only the selected feature columns; slicing keeps a one-row
# DataFrame, i.e. the 2-D input that predict() expects.
bill = X[idx:idx+1]
# predict()'s second positional parameter is check_input, not a feature list,
# so only the sample is passed. The prediction is a row index into classNames.
pred = round(regressor.predict(bill)[0])
print(classNames[pred])

In [None]:
# Look up Philip's row and check that the tree maps it back to "Philip".
idx = classNames.index("Philip")
# X already holds only the selected feature columns; slicing keeps a one-row
# DataFrame, i.e. the 2-D input that predict() expects.
philip = X[idx:idx+1]
# predict()'s second positional parameter is check_input, not a feature list,
# so only the sample is passed. The prediction is a row index into classNames.
pred = round(regressor.predict(philip)[0])
print(classNames[pred])