## Import tools

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from os import path as os_path
from PIL import Image
from sklearn import tree
from sklearn.tree import export_graphviz 
from subprocess import call
import dtreeviz

## Data Extraction

In [None]:
# Load the Guess Who picture from the relative `../pics` folder and display
# it through PIL's `Image.show()`.
img = Image.open(r"../pics/guess_who.jpg")
img.show()

In [None]:
# Read the dataset; `header=0` makes the first CSV row the column names.
data = pd.read_csv("../csv/guess_who_dataset.csv", header=0)
# Preview the first 10 rows (kept as the last expression so the notebook renders it).
data.head(10)

## Feature Extraction

In [None]:
# Columns used from the dataset: the first entry is the label column,
# every following entry is a feature column.
colNames = [
    "Name",
    "Bold",
    "Hat",
    "Glasses",
    "Blue Eyes",
    "Moustache",
    "Beard",
    "Brown Hair",
    "Wide Nose",
    "White Hair",
]
selectedFeatures = colNames[1:]
print(len(selectedFeatures))

# Feature matrix and label column taken from the loaded dataset.
X = data[selectedFeatures]
target = data["Name"]
features = X

# Plain-list view of the labels (in dataset row order) and a matching
# positional index range.
classNames = target.to_list()
y = range(len(classNames))

## Create model

In [None]:
# Fit a decision tree (depth capped at 8) that maps the selected feature
# columns to the "Name" label.
# No matplotlib figure is created here: nothing is drawn in this cell, and an
# unused `plt.figure(...)` would only emit a blank figure and keep it alive.
# The plotting cell creates its own figure.
clf = tree.DecisionTreeClassifier(max_depth=8)
clf.fit(features, target)

## Visualize the model

In [None]:
# Large canvas so the text inside the tree nodes stays readable.
fig = plt.figure(figsize=(25, 20))
# scikit-learn expects `class_names` in ascending class order, i.e. the order
# of `clf.classes_`. `classNames` follows the CSV row order, which only matches
# if the file happens to be sorted by name, so take the labels from the fitted
# classifier to guarantee every leaf gets the right name.
_ = tree.plot_tree(clf,
                   feature_names=selectedFeatures,
                   class_names=[str(label) for label in clf.classes_],
                   filled=True)


In [None]:
def exportTreeToImageFile(filePath, fileName, featureNames, classNames):
    """Export the fitted tree to a Graphviz ``.dot`` file and render it as PNG.

    Writes ``<filePath>/<fileName>.dot`` and then invokes the external ``dot``
    command to produce ``<filePath>/<fileName>.png`` at 350 dpi.

    NOTE: the tree that is exported is the module-level classifier ``clf``;
    it is not passed in as an argument.

    Args:
        filePath: Directory in which both output files are written.
        fileName: Base name (without extension) shared by both output files.
        featureNames: Feature labels forwarded to ``export_graphviz``.
        classNames: Class labels forwarded to ``export_graphviz``.

    Raises:
        RuntimeError: If the ``dot`` command exits with a non-zero status.
    """
    dotFile = os_path.join(filePath, f"{fileName}.dot")
    pngFile = os_path.join(filePath, f"{fileName}.png")

    export_graphviz(clf, out_file=dotFile,
                    feature_names=featureNames,
                    class_names=classNames,
                    rounded=True, proportion=True,
                    precision=2, filled=True)

    # `call` returns the process exit status; surface a failed render here
    # instead of leaving a missing or stale PNG to be discovered later.
    exitCode = call(['dot', '-Tpng', dotFile, '-o', pngFile, '-Gdpi=350'])
    if exitCode != 0:
        raise RuntimeError(
            f"Graphviz 'dot' exited with status {exitCode} while rendering {pngFile}"
        )

In [None]:
# Location and base name of the rendered tree image.
filePath = "../pics"
fileName = "tree"

# Uncomment to regenerate the image before opening it:
# exportTreeToImageFile(filePath, fileName, selectedFeatures, classNames)

# Open the PNG and show it with PIL.
pngFile = os_path.join(filePath, fileName + ".png")
img = Image.open(pngFile)
img.show()

## Test the model

In [None]:
def guessWho(personName):
    """Return the classifier's prediction for the dataset row of ``personName``.

    The name is looked up in the module-level ``classNames`` list
    (``list.index`` raises ``ValueError`` when it is absent); the row at that
    position is taken from ``X`` and handed to ``clf.predict``.
    """
    rowPos = classNames.index(personName)
    # Slice (rather than index) so the classifier receives a one-row frame.
    singleRow = X[selectedFeatures][rowPos:rowPos + 1]
    prediction = clf.predict(singleRow)
    return prediction

In [None]:
# Spot-check the model: each printed prediction should be the name itself.
for personName in ("Alex", "Alfred", "Tom"):
    print(guessWho(personName))