# An **ExtraTrees** Classifier for HoloLens 2 Gaze Features
### Training a classifier with gaze features for calculating predictions of various activities

This notebook trains an ExtraTreesClassifier with selected features and corresponding labels.\
The features and labels are read from a given csv-file.


## Read data from a csv-file.

In [None]:
import os
import pandas as pd

# CHANGE this location to where you stored the feature files ⬇️
recording_location = './'

# Path of the CSV that holds all features, resolved against `recording_location`.
all_features_csv = os.path.join(
    recording_location,
    './Data/FeatureFiles/feature_list_all.csv',
)

# Load the complete feature table into a DataFrame.
df = pd.read_csv(filepath_or_buffer=all_features_csv)

In [None]:
# Uncomment the following lines to see all columns of the csv file (i.e., the features and labels)
# print("Columns of the CSV file are 19 features, label of the activity, duration or the timespan of the activity, and the ID of the participant:")
# list(df.columns)

In [None]:
from IPython.display import display

# Build one sub-table per activity by filtering on the `label` column.
activity = df["label"]
read_df = df.loc[activity == "Reading"]
inspect_df = df.loc[activity == "Inspection"]
search_df = df.loc[activity == "Search"]

#print("Sample reading data:")
#display(read_df[['meanFix', 'maxFix', 'varFix', 'xDir', 'yDir']].head(10))

#print("Sample inspection data:")
#display(inspect_df[['meanFix', 'maxFix', 'varFix', 'xDir', 'yDir']].head(10))

#print("Sample search data:")
#display(search_df[['meanFix', 'maxFix', 'varFix', 'xDir', 'yDir']].head(10))

## For the classification we use an [ExtraTrees Classifier](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesClassifier.html)

First, we need to include some libraries 

In [None]:
#
# Importing the necessary packages and libraries
#
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn import svm, datasets
from sklearn.preprocessing import MaxAbsScaler
import matplotlib.pyplot as plt
import numpy as np

## Let's store the labels and six selected features (among 19 as denoted in the csv file)

In [None]:
# Names of the feature columns that are fed to the classifier.
selected_columns = ["meanFix", "maxFix", "varFix", "xDir", "yDir", "fixDensPerBB"]
features = df[selected_columns]

# The `label` column is the classification target.
labels = df["label"]

## This is how the features and labels look

In [None]:
# Show a caption followed by the first ten rows, first for the features and
# then for the labels.
for caption, preview in (("Features:", features), ("Labels:", labels)):
    print(caption)
    display(preview.head(10))

## Let's normalize the features (i.e., each column individually)

In [None]:
# Fit a MaxAbsScaler on the selected features; `fit` returns the scaler itself,
# so construction and fitting can be chained.
scaler = MaxAbsScaler().fit(features)
scaled = scaler.transform(features)

# Rebuild a DataFrame from the scaled values, reusing the original column names.
scaled_features = pd.DataFrame(data=scaled, columns=features.columns)

print("Normalized Features:")
display(scaled_features.head(10))

## Training / Test Split

In [None]:
# Alternative: split the raw (un-normalized) features instead. Swap this call with
# the active one below to see how the accuracy and confusion matrix change.
# feature_train, feature_test, label_train, label_test = train_test_split(features, labels, train_size=0.8, random_state = 0, stratify=labels)

# Active split: 80 % training data drawn from the normalized features,
# stratified by label and seeded with random_state=0.
feature_train, feature_test, label_train, label_test = train_test_split(
    scaled_features,
    labels,
    train_size=0.8,
    random_state=0,
    stratify=labels,
)

## Train the Classifier and Compare with RandomForest

In [None]:
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
import time
import numpy as np

# Train an ExtraTrees ensemble of 500 trees on the training split.
# The seed is fixed (same value as the train/test split) so that everything
# derived from this model is reproducible across kernel restarts.
cl = ExtraTreesClassifier(n_estimators=500, random_state=0)
cl.fit(feature_train, label_train)

## Let's collect the predictions from the test data, ...

In [None]:
# Predict a label for every row of the held-out test features with the
# classifier currently bound to `cl`.
pred = cl.predict(feature_test)

## ... compare them with the RandomForestClassifier, ...

In [None]:
# Benchmark: measure 20 independent fit + predict rounds of an ExtraTrees ensemble.
# The loop uses its own classifier variable and discards the predictions, so the
# names `cl` and `pred` used elsewhere in the notebook are not overwritten.
times = []

for _run in range(20):
    # perf_counter() is the high-resolution clock meant for measuring intervals.
    start = time.perf_counter()
    timing_cl = ExtraTreesClassifier(n_estimators=500)
    timing_cl.fit(feature_train, label_train)
    timing_cl.predict(feature_test)
    end = time.perf_counter()
    times.append(end - start)

# Mean and standard deviation of the measured durations, in seconds.
print("ExtraTrees fit+predict mean [s]:", np.mean(times))
print("ExtraTrees fit+predict std  [s]:", np.std(times))

In [None]:
# Benchmark: measure 20 independent fit + predict rounds of a RandomForest ensemble.
# The loop uses its own classifier variable and discards the predictions; binding
# the RandomForest to `cl` / `pred` here would make every later cell that reads
# those names evaluate the RandomForest instead of the ExtraTrees model.
times = []

for _run in range(20):
    # perf_counter() is the high-resolution clock meant for measuring intervals.
    start = time.perf_counter()
    timing_rf = RandomForestClassifier(n_estimators=500)
    timing_rf.fit(feature_train, label_train)
    timing_rf.predict(feature_test)
    end = time.perf_counter()
    times.append(end - start)

# Mean and standard deviation of the measured durations, in seconds.
print("RandomForest fit+predict mean [s]:", np.mean(times))
print("RandomForest fit+predict std  [s]:", np.std(times))

## . . . and have a look at the accuracy of the ExtraTreesClassifier:

In [None]:
# retrieve the accuracy and print it
accuracy = cl.score(feature_test, label_test)
print("Accuracy:", accuracy)

## Dump the trained model so that it can be used in online mode with the HoloLens 2

In [None]:
from joblib import dump, load
# Serialize the classifier bound to `cl` to 'classifier.joblib' in the current
# working directory.
# NOTE(review): the training features were normalized with `scaler` before the
# split, but only the classifier is persisted here — presumably whoever loads
# this file has to apply the same scaling; confirm, or persist `scaler` as well.
dump(cl, 'classifier.joblib') 

## This is what the confusion matrix looks like:

In [None]:
# creating a confusion matrix
cm = confusion_matrix(label_test, pred)

print("CM:")
print(cm)

## A more colorful confusion matrix:

In [None]:
# Create the 8x8 inch figure first and let from_estimator draw onto its axes.
# from_estimator already renders the plot, so building it without `ax` and then
# calling .plot(ax=ax) on a second figure would draw the matrix twice.
fig, ax = plt.subplots(figsize=(8, 8))
cm = ConfusionMatrixDisplay.from_estimator(cl, feature_test, label_test, ax=ax)