<a href="https://colab.research.google.com/github/PatrickTchoupe/HGR-Project/blob/main/gestureRecognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; the dataset paths used further down
# start with "drive/MyDrive".  The google.colab module is presumably only
# available inside a Colab runtime -- confirm before running elsewhere.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd
import numpy as np

class Dataset:
    """
    Load gesture recordings stored as one CSV file per repetition.

    Files are looked up as ``f"{path}{subject}-{element}-{repetition}.csv"``
    where ``subject`` and ``repetition`` are both counted from 1.  Only the
    first three columns of every file are kept (presumably the x, y, z
    coordinates of each sample -- TODO confirm against the CSV layout).
    """

    def __init__(self, elements, path="drive/MyDrive/Datasets_CSV/Domain1_csv/Subject",
                 n_subjects=10, n_repetitions=10):
        """
        elements: gesture identifiers; used both in the file names and as labels
        path: prefix placed in front of "<subject>-<element>-<repetition>.csv"
        n_subjects: number of subjects to load, numbered 1..n_subjects
        n_repetitions: number of repetitions per subject and element,
            numbered 1..n_repetitions
        """
        # Materialise once: the identifiers are iterated again for every subject,
        # which would silently yield nothing for a one-shot iterable.
        elements = list(elements)

        # dataset[subject - 1][element] -> list of recordings of that subject
        self.dataset = [{} for _ in range(n_subjects)]
        # Flat list of every recording, ordered by subject, element, repetition
        self.data = []
        # labels[n] is the element identifier of data[n]
        self.labels = []

        for subject in range(1, n_subjects + 1):
            per_subject = self.dataset[subject - 1]
            for element in elements:
                per_subject[element] = []
                for repetition in range(1, n_repetitions + 1):
                    # Only the read itself is expected to fail
                    try:
                        df = pd.read_csv(f"{path}{subject}-{element}-{repetition}.csv")
                    except OSError:
                        # Best effort: report the file and carry on.  A skipped file
                        # shifts the position of every later recording in
                        # self.data / self.labels.
                        print(f"Unable to read dataset file {path}{subject}-{element}-{repetition}.csv!\n")
                        continue
                    # One [c0, c1, c2] list per row, without a Python-level row loop
                    user_data = df.iloc[:, 0:3].to_numpy().tolist()
                    per_subject[element].append(user_data)
                    self.data.append(user_data)
                    self.labels.append(element)

In [None]:
# Domain 1 dataset: the gestures are identified by the integers 0..9
data1 = Dataset(list(range(10)))
# Bare expression so that the nested structure is shown as the cell output
data1.dataset

In [None]:
# Domain 4 dataset: the gestures are identified by the name of a 3D figure
figures = [
    "Pyramid",
    "Sphere",
    "Cone",
    "Cuboid",
    "Cylinder",
    "Hemisphere",
    "RectangularPipe",
    "Tetrahedron",
    "Toroid",
    "CylindricalPipe",
]
data4 = Dataset(figures, path="drive/MyDrive/Datasets_CSV/Domain4_csv/Subject")

# Part 1 : KNN and DTW

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score
from scipy.stats import mode
from joblib import Parallel, delayed
from numba import njit, prange
from numba.typed import List

@njit()
def distance(x, y):
    """
    Return the squared euclidean distance between two vectors, i.e. the sum
    of the squared coordinate differences (no square root is taken here).

    x, y: indexable vectors; y is read at every index of x
    """
    total = 0.0
    for idx in range(len(x)):
        delta = x[idx] - y[idx]
        total = total + delta * delta
    return total

@njit(nogil=True)
def dtw_distance(time_serie1, time_serie2):
    """
    Compute a length-normalised DTW distance between two sequences of points.

    The cost of matching two points is the value returned by ``distance``.
    These costs are accumulated along the cheapest warping path, the square
    root of the accumulated total is taken, and the result is divided by
    ``len(time_serie1) + len(time_serie2)``.

    time_serie1, time_serie2: sequences of points; every point is converted
        with ``List(...)`` before it is handed to ``distance``
    returns: the normalised distance, or ``np.inf`` if either sequence is empty
    """
    l1, l2 = len(time_serie1), len(time_serie2)
    # Nothing can be aligned with an empty sequence
    if l1 == 0 or l2 == 0:
        return np.inf
    # Row 0 / column 0 form an infinite border so the recurrence needs no special cases
    cost_matrix = np.full((l1 + 1, l2 + 1), np.inf)
    # Initialize the first cell
    cost_matrix[0, 0] = 0.
    # populate the cost matrix
    for i in range(l1):
        # Convert the point of the first series once per row, not once per cell
        point1 = List(time_serie1[i])
        for j in range(l2):
            cost = distance(point1, List(time_serie2[j]))
            cost_matrix[i+1, j+1] = cost + min(cost_matrix[i, j+1], cost_matrix[i+1, j], cost_matrix[i, j])
    # Only the final cell is needed, so take a single square root
    return np.sqrt(cost_matrix[l1, l2]) / (l1 + l2)

class KNN_DTW:
    """
    k-nearest-neighbours classifier that uses ``dtw_distance`` as the distance
    between pairs of time series.
    """
    def __init__(self, n_neighbors=1):
        """
        n_neighbors: number of closest training sequences that vote on a label
        """
        self.n_neighbors = n_neighbors

    def fit(self, x, labels):
        """
        Store the training set and the corresponding labels (no computation
        happens here; all the work is done in ``predict``).

        x: the training set, a list/array of sequences
        labels: the label of each sequence in x
        """
        self.x = np.array(x)
        self.labels = np.array(labels)

    @staticmethod
    def _majority_vote(neighbor_labels):
        """
        Return the most frequent label of every row of a 2-D label array.

        Works for numeric and string labels.  np.unique returns the distinct
        labels in sorted order, so a tie goes to the smallest label.
        The result is a 1-D array with one entry per row.
        """
        winners = []
        for row in neighbor_labels:
            values, counts = np.unique(row, return_counts=True)
            winners.append(values[np.argmax(counts)])
        return np.array(winners)

    def predict(self, x):
        """
        Predict the label of every sequence in x.

        x: the test set, a list/array of sequences
        returns: 1-D array holding one predicted label per test sequence
        """
        n_test, n_train = len(x), len(self.x)

        # distance between every test sequence (row) and every training sequence
        # (column); threads are requested, and dtw_distance is compiled with nogil=True
        matrix = Parallel(n_jobs=-1, prefer="threads", verbose=0)(
            delayed(dtw_distance)(
                List(x[i]), List(self.x[j])
            )
            for i in range(n_test) for j in range(n_train)
        )
        # explicit shape: "-1" cannot be inferred when the test set is empty
        dist_matrix = np.array(matrix).reshape((n_test, n_train))
        # the index of the k nearest neighbors
        indexes = dist_matrix.argsort()[:, :self.n_neighbors]
        # majority vote between the labels of the neighbors
        return self._majority_vote(self.labels[indexes])

# Validation function
def test(user_id, dataset, labels, model, LIMIT=100):
    """
    Evaluate the model on the samples of one user after fitting it on all
    the other samples.

    user_id: 0-based block number; the samples at positions
        user_id*LIMIT .. user_id*LIMIT + LIMIT - 1 form the test set
    dataset: numpy array of samples, indexed along its first axis
    labels: numpy array holding the label of each sample
    model: object providing fit(x, labels) and predict(x)
    LIMIT: number of consecutive samples that belong to one user
    returns: (accuracy on the test block, predictions for the test block)
    """
    # split the dataset
    indexes = np.arange(user_id*LIMIT, user_id*LIMIT+LIMIT)
    # axis=0 removes whole samples; without it np.delete flattens
    # multi-dimensional input before deleting
    train_set = np.delete(dataset, indexes, axis=0)
    train_labels = np.delete(labels, indexes, axis=0)
    test_labels = labels[indexes]
    test_set = dataset[indexes]
    # Prediction
    model.fit(train_set, train_labels)
    predictions = model.predict(test_set)
    return accuracy_score(test_labels, predictions), predictions

def ud_test(gesture_id,dataset,labels,model):
    """
    Evaluate the model on an explicit set of sample positions after fitting
    it on all the remaining samples.

    gesture_id: numpy array of sample positions; it is transposed and
        flattened to obtain the positions of the test samples
    dataset: numpy array of samples, indexed along its first axis
    labels: numpy array holding the label of each sample
    model: object providing fit(x, labels) and predict(x)
    returns: (accuracy on the test samples, predictions for the test samples)
    """
    #split the data
    indexes = gesture_id.T.flatten()
    # axis=0 removes whole samples; without it np.delete flattens
    # multi-dimensional input before deleting
    train_set = np.delete(dataset, indexes, axis=0)
    train_labels = np.delete(labels, indexes, axis=0)
    test_labels = labels[indexes]
    test_set = dataset[indexes]
    # Prediction
    model.fit(train_set, train_labels)
    predictions = model.predict(test_set)
    return accuracy_score(test_labels, predictions), predictions


def validation(dataset, labels, model, LIMIT=100):
    """
    Run ``test`` once for each of 10 users, holding out that user's block of
    LIMIT samples, and print the score of every user.

    returns: (list of the 10 accuracies, list of the 10 prediction arrays)
    """
    samples = np.array(dataset)
    targets = np.array(labels)
    accuracies, predictions = [], []
    for user_id in range(10):
        score, predicted = test(user_id, samples, targets, model, LIMIT)
        accuracies.append(score)
        predictions.append(predicted)
        # users are reported with a 1-based number
        print("The user score {}: {}".format(user_id+1, score))
    return accuracies, predictions

def ud_validation(dataset,labels,model):
    """
    Run ``ud_test`` on 10 folds and print the score of every fold.

    Fold j holds out the positions 100*i + 10*k + j for every i and k in 0..9.

    returns: (list of the 10 accuracies, list of the 10 prediction arrays)
    """
    samples = np.array(dataset)
    targets = np.array(labels)
    # fold_indexes[j][k][i] == 100*i + 10*k + j
    fold_indexes = np.arange(1000).reshape(10, 10, 10).transpose(2, 1, 0)
    accuracies, predictions = [], []
    for fold, block in enumerate(fold_indexes):
        score, predicted = ud_test(block, samples, targets, model)
        accuracies.append(score)
        predictions.append(predicted)
        print("The sample score {}: {}".format(fold, score))
    return accuracies, predictions



### Application on domain 1 user-dependent setting

In [None]:
# 3-nearest-neighbours DTW classifier, evaluated on domain 1 with ud_validation.
# NOTE(review): ".T" leaves the data untouched only if the object array is 1-D
# (i.e. the recordings have differing lengths) -- confirm.
model1_ud = KNN_DTW(3)
accuracies1_ud, prediction1_ud = ud_validation(np.array(data1.data,dtype=object).T, data1.labels, model1_ud)
# Per-fold accuracies followed by their mean and standard deviation
print(f"Accuracies {accuracies1_ud} average accuracy {np.mean(accuracies1_ud)} and standard deviation {np.std(accuracies1_ud)}")

### Application on domain 1 user-independent setting

In [None]:
# 3-nearest-neighbours DTW classifier, evaluated on domain 1 with validation
# (one held-out block of 100 consecutive samples per user).
model1 = KNN_DTW(3)
accuracies1, prediction1 = validation(np.array(data1.data,dtype=object).T, data1.labels, model1,100)
# Per-user accuracies followed by their mean and standard deviation
print(f"Accuracies {accuracies1} average accuracy {np.mean(accuracies1)} and standard deviation {np.std(accuracies1)}")

### Application on domain 4 user-dependent setting

In [None]:
# 3-nearest-neighbours DTW classifier, evaluated on domain 4 with ud_validation.
# NOTE(review): ".T" leaves the data untouched only if the object array is 1-D
# (i.e. the recordings have differing lengths) -- confirm.
model4_ud = KNN_DTW(3)
accuracies4_ud, prediction4_ud = ud_validation(np.array(data4.data,dtype=object).T, data4.labels, model4_ud)
# Per-fold accuracies followed by their mean and standard deviation
print(f"Accuracies {accuracies4_ud} average accuracy {np.mean(accuracies4_ud)} and standard deviation {np.std(accuracies4_ud)}")

### Application on domain 4 user-independent setting

In [None]:
# 3-nearest-neighbours DTW classifier, evaluated on domain 4 with validation
# (one held-out block of 100 consecutive samples per user).
model4 = KNN_DTW(3)
accuracies4, prediction4 = validation(np.array(data4.data,dtype=object).T, data4.labels, model4, 100)
# Per-user accuracies followed by their mean and standard deviation
print(f"Accuracies {accuracies4} average accuracy {np.mean(accuracies4)} and standard deviation {np.std(accuracies4)}")

## Plot the confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt

def plot_conf_mat(true_labels, pred_labels, LIMIT=100):
    """
    Draw one confusion-matrix heatmap per user.

    true_labels: numpy array with the true label of every sample; the samples
        of user u are at positions u*LIMIT .. u*LIMIT + LIMIT - 1
    pred_labels: one sequence of predictions per user, in user order
    LIMIT: number of consecutive samples that belong to one user
    """
    # Fix the set (and order) of classes once so that every matrix has the same
    # shape and the axes show the real class names, whatever a given user's
    # block happens to contain.
    class_names = np.unique(true_labels)
    sn.set(font_scale=1.4) # for label size
    for user_id, user_predictions in enumerate(pred_labels):
        print("The confusion matrix of user:", user_id+1)
        indexes = range(user_id*LIMIT, user_id*LIMIT+LIMIT)
        conf_mat = confusion_matrix(true_labels[indexes], user_predictions, labels=class_names)
        df_cm = pd.DataFrame(conf_mat, index=class_names, columns=class_names)
        plt.figure(figsize = (10,7))
        sn.heatmap(df_cm, annot=True) # write the counts inside the cells
        plt.show()

### Confusion matrix for domain 1 in user-independent setting

In [None]:
# One heatmap per user for domain 1, using the predictions currently stored in prediction1
plot_conf_mat(np.array(data1.labels), prediction1, LIMIT=100)

### Confusion matrix for domain 4 in user-independent mode

In [None]:
# One heatmap per user for domain 4, using the predictions currently stored in prediction4
plot_conf_mat(np.array(data4.labels), prediction4, LIMIT=100)

# Part 2 : $P Recognizer

In [None]:
pip install dollarpy

In [None]:
from sklearn.decomposition import PCA
from dollarpy import Recognizer, Template, Point
from sklearn.metrics import accuracy_score


def transform(dataset):
    """
    Project every recording of ``dataset.data`` onto its own first two
    principal components and wrap the resulting rows in ``Point`` objects.

    dataset: object exposing the recordings as ``dataset.data``
    returns: (list with one list of Point per recording,
              list with the explained variance ratio of each PCA fit)
    """
    projected = []
    variance_ratios = []
    for sequence in dataset.data:
        # a separate PCA is fitted for each recording
        reducer = PCA(n_components=2)
        coords_2d = reducer.fit_transform(sequence)
        projected.append([Point(*coords) for coords in coords_2d])
        variance_ratios.append(reducer.explained_variance_ratio_)

    return projected, variance_ratios

# 2-D versions of both datasets together with the per-recording variance ratios
final_dataset1,pca_variances1 = transform(data1)
final_dataset4, pca_variances4 = transform(data4)

# Mean over all recordings, reported separately for each of the two components
print("The average explained variance ratio is over all the dataset 1: ", np.mean(pca_variances1, axis=0))
print("The average explained variance ratio is over all the dataset 4: ", np.mean(pca_variances4, axis=0))

In [None]:
def validation(dataset, labels, LIMIT=100):
    """
    Evaluate the dollarpy recognizer with one held-out block per user.

    For each of 10 users the block of LIMIT consecutive samples is the test
    set and every other sample becomes a template.  This definition has the
    same name as the KNN/DTW helper defined earlier in the notebook and
    replaces it once this cell has run.

    dataset: sequence of gestures, each a list of Point
    labels: the label of each gesture
    LIMIT: number of consecutive samples that belong to one user
    returns: (list of the 10 accuracies, list of the 10 prediction lists);
        predictions are expressed with the original labels, not their
        string form
    """
    # 1-D object array filled element by element, so that gestures are kept as
    # whole lists whether or not they all have the same number of points
    samples = np.empty(len(dataset), dtype=object)
    for n, gesture in enumerate(dataset):
        samples[n] = gesture
    labels = np.array(labels)
    # Template names have to be strings; remember the label behind each name so
    # that predictions can be compared with the true labels
    label_of_name = {str(label): label for label in labels}
    accuracies = []
    predictions_per_users = []
    for user_id in range(10):
        indexes = np.arange(user_id*LIMIT, user_id*LIMIT+LIMIT)
        train_set = np.delete(samples, indexes)
        train_labels = np.delete(labels, indexes)
        test_labels = labels[indexes]
        test_set = samples[indexes]
        # Pair each training gesture with its own label (train_labels), not
        # with the label found at the same position of the full label array
        templates = [Template(str(label), points) for label, points in zip(train_labels, train_set)]
        recognizer = Recognizer(templates)
        predictions=[]
        for t in test_set:
            name = recognizer.recognize(t)[0]
            # unknown names are passed through unchanged
            predictions.append(label_of_name.get(name, name))
        acc = accuracy_score(test_labels, predictions)
        print("The user score {}: {}".format(user_id+1, acc))
        accuracies.append(acc)
        predictions_per_users.append(predictions)
    return accuracies, predictions_per_users

def ud_validation(dataset, labels):
    """
    Evaluate the dollarpy recognizer on 10 folds.

    Fold j holds out the positions 100*i + 10*k + j for every i and k in
    0..9; every other sample becomes a template.  This definition has the same
    name as the KNN/DTW helper defined earlier in the notebook and replaces it
    once this cell has run.

    dataset: sequence of gestures, each a list of Point
    labels: the label of each gesture
    returns: (list of the 10 accuracies, list of the 10 prediction lists);
        predictions are expressed with the original labels, not their
        string form
    """
    # 1-D object array filled element by element, so that gestures are kept as
    # whole lists whether or not they all have the same number of points
    samples = np.empty(len(dataset), dtype=object)
    for n, gesture in enumerate(dataset):
        samples[n] = gesture
    labels = np.array(labels)
    # Template names have to be strings; remember the label behind each name so
    # that predictions can be compared with the true labels
    label_of_name = {str(label): label for label in labels}
    accuracies = []
    predictions_per_samples = []
    # indxs[j][k][i] == 100*i + 10*k + j
    blocs = [[100*i + 10*k + j for i in range(10)] for j in range(10) for k in range(10)]
    indxs = np.reshape(blocs, (10, 10, 10))
    for cnt, b in enumerate(indxs):
        ind = b.T.flatten()
        train_set = np.delete(samples, ind)
        train_labels = np.delete(labels, ind)
        test_labels = labels[ind]
        test_set = samples[ind]
        # Pair each training gesture with its own label (train_labels): after
        # the deletion, position i of the training set no longer corresponds
        # to position i of the full label array
        templates = [Template(str(label), points) for label, points in zip(train_labels, train_set)]
        recognizer = Recognizer(templates)
        predictions=[]
        for t in test_set:
            name = recognizer.recognize(t)[0]
            # unknown names are passed through unchanged
            predictions.append(label_of_name.get(name, name))
        acc = accuracy_score(test_labels, predictions)
        print("The sample score {}: {}".format(cnt, acc))
        accuracies.append(acc)
        predictions_per_samples.append(predictions)
    return accuracies, predictions_per_samples


### Application on domain 1 User-dependent

In [None]:
# Recognizer evaluation on the 2-D domain 1 data (two-argument ud_validation).
# This reuses, and therefore overwrites, the result names of the KNN/DTW run.
accuracies1_ud, prediction1_ud = ud_validation(final_dataset1, data1.labels)
print(f"Accuracies {accuracies1_ud} average accuracy {np.mean(accuracies1_ud)} and standard deviation {np.std(accuracies1_ud)}")

### Application on domain 1 user-independent

In [None]:
# Recognizer evaluation on the 2-D domain 1 data (two-argument validation, LIMIT left at its default).
# This reuses, and therefore overwrites, the result names of the KNN/DTW run.
accuracies1, prediction1 = validation(final_dataset1, data1.labels)
print(f"Accuracies {accuracies1} average accuracy {np.mean(accuracies1)} and standard deviation {np.std(accuracies1)}")

### Application on domain 4 user-dependent

In [None]:
# Recognizer evaluation on the 2-D domain 4 data (two-argument ud_validation).
# This reuses, and therefore overwrites, the result names of the KNN/DTW run.
accuracies4_ud, prediction4_ud = ud_validation(final_dataset4, data4.labels)
print(f"Accuracies {accuracies4_ud} average accuracy {np.mean(accuracies4_ud)} and standard deviation {np.std(accuracies4_ud)}")

### Application on domain 4 user-independent

In [None]:
# Recognizer evaluation on the 2-D domain 4 data (two-argument validation, LIMIT left at its default).
# This reuses, and therefore overwrites, the result names of the KNN/DTW run.
accuracies4, prediction4 = validation(final_dataset4, data4.labels)
print(f"Accuracies {accuracies4} average accuracy {np.mean(accuracies4)} and standard deviation {np.std(accuracies4)}")

In [None]:
# NOTE(review): prediction1 is whatever was assigned last; when the cells are run
# in order it holds the recognizer predictions from Part 2 at this point.
plot_conf_mat(np.array(data1.labels), prediction1)

In [None]:
# NOTE(review): prediction4 is whatever was assigned last; when the cells are run
# in order it holds the recognizer predictions from Part 2 at this point.
plot_conf_mat(np.array(data4.labels), prediction4)

In [None]:
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score

# Per-class precision, F1-score and recall of the domain 4 predictions, one
# report per user (average=None keeps one value per class).
print("Model $P dollar measure from the confusion matrixes")
true_labels = np.array(data4.labels)
for user_id, user_predictions in enumerate(prediction4):
    # positions of this user's block of 100 consecutive samples
    indexes = range(user_id*100, user_id*100+100)
    user_truth = true_labels[indexes]
    # users are numbered from 1, like in the other reports of this notebook
    print("User ", user_id + 1)
    print("The precision", precision_score(user_truth, user_predictions, average=None))
    print("The f1-score", f1_score(user_truth, user_predictions, average=None))
    print("The recall", recall_score(user_truth, user_predictions, average=None))
    print()