In [57]:
import numpy as np 
import numpy.typing as npt 
import pandas as pd 

import matplotlib.pyplot as plt
import seaborn as sns

import re

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

from scipy.spatial import distance

In [51]:
USER_IDS = range(0, 10)

### Load datasets

In [64]:
def load_datasets(domain: str):
	dir_path = f"datasets/{domain}"

	X = []
	Y = []

	# matrix (number_drawn, user_id)
	# at each cell we have a list of multiple try of a specific number drawing (number_drawn)
	# drawn by a specific user (user_id), each try contains a list of data points (x, y, z)
	hand_gesture_data_matrix = np.zeros((10, 10), dtype=list)
	for i in range(0, 10):
		for j in range(0, 10):
			hand_gesture_data_matrix[i, j] = []

	for filename in range(1, 1001):
		file_path = f"{dir_path}/{filename}.txt"
		with open(file=file_path, mode="r") as f:
			# get the target, user_id 
			# and a list of positions vectors \in \R^3: <x, y, z>
			# that represents the drawing
			number_drawn, user_id, gesture_datas = load_gesture_data(file=f)

			X.append(gesture_datas)
			Y.append(number_drawn)

			hand_gesture_data_matrix[number_drawn - 1, user_id - 1].append(gesture_datas)

	return np.array(X), np.array(Y), hand_gesture_data_matrix

def load_gesture_data(file):
	""" 
	Structure of hand gesture dataset:
	-----
	Domain id = <domain-id>
	Class id = <class-id>
	User id = <user-id>

	<x>,<y>,<z>,<t>
	...
	-----
	"""
	lines = file.readlines()
	
	number_drawn = lines[1].strip()
	match_number = re.search("=", number_drawn)
	number_drawn = int(match_number.string[match_number.end():].strip())

	user_id = lines[2].strip()
	match_user = re.search("=", user_id)
	user_id = int(match_user.string[match_user.end():].strip())

	gesture_datas = []

	for row in range(5, len(lines)):
		gesture_data = lines[row].split(",")
		# we only keep <x, y, z> coordinates
		gesture_data = np.array([float(data.strip()) for data in gesture_data[0:-1]])
		gesture_datas.append(gesture_data)
	
	return number_drawn, user_id, gesture_datas

X, Y, hand_gesture_data_matrix = load_datasets(domain="Domain01")

  return np.array(X), np.array(Y), hand_gesture_data_matrix


### Preprocessing

In [65]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, train_size=0.8, random_state=42, shuffle=True)

# standardize data
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

### Dynamic Time Warping

In [99]:
# https://towardsdatascience.com/dynamic-time-warping-3933f25fcdd
def DTW_distance(s: npt.NDArray, t: npt.NDArray, window: int = 3) -> float:
	n = len(s)
	m = len(t)

	DTW_matrix = np.full((n+1, m+1), fill_value=np.inf)
	DTW_matrix[0, 0] = 0.0

	window = max(window, abs(n - m))

	for i in range(0, n+1):
		for j in range(max(0, i - window), min(m, i + window) + 1):
			DTW_matrix[i, j] = 0.0

	for i in range(1, n+1):
		for j in range(max(1, i - window), min(m, i + window) + 1):
			# manhattan distance
			cost = distance.cityblock(s[i-1], t[j-1])
			optimal_warping_path = min(
				DTW_matrix[i-1, j], # insertion
				DTW_matrix[i, j-1], # deletion
				DTW_matrix[i-1, j-1] # match
			)

			DTW_matrix[i, j] = cost + optimal_warping_path
	
	return DTW_matrix[n, m]

In [100]:
# https://nbviewer.org/github/markdregan/K-Nearest-Neighbors-with-Dynamic-Time-Warping/blob/master/K_Nearest_Neighbor_Dynamic_Time_Warping.ipynb

# http://alexminnaar.com/2014/04/16/Time-Series-Classification-and-Clustering-with-Python.html

class KNN:
	def __init__(self, n_neighbors = 1, distance_fun = DTW_distance):
		self.n_neigbors = n_neighbors
		self.distance_fun = distance_fun

		self.X = None 
		self.y = None
	
	def fit(self, X: npt.NDArray, y: npt.NDArray):
		self.X = X
		self.y = y

	def distance_matrix(self, X: npt.NDArray, y: npt.NDArray):
		n = len(X)
		m = len(y)
		distance_matrix = np.zeros((n, m))

		for i in range(0, n):
			for j in range(0, m):
				distance_matrix[i, j] = self.distance_fun(
					X[i],
					y[i]
				)
		
		return distance_matrix

	def predict(self, X: npt.NDArray):
		distance_matrix = self.distance_matrix(X, self.X)

		knn_idx = distance_matrix.argsort()[:, :self.n_neigbors]
		knn_labels = self.y[knn_idx]

		prediction = mode(knn_labels, axis=1)[0]
		proba = mode(knn_labels, axis=1)[1] / self.n_neigbors

		return prediction.ravel(), proba.ravel()

In [101]:
knn = KNN()

knn.fit(X_train, y_train)
y_pred, proba = knn.predict(X_test)

[array([-0.033923,  0.015784,  0.197038]), array([-0.034185,  0.016318,  0.196788]), array([-0.034341,  0.016681,  0.196535]), array([-0.034344,  0.01668 ,  0.196288]), array([-0.034473,  0.016802,  0.196044]), array([-0.034543,  0.016801,  0.195811]), array([-0.034632,  0.016874,  0.195577]), array([-0.034632,  0.016868,  0.195356]), array([-0.034451,  0.016837,  0.195139]), array([-0.034137,  0.01706 ,  0.194918]), array([-0.033782,  0.018978,  0.194657]), array([-0.03207 ,  0.020458,  0.194367]), array([-0.029702,  0.022542,  0.194006]), array([-0.027016,  0.025094,  0.193578]), array([-0.023842,  0.027461,  0.193071]), array([-0.020357,  0.029663,  0.192502]), array([-0.017008,  0.032583,  0.191831]), array([-0.012466,  0.033911,  0.191224]), array([-0.008248,  0.035612,  0.190691]), array([-0.004574,  0.037063,  0.190257]), array([0.000198, 0.037654, 0.189866]), array([0.00394 , 0.038081, 0.189527]), array([0.006897, 0.038812, 0.189218]), array([0.010779, 0.038807, 0.18895 ]), arr

In [69]:
classification_report(y_true=y_test, y_pred=y_pred)

NameError: name 'y_pred' is not defined

In [68]:
conf_matrix = np.array(confusion_matrix(y_true=y_test, y_pred=y_pred))
sns.heatmap(conf_matrix)

NameError: name 'y_pred' is not defined