# In this notebook we will load all the data from the randomized experiments, normalize it, and then separate it into training, validation and testing datasets, following an 80/10/10 proportion.

In [1]:
import pickle

import numpy as np
import plotly.express as px

# Loading the data

In [2]:
# Gesture classes. The loading loop assigns integer labels in this order,
# so the position of a name in this list is its class label.
gesture_names = [
    "click",
    "closed",
    "down",
    "mouse_tracking",
    "negative_closed",
    "negative_mouse_tracking",
    "negative_side",
    "negative_up",
    "side",
    "up",
]

# Distance sub-directories under data/express/ that hold the recordings.
distances = [
    "50cm",
    "75cm",
    "100cm",
]

In [3]:
# Parallel lists: features[i] is one recorded sample, targets[i] its class label.
features = []
targets = []

# The position of a gesture in `gesture_names` is used as its integer label,
# so every distance of the same gesture shares one label.
for label, gesture_name in enumerate(gesture_names):
    for distance in distances:
        # NOTE(review): pickle.load can execute arbitrary code while
        # unpickling; only load files that come from a trusted source.
        with open(f"data/express/{distance}/{gesture_name}", "rb") as file:
            samples = pickle.load(file)

        features.extend(samples)
        # One label entry per sample read from this file.
        targets.extend([label] * len(samples))

# Normalizing the data

In [4]:
def normalize(z: float, max: float, min: float) -> float:
    """Min-max scale ``z`` relative to ``[min, max]``, rounded to 3 decimals.

    Args:
        z: Value to scale.
        max: Upper bound of the source range (maps to 1.0).
        min: Lower bound of the source range (maps to 0.0).

    Returns:
        ``(z - min) / (max - min)`` rounded to three decimal places, or
        ``0.0`` when ``max == min`` (a degenerate range, which would
        otherwise raise ``ZeroDivisionError``).
    """
    # NOTE: `max` / `min` shadow the builtins inside this function; the names
    # are kept because callers pass them as keyword arguments.
    span = max - min

    if span == 0:
        # Every value in the range is identical, so there is nothing to scale.
        return 0.0

    return round((z - min) / span, 3)


def normalize_landmarks(landmarks: list) -> list:
    """Min-max normalize the x/y coordinates of every landmark set.

    Each element of ``landmarks`` is a sequence of points whose index 0 is
    read as x and index 1 as y; any further components are dropped. The x
    and y values are scaled independently, using the extremes found within
    that single element.

    Args:
        landmarks: Sequence of landmark sets to normalize.

    Returns:
        A new list with one entry per input element, each a list of
        ``[x_norm, y_norm]`` pairs in the original point order.
    """
    scaled_sets = []

    for points in landmarks:
        x_values = [point[0] for point in points]
        y_values = [point[1] for point in points]

        # Per-sample bounds: maxima first, then minima.
        x_hi, y_hi = max(x_values), max(y_values)
        x_lo, y_lo = min(x_values), min(y_values)

        scaled_sets.append(
            [
                [
                    normalize(z=x, max=x_hi, min=x_lo),
                    normalize(z=y, max=y_hi, min=y_lo),
                ]
                for x, y in zip(x_values, y_values)
            ]
        )

    return scaled_sets

In [5]:
# Normalize the x/y coordinates of every loaded sample with normalize_landmarks.
norm_features = normalize_landmarks(landmarks=features)

# Shuffling the data, note that the targets and norm features are shuffled in the same way

In [6]:
# Pair every sample with its label so one shuffle keeps the two aligned.
aux = list(zip(norm_features, targets))

# Fixed seed: the shuffled order is the same on every run.
np.random.seed(42)
np.random.shuffle(aux)

# Un-zip the shuffled pairs back into two parallel lists.
norm_features, targets = map(list, zip(*aux))

# Splitting the data between training and testing datasets, using 70/30 (note: the cell below currently keeps every sample in the training set)

In [7]:
# `num_samples` and `percentage` are defined here but not used by any of the
# cells visible in this notebook.
num_samples, percentage = len(norm_features), 0.7

# No split is performed: every shuffled sample is kept for training.
train_features, train_targets = norm_features, targets

In [8]:
# Bundle the training set as a two-element list: [features, targets].
train_data = [train_features, train_targets]

# Checking the balance of the data

In [9]:
# Number of training samples per class label. Labels run from 0 to
# len(gesture_names) - 1 (one per gesture), so the class count is taken from
# `gesture_names` instead of being hard-coded.
nums = [train_targets.count(label) for label in range(len(gesture_names))]

# Bar chart of the per-class counts, to inspect how balanced the data is.
px.bar(nums)

# Balancing the data

In [10]:
# Every class is cut down to the size of the smallest class.
min_class = min(nums)

balanced_train_feat = []
balanced_train_targets = []

# Number of samples kept so far for each class label
# (labels are 0 .. len(gesture_names) - 1).
counts = dict.fromkeys(range(len(gesture_names)), 0)

# The loop name is `sample`, not `features`, so that the global `features`
# list built by the loading cell is not overwritten by a single sample.
for sample, target in zip(*train_data):
    # Keep the first `min_class` samples of each class, in their current order.
    if counts[target] < min_class:
        counts[target] += 1

        balanced_train_feat.append(sample)
        balanced_train_targets.append(target)

In [11]:
# Per-class sample counts after balancing. Every label from 0 to
# len(gesture_names) - 1 is counted, so the last class is no longer left out
# of the chart (the previous hard-coded range stopped one class short).
balanced_nums = [
    balanced_train_targets.count(label) for label in range(len(gesture_names))
]

# Bar chart of the balanced per-class counts.
px.bar(balanced_nums)

In [12]:
# Bundle the balanced training set as a two-element list: [features, targets].
balanced_train_data = [balanced_train_feat, balanced_train_targets]

# Saving the data

In [13]:
# Persist the balanced [features, targets] pair as a pickle file.
with open("data/express/train_data", "wb") as out_file:
    pickle.dump(balanced_train_data, out_file)