In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the training split from disk: label vector first, then the pose sequences.
pose_train_labels = np.load(file="train_data/pose_train_labels.npy")
pose_train_data = np.load(file="train_data/encoded_data/pose_train_data.npy")

In [3]:
# Display the shapes of the training data and label arrays as a tuple
# (the recorded output for this cell shows (40, 30) and (40,)).
pose_train_data.shape, pose_train_labels.shape

((40, 30), (40,))

In [4]:
# Read the validation split from disk: label vector first, then the pose sequences.
pose_val_labels = np.load(file="val_data/pose_val_labels.npy")
pose_val_data = np.load(file="val_data/encoded_data/pose_val_data.npy")

In [5]:
# Display the shapes of the validation data and label arrays as a tuple
# (the recorded output for this cell shows (17, 30) and (17,)).
pose_val_data.shape, pose_val_labels.shape

((17, 30), (17,))

## Prepare two data representations

Prepare an encoded version and a normalized version of the data; each is used by a different model.

### 1. Encode the data

Multi-hot encode the arrays, turning them into arrays of 0s and 1s that can be fed to the model.

First, find the maximum number in the lists of sequences; it determines the size of the encoding dimension:

In [57]:
# The same dimension is used to encode BOTH splits, so take the largest index
# found in either one. Sizing from the training split alone would make the
# encoders raise an IndexError on any validation index above the training maximum.
max_num = int(max(pose_train_data.max(), pose_val_data.max()))
max_num

336

In [147]:
def two_dim_encode_sequences(sequences, dimension):
    """Multi-hot encode a collection of integer sequences into a 2-D array.

    Args:
        sequences: Sized iterable of integer sequences. Every value is used
            as a column index into the output row for its sequence.
        dimension: Largest value expected in ``sequences``; the output has
            ``dimension + 1`` columns so that this value itself has a slot.

    Returns:
        Array of zeros and ones with shape ``(len(sequences), dimension + 1)``
        in which ``result[i, v] == 1`` for every value ``v`` occurring in
        ``sequences[i]``. Repeated values and their order are not recorded.

    Raises:
        IndexError: If a value does not fit in an axis of size
            ``dimension + 1``.
    """
    n_rows = len(sequences)
    n_cols = dimension + 1  # one extra column so `dimension` is a valid index
    multi_hot = np.zeros((n_rows, n_cols))

    # Each `row` is a view into `multi_hot`, so writing to it fills the matrix.
    for row, indices in zip(multi_hot, sequences):
        for idx in indices:
            row[idx] = 1.

    return multi_hot

In [150]:
def three_dim_encode_sequences(sequences, dimension, sequence_length=30):
    """One-hot encode every position of every sequence into a 3-D array.

    Args:
        sequences: Sized iterable of integer sequences. The value at position
            ``j`` of sequence ``i`` selects which slot of ``result[i, j]`` is
            set to 1.
        dimension: Largest value expected in ``sequences``; the last axis has
            ``dimension + 1`` slots so that this value itself has a slot.
        sequence_length: Size of the position (middle) axis. Defaults to 30,
            the length that used to be hard-coded, so existing calls behave
            exactly as before. Sequences shorter than this leave their
            trailing positions all-zero.

    Returns:
        Array of zeros and ones with shape
        ``(len(sequences), sequence_length, dimension + 1)``.

    Raises:
        IndexError: If a sequence is longer than ``sequence_length`` or holds
            a value that does not fit in an axis of size ``dimension + 1``.
    """
    # +1 on the last axis so that `dimension` itself is a valid index.
    results = np.zeros((len(sequences), sequence_length, dimension + 1))

    for i, sequence in enumerate(sequences):
        for position, value in enumerate(sequence):
            # Mark the slot for `value` at this position of sequence `i`.
            results[i, position, value] = 1.

    return results

In [151]:
# Two dimensional encoding of train data
two_dim_encoded_train_data = two_dim_encode_sequences(pose_train_data, max_num)

# Two dimensional encoding of val data
two_dim_encoded_val_data = two_dim_encode_sequences(pose_val_data, max_num)

In [157]:
# Report the shapes of both two-dimensional encodings, one per line.
print(
    f"Two dimensional encoded train data shape: {two_dim_encoded_train_data.shape}",
    f"Two dimensional encoded val data shape: {two_dim_encoded_val_data.shape}",
    sep="\n",
)

Two dimensional encoded train data shape: (40, 337)
Two dimensional encoded val data shape: (17, 337)


In [158]:
# Three dimensional encoding of train data
three_dim_encoded_train_data = three_dim_encode_sequences(pose_train_data, max_num)

# Three dimensional encoding of val data
three_dim_encoded_val_data = three_dim_encode_sequences(pose_val_data, max_num)

In [159]:
# Report the shapes of both three-dimensional encodings, one per line.
print(
    f"Three dimensional encoded train data shape: {three_dim_encoded_train_data.shape}",
    f"Three dimensional encoded val data shape: {three_dim_encoded_val_data.shape}",
    sep="\n",
)

Three dimensional encoded train data shape: (40, 30, 337)
Three dimensional encoded val data shape: (17, 30, 337)


## Save encoded data

In [7]:
# Save both two and three dimensional training data
np.save("train_data/encoded_data/two_dim_encoded_train_data.npy", two_dim_encoded_train_data)
np.save("train_data/encoded_data/three_dim_encoded_train_data.npy", three_dim_encoded_train_data)

In [161]:
# Save both two and three dimensional val data
np.save("val_data/encoded_data/two_dim_encoded_val_data.npy", two_dim_encoded_val_data)
np.save("val_data/encoded_data/three_dim_encoded_val_data.npy", three_dim_encoded_val_data)