In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the training sequences and their labels from the .npy files on disk
pose_train_data = np.load("train_data/encoded_data/pose_train_data.npy")
pose_train_labels = np.load("train_data/pose_train_labels.npy")

In [3]:
# Display both shapes to check that data and labels have the same number of samples
pose_train_data.shape, pose_train_labels.shape

((40, 30), (40,))

In [4]:
# Load the validation sequences and their labels from the .npy files on disk
pose_val_data = np.load("val_data/encoded_data/pose_val_data.npy")
pose_val_labels = np.load("val_data/pose_val_labels.npy")

In [5]:
# Display both shapes to check that data and labels have the same number of samples
pose_val_data.shape, pose_val_labels.shape

((17, 30), (17,))

## Prepare two data representations

Prepare two versions of the data, one encoded and one normalized. Each version is used by a different model.

### 1. Encode the data

Multi-hot encode the arrays, turning them into 0's and 1's for use in the model.

First, find the maximum number across all sequences; it determines the encoding dimension:

In [6]:
# Largest value found anywhere in the training sequences; used as the encoding dimension.
# NOTE(review): derived from the training data only - a larger value in another
# split would index past the encoders' dimension+1 slots and raise IndexError.
max_num = pose_train_data.max()
max_num

331

In [7]:
def two_dim_encode_sequences(sequences, dimension):
    """
    Function to multi-hot encode sequences into a two dimensional array

    Args:
    sequences - array of integer sequences to be encoded
    dimension - largest value that may appear in a sequence

    Returns an array of shape (len(sequences), dimension + 1) where entry
    [i, v] is 1 if value v occurs anywhere in sequence i and 0 otherwise
    """

    # One row per sequence; dimension + 1 columns so that index == dimension is valid
    n_rows = len(sequences)
    encoded = np.zeros((n_rows, dimension + 1))

    # Each `row` is a view into `encoded`, so writing to it fills the result in place
    for row, values in zip(encoded, sequences):
        for value in values:
            row[value] = 1.

    return encoded

In [8]:
def three_dim_encode_sequences(sequences, dimension, sequence_length=None):
    """
    Function to one-hot encode every position of every sequence

    Args:
    sequences - array of integer sequences to be encoded
    dimension - largest value that may appear in a sequence
    sequence_length - size of the position axis. When None (default) it is
                      taken from the data: the second axis of a 2-D array,
                      otherwise the length of the longest sequence.

    Returns an array of shape (len(sequences), sequence_length, dimension + 1)
    where entry [i, j, v] is 1 if sequence i holds value v at position j
    """

    # Derive the position axis from the data instead of assuming 30 steps
    if sequence_length is None:
        if isinstance(sequences, np.ndarray) and sequences.ndim >= 2:
            sequence_length = sequences.shape[1]
        else:
            sequence_length = max((len(sequence) for sequence in sequences), default=0)

    # Initialize an all-zero array; dimension + 1 slots so that index == dimension is valid
    results = np.zeros((len(sequences), sequence_length, dimension + 1))

    # Loop through data array
    for i, sequence in enumerate(sequences):
        # Loop through each position of the sequence
        for j, s in enumerate(sequence):
            # Mark the value found at position j
            results[i, j, s] = 1.

    return results

In [9]:
# Multi-hot encode the training sequences
two_dim_encoded_train_data = two_dim_encode_sequences(sequences=pose_train_data, dimension=max_num)

# Multi-hot encode the validation sequences with the same dimension
two_dim_encoded_val_data = two_dim_encode_sequences(sequences=pose_val_data, dimension=max_num)

In [10]:
# Report the shapes of the two dimensional encodings for both splits
print(f"Two dimensional encoded train data shape: {two_dim_encoded_train_data.shape}")
print(f"Two dimensional encoded val data shape: {two_dim_encoded_val_data.shape}")

Two dimensional encoded train data shape: (40, 332)
Two dimensional encoded val data shape: (17, 332)


In [11]:
# One-hot encode every position of the training sequences
three_dim_encoded_train_data = three_dim_encode_sequences(sequences=pose_train_data, dimension=max_num)

# One-hot encode every position of the validation sequences with the same dimension
three_dim_encoded_val_data = three_dim_encode_sequences(sequences=pose_val_data, dimension=max_num)

In [12]:
# Report the shapes of the three dimensional encodings for both splits
print(f"Three dimensional encoded train data shape: {three_dim_encoded_train_data.shape}")
print(f"Three dimensional encoded val data shape: {three_dim_encoded_val_data.shape}")

Three dimensional encoded train data shape: (40, 30, 332)
Three dimensional encoded val data shape: (17, 30, 332)


## Save encoded data

In [13]:
# Save both two and three dimensional training data
# (written next to the input file, in the folder pose_train_data.npy was loaded from)
np.save("train_data/encoded_data/two_dim_encoded_train_data.npy", two_dim_encoded_train_data)
np.save("train_data/encoded_data/three_dim_encoded_train_data.npy", three_dim_encoded_train_data)

In [14]:
# Save both two and three dimensional val data
# (written next to the input file, in the folder pose_val_data.npy was loaded from)
np.save("val_data/encoded_data/two_dim_encoded_val_data.npy", two_dim_encoded_val_data)
np.save("val_data/encoded_data/three_dim_encoded_val_data.npy", three_dim_encoded_val_data)

### 2. Normalize the data

In [16]:
def two_dim_norm_sequences(sequences):
    """
    Standardize a two dimensional array of sequences with StandardScaler

    A fresh scaler is fitted on every call, so the scaling statistics always
    come from the `sequences` argument itself.

    Args:
    sequences - array of sequences to be normalized

    Returns a two-dimensional array of normalized sequences
    """

    # Fit and apply the scaler in a single step
    return StandardScaler().fit_transform(sequences)
    

In [17]:
def three_dim_norm_sequences(sequences, m, n):
    """
    Standardize sequences with StandardScaler and reshape them to three dimensions

    A fresh scaler is fitted on every call, so the scaling statistics always
    come from the `sequences` argument itself.

    Args:
    sequences - array of sequences to be normalized
    m - number of samples (e.g. amount of videos)
    n - size of the last axis of the result (features per step)

    Returns a three-dimensional array of normalized sequences with shape
    (m, -1, n); the middle axis is inferred by reshape
    """

    # Scale while the data is still two dimensional
    scaler = StandardScaler()
    scaled = scaler.fit_transform(sequences)

    # Fold each row into steps of n values
    return scaled.reshape((m, -1, n))
    

In [18]:
# Two dimensional normalizing of train data
two_dim_normalized_train_data = two_dim_norm_sequences(pose_train_data)

# Two dimensional normalizing of val data.
# The scaler is fitted on the training data only and then applied to the
# validation data, so both splits are scaled with the same per-feature mean/std.
# Calling two_dim_norm_sequences(pose_val_data) would re-fit the scaler on the
# validation samples and put the two splits on different scales.
train_scaler = StandardScaler().fit(pose_train_data)
two_dim_normalized_val_data = train_scaler.transform(pose_val_data)

In [19]:
# Report the shapes of the two dimensional normalized arrays for both splits
print(f"Two dimensional normalized train data shape: {two_dim_normalized_train_data.shape}")
print(f"Two dimensional normalized val data shape: {two_dim_normalized_val_data.shape}")

Two dimensional normalized train data shape: (40, 30)
Two dimensional normalized val data shape: (17, 30)


In [20]:
# Inspect the first normalized training sample
two_dim_normalized_train_data[0]

array([ 0.36854355,  0.51322073,  0.54746812,  0.85915534,  0.74495985,
        0.56445844,  0.85793352,  0.87229263,  1.20356918,  0.46580719,
        1.15558966,  0.47936631,  0.91800434,  0.22787509,  1.474253  ,
        0.31207533,  1.66858181,  0.44229726,  1.3336552 ,  0.20504292,
        0.51793507,  0.31724914, -0.29871492,  0.80779409,  0.00289765,
        1.00970888,  0.20913165,  0.55279772,  0.80140249,  0.12219091])

In [21]:
# Three dimensional normalizing of train data (sample count taken from the data)
three_dim_normalized_train_data = three_dim_norm_sequences(pose_train_data, len(pose_train_data), 2)

# Three dimensional normalizing of val data.
# The scaler is fitted on the training data only and then applied to the
# validation data, so both splits are scaled with the same per-feature mean/std.
# The scaled rows are then folded into steps of 2 values, matching the train layout.
train_scaler = StandardScaler().fit(pose_train_data)
three_dim_normalized_val_data = train_scaler.transform(pose_val_data).reshape(len(pose_val_data), -1, 2)

In [22]:
# Report the shapes of the three dimensional normalized arrays for both splits
print(f"Three dimensional normalized train data shape: {three_dim_normalized_train_data.shape}")
print(f"Three dimensional normalized val data shape: {three_dim_normalized_val_data.shape}")

Three dimensional normalized train data shape: (40, 15, 2)
Three dimensional normalized val data shape: (17, 15, 2)


In [23]:
# Inspect the first normalized training sample in its three dimensional layout
three_dim_normalized_train_data[0]

array([[ 0.36854355,  0.51322073],
       [ 0.54746812,  0.85915534],
       [ 0.74495985,  0.56445844],
       [ 0.85793352,  0.87229263],
       [ 1.20356918,  0.46580719],
       [ 1.15558966,  0.47936631],
       [ 0.91800434,  0.22787509],
       [ 1.474253  ,  0.31207533],
       [ 1.66858181,  0.44229726],
       [ 1.3336552 ,  0.20504292],
       [ 0.51793507,  0.31724914],
       [-0.29871492,  0.80779409],
       [ 0.00289765,  1.00970888],
       [ 0.20913165,  0.55279772],
       [ 0.80140249,  0.12219091]])

### Save normalized data

In [52]:
# Make directories to save numpy files.
# os.makedirs(..., exist_ok=True) keeps this cell safe to re-run: an existing
# folder is not an error, and an already-existing train folder no longer stops
# the val folder from being created. Any other failure (e.g. missing
# permissions) raises here instead of being swallowed and surfacing later
# as a confusing np.save error.
print("Creating normalizeed train data folder...")
os.makedirs("train_data/normalized_data", exist_ok=True)
print("Creating normalized val data folder...")
os.makedirs("val_data/normalized_data", exist_ok=True)

Creating normalizeed train data folder...
Creating normalized val data folder...


In [24]:
# Save both two and three dimensional training data
# (the train_data/normalized_data folder must exist before this cell runs)
np.save("train_data/normalized_data/two_dim_normalized_train_data.npy", two_dim_normalized_train_data)
np.save("train_data/normalized_data/three_dim_normalized_train_data.npy", three_dim_normalized_train_data)

In [25]:
# Save both two and three dimensional val data
# (the val_data/normalized_data folder must exist before this cell runs)
np.save("val_data/normalized_data/two_dim_normalized_val_data.npy", two_dim_normalized_val_data)
np.save("val_data/normalized_data/three_dim_normalized_val_data.npy", three_dim_normalized_val_data)