## Preprocess Data and Create Labels and Features

### Libraries

In [1]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import numpy as np
import os

### Data processing

In [2]:
# Root folder that holds the exported NumPy arrays
DATA_PATH = 'MP_Data'

# Number of recorded videos (sequences) available for each action
no_sequences = 30

# Number of frames stored for each video
sequence_length = 30

In [3]:
# Action names to classify; the position of each name is used as its class index
actions = np.array([
    'adios',
    'amor',
    'animal',
    'bienvenida',
    'gracias',
    'hola',
    'hombre',
    'internet',
    'mujer',
    'nino_na',
    'objeto',
    'persona',
    'radio',
    'television',
    'tristeza',
])

In [4]:
# Map every action name to its integer class index (its position in `actions`)
label_map = dict(zip(actions, range(len(actions))))

In [5]:
# Display the action-name -> class-index mapping for a visual check
label_map

{'adios': 0,
 'amor': 1,
 'animal': 2,
 'bienvenida': 3,
 'gracias': 4,
 'hola': 5,
 'hombre': 6,
 'internet': 7,
 'mujer': 8,
 'nino_na': 9,
 'objeto': 10,
 'persona': 11,
 'radio': 12,
 'television': 13,
 'tristeza': 14}

In [6]:
def _load_window(action_name, sequence_idx):
    """Load every frame array of one recorded sequence, in frame order.

    Reads `sequence_length` files named `<frame>.npy` from
    `DATA_PATH/<action_name>/<sequence_idx>/` and returns them as a list.
    """
    sequence_dir = os.path.join(DATA_PATH, action_name, str(sequence_idx))
    return [
        np.load(os.path.join(sequence_dir, "{}.npy".format(frame_idx)))
        for frame_idx in range(sequence_length)
    ]


# Build the parallel lists: one window of frames and one integer label per sequence
sequences = []
labels = []
for action in actions:
    class_index = label_map[action]
    for sequence in range(no_sequences):
        sequences.append(_load_window(action, sequence))
        labels.append(class_index)

In [7]:
# Sanity check: (total sequences, frames per sequence, values per frame)
np.array(sequences).shape

(450, 30, 1662)

In [8]:
# Sanity check: one integer label per loaded sequence
np.array(labels).shape

(450,)

In [9]:
# Feature tensor: all loaded windows stacked into a single array
X = np.asarray(sequences)
X.shape

(450, 30, 1662)

In [10]:
# One-hot encode the integer labels.
# num_classes is pinned to the number of actions so the width of `y` never
# depends on which labels happen to be present; without it, to_categorical
# infers the width from the largest label value it sees.
y = to_categorical(labels, num_classes=len(actions)).astype(int)
y.shape

(450, 15)

In [11]:
# Inspect the one-hot label matrix (one row per sequence, one column per class)
y

array([[1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1]])

In [12]:
# Hold out 5% of the sequences as a test set.
# - random_state fixes the shuffle so the split is identical after a kernel restart.
# - stratify=labels keeps the class proportions in both splits, which matters
#   because the test set is very small compared with the number of classes.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.05,
    random_state=42,
    stratify=labels,
)

In [13]:
# Shape of the training features produced by the split
X_train.shape

(427, 30, 1662)

In [14]:
# Shape of the training labels; the first dimension must match X_train
y_train.shape

(427, 15)

In [15]:
# Shape of the held-out test features
X_test.shape

(23, 30, 1662)

In [16]:
# Shape of the held-out test labels; the first dimension must match X_test
y_test.shape

(23, 15)

### Saving Data

In [17]:
# Output folder for the train/test arrays
data_folder = 'dataset'

# exist_ok=True makes this cell safe to re-run and avoids the race between
# checking for the folder and creating it.
os.makedirs(data_folder, exist_ok=True)

We save the data into `.npy` files within the 'dataset' folder

In [18]:
# Write each split to its own .npy file inside the output folder
for file_name, array in (
    ('X_train.npy', X_train),
    ('X_test.npy', X_test),
    ('y_train.npy', y_train),
    ('y_test.npy', y_test),
):
    np.save(os.path.join(data_folder, file_name), array)


To load these data later, you can use the same `np.load()` function specifying the folder path and the corresponding file name

In [27]:
# Reload the four arrays from disk.
# NOTE: the variable names mix English and Spanish suffixes; they are kept
# unchanged here because later cells may refer to them.
X_train_recovered = np.load(os.path.join(data_folder, 'X_train.npy'))
X_test_recuperado = np.load(os.path.join(data_folder, 'X_test.npy'))
y_train_recuperado = np.load(os.path.join(data_folder, 'y_train.npy'))
y_test_recuperado = np.load(os.path.join(data_folder, 'y_test.npy'))

# Verify that the save/load round trip returned exactly what was written
assert np.array_equal(X_train_recovered, X_train)
assert np.array_equal(X_test_recuperado, X_test)
assert np.array_equal(y_train_recuperado, y_train)
assert np.array_equal(y_test_recuperado, y_test)

# Last expression of the cell, so the shape of the reloaded array is displayed
X_train_recovered.shape

(427, 30, 1662)