In [1]:
import numpy as np
import os
from sklearn.model_selection import train_test_split
from data_processor import names_list, create_final_features, create_labels_dict, train_test_extract,OHK

In [2]:
# Root folder of the captured data, relative to the notebook's working directory.
SAVE_PATH = os.path.join('data')
# Echo the resolved path as the cell output.
SAVE_PATH

'data'

## 1. Get final feature array of shape (total sample count X frames per sample X landmarks per frame)

### Extract the list of file names in the format [action, sample_number, filename]

If you downloaded the data from the GitHub repo, please unzip data.rar in the same directory before continuing.

In [3]:
# Index the capture files found under SAVE_PATH. names_list is a project helper
# imported from data_processor; it also prints the total file count.
file_params = names_list(SAVE_PATH)

Total file count: 4500


### From the captured NumPy arrays, we now build a consolidated feature array (from the array contents) and a labels array (from the file names)

In [4]:
# Build the consolidated feature array and the per-sample label list
# from the files indexed in file_params.
features, labels = create_final_features(SAVE_PATH, file_params)

In [5]:
# List the label of every sample, in dataset order.
print(
    'The labels for all data points in order are ',
    labels,
)

The labels for all data points in order are  ['ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'ascend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'descend', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'not ok', 'ok', 'ok', 'ok', 'ok', 

In [6]:
# Report the dimensions of the consolidated feature array.
print(
    'Shape of the final dataset: ',
    features.shape,
)

Shape of the final dataset:  (150, 30, 126)


 ##### Final data shape:
 Actions - 5 <br>
 Samples per action - 30 <br>
 <b>Hence, total number of samples = 150</b> <br><br>
 
 Frames - 30  <- Total number of tokens per sample <br>
 Landmarks - 126 <- Length of each input token <br>
 <b> Features shape per sample = (30,126) </b>
 
 

In [7]:
# Persist the consolidated feature array as <SAVE_PATH>/final/final_features.npy.
FINAL_SAVE_PATH = os.path.join(SAVE_PATH, 'final', 'final_features.npy')
# np.save does not create missing parent directories, so make sure the output
# folder exists first (a no-op when it is already there). Without this the
# cell fails with FileNotFoundError on a fresh checkout.
os.makedirs(os.path.dirname(FINAL_SAVE_PATH), exist_ok=True)
np.save(FINAL_SAVE_PATH, features)

### 2. Get label array of shape (total sample count X no. of classes) - one-hot encoding

In [8]:
# Build the two label lookup tables from the label list.
# NOTE(review): judging by the printed output, dict_labels maps index -> label
# and dict_reverse_labels maps label -> index — confirm in data_processor.
dict_labels, dict_reverse_labels = create_labels_dict(labels)

{0: 'ok', 1: 'descend', 2: 'ascend', 3: 'not ok', 4: 'stop'}
{'ok': 0, 'descend': 1, 'ascend': 2, 'not ok': 3, 'stop': 4}


In [9]:
# One-hot encode every label using the label lookup table.
ohk = OHK(labels, dict_reverse_labels)
# Number of encodings produced.
print(len(ohk))

150


#### This is a super important step. When we test on the final live video, each label must map to the same one-hot encoding, and therefore to the same output index, as defined by the labels dictionary

In [10]:
# Print each sample's label next to its one-hot vector so the
# label -> index mapping can be checked by eye.
for position, encoding in enumerate(ohk):
    print(f'Label: {labels[position]}\tOHK:{encoding}')

Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0. 0. 1. 0. 0.]
Label: ascend	OHK:[0

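Hence (matching the label dictionary and the encodings printed above):<br>
<ul>
    <b>Action:&nbsp; &nbsp; One hot encoding</b>
    <li>ascend:&nbsp; &nbsp; [0. 0. 1. 0. 0.]</li> 
    <li>not ok:&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[0. 0. 0. 1. 0.]</li>
    <li>descend:&nbsp;&nbsp;[0. 1. 0. 0. 0.]</li>
    <li>stop:&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[0. 0. 0. 0. 1.]</li>
    <li>ok:&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[1. 0. 0. 0. 0.]</li>
</ul>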

In [11]:
# Convert the one-hot encodings into a single NumPy array and show its shape.
ohk = np.array(ohk)
print(ohk.shape)

(150, 5)


In [12]:
# Save the one-hot label matrix as <SAVE_PATH>/final/final_labels.npy.
np.save(
    os.path.join(SAVE_PATH, 'final', 'final_labels.npy'),
    ohk,
)

### 3. Shuffle and perform train test split

### Train test split

In [16]:
# Split the features and their one-hot labels into train and test partitions.
# NOTE(review): no random seed is passed here — confirm whether
# train_test_extract fixes one internally, otherwise the split is not reproducible.
X_train, X_test, y_train, y_test = train_test_extract(features, ohk)

(150, 3780)
(150, 5)


In [19]:
# Report the shape of every partition produced by the split.
for caption, partition in (
    ('X_train shape:', X_train),
    ('X_test shape:', X_test),
    ('y_train shape:', y_train),
    ('y_test shape:', y_test),
):
    print(caption, partition.shape)


X_train shape: (142, 30, 126)
X_test shape: (8, 30, 126)
y_train shape: (142, 5)
y_test shape: (8, 5)


In [20]:
# Write each partition to <SAVE_PATH>/final/ under its own file name,
# in the same order as before: X_train, y_train, X_test, y_test.
for file_name, partition in (
    ('X_train.npy', X_train),
    ('y_train.npy', y_train),
    ('X_test.npy', X_test),
    ('y_test.npy', y_test),
):
    np.save(os.path.join(SAVE_PATH, 'final', file_name), partition)