## Preprocessing the Created Dataset
- Normalizing the dataset
- Converting the labels into one-hot encoded form

In [27]:

from tensorflow.keras.utils import to_categorical
import numpy as np
import os
from tqdm import tqdm
tqdm.pandas()
from pymongo import MongoClient
import pandas as pd

In [28]:
from Create_dataset import actions , no_of_videos , no_of_frames , DATA_PATH

In [29]:
# Dataset configuration.
# NOTE(review): these assignments shadow the same four names imported from
# Create_dataset in the previous cell -- confirm which source is authoritative.

# Root directory holding the saved landmark arrays.
DATA_PATH = os.path.join("LANDMARK_DATA")

# Sign vocabulary; the position of each entry is used as its class index.
actions = np.array([
    'hello', 'thankyou', 'My', 'sorry', 'Name', 'You',
    'I am', 'Nice', 'Meet', 'Fine', 'I',
])

# Recordings per action, and frames per recording.
no_of_videos, no_of_frames = 100, 30

In [30]:
# List every action next to the integer index it will be encoded as.
for index, action_name in enumerate(actions):
    print(index, action_name)

0 hello
1 thankyou
2 My
3 sorry
4 Name
5 You
6 I am
7 Nice
8 Meet
9 Fine
10 I


In [31]:
# Display the action vocabulary array (and its string dtype).
actions

array(['hello', 'thankyou', 'My', 'sorry', 'Name', 'You', 'I am', 'Nice',
       'Meet', 'Fine', 'I'], dtype='<U8')

In [32]:
# Map each action name to its position in `actions`; that position is the
# integer class label used for the rest of the notebook.
label_map = dict(zip(actions, range(len(actions))))

In [33]:
# Show the action-name -> class-index mapping.
label_map

{'hello': 0,
 'thankyou': 1,
 'My': 2,
 'sorry': 3,
 'Name': 4,
 'You': 5,
 'I am': 6,
 'Nice': 7,
 'Meet': 8,
 'Fine': 9,
 'I': 10}

In [34]:
 # importing the keypoint data stored as numpy arrays locally

# video_data collects one list of per-frame keypoint arrays per video;
# labels collects the matching integer class index for each video.
video_data, labels = [], []

for action in actions:
    for video in range(no_of_videos):
        # Frames are stored as <DATA_PATH>/<action>/<video>/<frame_num>.npy
        video_dir = os.path.join(DATA_PATH, action, str(video))

        # Load the frames of this video in order.
        window = [
            np.load(os.path.join(video_dir, "{}.npy".format(frame_num)))
            for frame_num in range(no_of_frames)
        ]

        video_data.append(window)
        labels.append(label_map[action])

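NOTE: No further scaling is applied here. MediaPipe already returns the x and y landmark coordinates normalized to roughly [0, 1] (by image width and height); the z coordinate is a relative depth and can be negative, as the values printed below show.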

In [35]:
# Sanity check: stack the nested lists and inspect the shape
# (videos, frames per video, values per frame).
np.array(video_data).shape

(1100, 30, 1662)

In [36]:
# NOTE(review): this echoes the entire nested list (all loaded videos);
# a small slice such as video_data[0][:2] would keep the output readable.
video_data

[[array([ 0.48907107,  0.31065953, -1.52472782, ...,  0.        ,
          0.        ,  0.        ]),
  array([ 0.50588286,  0.43122151, -1.3196938 , ...,  0.        ,
          0.        ,  0.        ]),
  array([ 0.51067424,  0.43701315, -1.17469358, ...,  0.        ,
          0.        ,  0.        ]),
  array([ 0.52083284,  0.44172639, -1.05210757, ...,  0.        ,
          0.        ,  0.        ]),
  array([ 0.51937109,  0.44772437, -1.02967119, ...,  0.        ,
          0.        ,  0.        ]),
  array([ 0.51915282,  0.45715916, -0.74745756, ...,  0.        ,
          0.        ,  0.        ]),
  array([ 0.51843125,  0.46929976, -0.87260878, ...,  0.        ,
          0.        ,  0.        ]),
  array([ 0.51749331,  0.46095556, -0.70599127, ...,  0.        ,
          0.        ,  0.        ]),
  array([ 0.51346844,  0.45430243, -0.86979645, ...,  0.        ,
          0.        ,  0.        ]),
  array([ 0.51234734,  0.45008966, -0.7774955 , ...,  0.2658312 ,
       

In [37]:
# There should be exactly one integer label per loaded video.
np.array(labels).shape

(1100,)

In [38]:
# Stack the per-video frame lists into a single ndarray.
X = np.array(video_data)

In [39]:
# Axes are (video, frame, keypoint value).
X.shape

(1100, 30, 1662)

In [40]:
# NOTE(review): displays the full array repr; X[:1] or X.shape is usually enough.
X

array([[[ 0.48907107,  0.31065953, -1.52472782, ...,  0.        ,
          0.        ,  0.        ],
        [ 0.50588286,  0.43122151, -1.3196938 , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.51067424,  0.43701315, -1.17469358, ...,  0.        ,
          0.        ,  0.        ],
        ...,
        [ 0.4926661 ,  0.43284616, -0.76935244, ...,  0.16717376,
          0.15810913, -0.05844192],
        [ 0.49296832,  0.43348545, -0.77340305, ...,  0.16804329,
          0.16028863, -0.05659148],
        [ 0.49369201,  0.43327758, -0.77868259, ...,  0.1671429 ,
          0.16293401, -0.05479354]],

       [[ 0.49377102,  0.43369636, -0.78066176, ...,  0.16670072,
          0.16406117, -0.05632574],
        [ 0.47779107,  0.43192369, -0.75557107, ...,  0.        ,
          0.        ,  0.        ],
        [ 0.47237068,  0.43091211, -0.74847233, ...,  0.        ,
          0.        ,  0.        ],
        ...,
        [ 0.46164799,  0.45209727, -0.84796935, ...,  

In [41]:
# One-hot encode the integer labels. num_classes is pinned to the size of the
# action vocabulary so the number of columns does not depend on which labels
# happen to be present (to_categorical otherwise infers it from the largest
# label value it sees).
y = to_categorical(labels, num_classes=len(actions)).astype(int)

In [42]:
# One row per video, one column per action class.
y.shape

(1100, 11)

In [43]:
# Number of videos in X (length of the first axis).
len(X)

1100

In [44]:
# Tabular view of the dataset: one row per video, holding its list of
# per-frame keypoint arrays ("Landmarks") and its integer class ("Labels").
# Built from the raw Python lists rather than X / y.
preprocessed_df = pd.DataFrame({"Landmarks" : video_data , "Labels" : labels})

In [45]:
# Preview the first rows; each Landmarks cell holds the whole frame sequence of one video.
preprocessed_df.head()

Unnamed: 0,Landmarks,Labels
0,"[[0.4890710711479187, 0.3106595277786255, -1.5...",0
1,"[[0.4937710165977478, 0.43369635939598083, -0....",0
2,"[[0.4835539758205414, 0.44761306047439575, -0....",0
3,"[[0.4873453974723816, 0.4510439932346344, -0.9...",0
4,"[[0.48652729392051697, 0.43328532576560974, -0...",0


In [46]:
# Inspect the element type of every frame stored for the first video.
first_video_frames = preprocessed_df["Landmarks"].iloc[0]
for frame in first_video_frames:
    print(type(frame))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [47]:
def _frames_to_lists(landmarks):
    """Return the frames of one video with every numpy array converted to a plain list.

    Frames that are not numpy arrays (e.g. already lists) are passed through
    unchanged.
    """
    return [
        frame.tolist() if isinstance(frame, np.ndarray) else frame
        for frame in landmarks
    ]


# The records are inserted into MongoDB further down, which needs plain Python
# lists rather than numpy arrays, so convert each video's frames here.
preprocessed_df['Landmarks'] = preprocessed_df["Landmarks"].progress_apply(_frames_to_lists)

100%|██████████| 1100/1100 [00:13<00:00, 78.87it/s] 


In [48]:
# Confirm the first frame of the first video is now a plain Python list.
type(preprocessed_df['Landmarks'][0][0])

list

In [49]:
# Turn the frame into a list of dicts (one per row, keyed by column name),
# the shape passed to insert_many below.
records = preprocessed_df.to_dict(orient='records')

In [50]:
# Spot-check that a record's Landmarks field is a list.
type(records[1]['Landmarks'])

list

In [51]:
# Establishing the MongoDB connection (local server, default port) and
# selecting the database and collection that will hold the preprocessed data.
connection = MongoClient(host='localhost', port=27017)
db = connection.get_database('mydb')
collection = db.get_collection('Preprocessed_Landmark_Data')

In [52]:
# Bulk-insert every video record into the collection.
# NOTE(review): this cell is not safe to re-run. pymongo's insert_many is
# documented to add an `_id` to each dict in place, so repeating the call with
# the same `records` is expected to fail on duplicate keys, while rebuilding
# `records` first would store a second copy of the dataset -- confirm and guard
# if needed. Assigning the result to a variable would also avoid echoing every
# inserted ObjectId.
collection.insert_many(records)

InsertManyResult([ObjectId('6763a74a70cf5c4a5e95f31d'), ObjectId('6763a74a70cf5c4a5e95f31e'), ObjectId('6763a74a70cf5c4a5e95f31f'), ObjectId('6763a74a70cf5c4a5e95f320'), ObjectId('6763a74a70cf5c4a5e95f321'), ObjectId('6763a74a70cf5c4a5e95f322'), ObjectId('6763a74a70cf5c4a5e95f323'), ObjectId('6763a74a70cf5c4a5e95f324'), ObjectId('6763a74a70cf5c4a5e95f325'), ObjectId('6763a74a70cf5c4a5e95f326'), ObjectId('6763a74a70cf5c4a5e95f327'), ObjectId('6763a74a70cf5c4a5e95f328'), ObjectId('6763a74a70cf5c4a5e95f329'), ObjectId('6763a74a70cf5c4a5e95f32a'), ObjectId('6763a74a70cf5c4a5e95f32b'), ObjectId('6763a74a70cf5c4a5e95f32c'), ObjectId('6763a74a70cf5c4a5e95f32d'), ObjectId('6763a74a70cf5c4a5e95f32e'), ObjectId('6763a74a70cf5c4a5e95f32f'), ObjectId('6763a74a70cf5c4a5e95f330'), ObjectId('6763a74a70cf5c4a5e95f331'), ObjectId('6763a74a70cf5c4a5e95f332'), ObjectId('6763a74a70cf5c4a5e95f333'), ObjectId('6763a74a70cf5c4a5e95f334'), ObjectId('6763a74a70cf5c4a5e95f335'), ObjectId('6763a74a70cf5c4a5e95f3