## Preprocessing the Dataset Created
- Normalizing the dataset
- Converting the labels into one-hot encoded form

In [49]:

from tensorflow.keras.utils import to_categorical
import numpy as np
import os
from tqdm import tqdm
tqdm.pandas()
from pymongo import MongoClient
import pandas as pd

In [50]:
# File Path to store the landmark data

DATA_PATH = os.path.join("LANDMARK_DATA")

# Defining the actions 

actions = np.array(['hello' , 'thankyou' , 'iloveyou'])

# Defining the number of videos per action

no_of_videos = 100 

# Defining the number of frames per video

no_of_frames = 30


In [51]:
for num , label in enumerate(actions):
 print(num , label)

0 hello
1 thankyou
2 iloveyou


In [52]:
# Creating Label Map 

label_map = {label:num for num , label in enumerate(actions)}

In [53]:
label_map

{'hello': 0, 'thankyou': 1, 'iloveyou': 2}

In [54]:
 # importing the keypoint data stored as numpy arrays locally

video_data , labels = [] , []

for action in actions:
  for video in range(no_of_videos):

   window = []

   for frame_num in range(no_of_frames):
     res = np.load(os.path.join(DATA_PATH , action , str(video) , "{}.npy".format(frame_num)))
     window.append(res)
   
   video_data.append(window)
   labels.append(label_map[action])

NOTE : The key points extracted from mediapipe are already normalized between 0-1

In [55]:
np.array(video_data).shape

(300, 30, 1662)

In [56]:
video_data

[[array([ 0.5282082 ,  0.69970912, -0.78407443, ...,  0.        ,
          0.        ,  0.        ]),
  array([ 0.55254889,  0.63023722, -0.78952068, ...,  0.        ,
          0.        ,  0.        ]),
  array([ 0.55559933,  0.61130732, -0.7708981 , ...,  0.        ,
          0.        ,  0.        ]),
  array([ 0.56082523,  0.60821176, -0.77176523, ...,  0.        ,
          0.        ,  0.        ]),
  array([ 0.56795919,  0.6058867 , -0.81826133, ...,  0.        ,
          0.        ,  0.        ]),
  array([ 0.56992733,  0.60519606, -0.77989995, ...,  0.        ,
          0.        ,  0.        ]),
  array([ 0.57354176,  0.60459828, -0.86566448, ...,  0.        ,
          0.        ,  0.        ]),
  array([ 0.57485545,  0.60356939, -0.90603018, ...,  0.        ,
          0.        ,  0.        ]),
  array([ 0.5737077 ,  0.60355467, -0.84255922, ...,  0.        ,
          0.        ,  0.        ]),
  array([ 0.57421315,  0.60291201, -1.03197324, ...,  0.        ,
       

In [57]:
np.array(labels).shape

(300,)

In [58]:
X = np.array(video_data)

In [59]:
X.shape

(300, 30, 1662)

In [60]:
X

array([[[ 0.5282082 ,  0.69970912, -0.78407443, ...,  0.        ,
          0.        ,  0.        ],
        [ 0.55254889,  0.63023722, -0.78952068, ...,  0.        ,
          0.        ,  0.        ],
        [ 0.55559933,  0.61130732, -0.7708981 , ...,  0.        ,
          0.        ,  0.        ],
        ...,
        [ 0.62980717,  0.59732503, -1.28325009, ...,  0.47731709,
          0.40780509, -0.02713273],
        [ 0.63064581,  0.59841526, -1.25939536, ...,  0.48286542,
          0.40332043, -0.03363072],
        [ 0.62974828,  0.59842288, -1.20764422, ...,  0.44305485,
          0.3869282 , -0.03627616]],

       [[ 0.62721896,  0.59237534, -1.21063435, ...,  0.39933065,
          0.32641381, -0.06619024],
        [ 0.62298107,  0.5921942 , -1.05823302, ...,  0.        ,
          0.        ,  0.        ],
        [ 0.62047112,  0.59255719, -1.09682631, ...,  0.        ,
          0.        ,  0.        ],
        ...,
        [ 0.53934711,  0.60268617, -0.99255943, ...,  

In [61]:
y = to_categorical(labels).astype(int)

In [62]:
y.shape

(300, 3)

In [63]:
len(X)

300

In [64]:
preprocessed_df = pd.DataFrame({"Landmarks" : video_data , "Labels" : labels})

In [65]:
preprocessed_df.head()

Unnamed: 0,Landmarks,Labels
0,"[[0.5282081961631775, 0.6997091174125671, -0.7...",0
1,"[[0.6272189617156982, 0.5923753380775452, -1.2...",0
2,"[[0.5349587202072144, 0.598039448261261, -0.98...",0
3,"[[0.692570149898529, 0.6050797700881958, -1.00...",0
4,"[[0.46998652815818787, 0.6263400912284851, -1....",0


In [66]:
for x in (preprocessed_df["Landmarks"][0]):
 print(type(x))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [67]:
# Converting each frame which is array form to list form for each video to insert into MongoDB as mongoDB requires data to be in the form of lists
preprocessed_df['Landmarks'] = preprocessed_df["Landmarks"].progress_apply(lambda landmarks: [arr.tolist() if isinstance(arr , np.ndarray) else arr for arr in landmarks] )

100%|██████████| 300/300 [00:00<00:00, 619.65it/s]


In [68]:
type(preprocessed_df['Landmarks'][0][0])

list

In [69]:
records = preprocessed_df.to_dict(orient='records')

In [70]:
type(records[1]['Landmarks'])

list

In [71]:
# Estabishing MongoDB connection

connection = MongoClient('localhost' , 27017)
db = connection['mydb']
collection = db['Preprocessed_Landmark_Data']

In [72]:
collection.insert_many(records)

InsertManyResult([ObjectId('675421a052425b893f7a9007'), ObjectId('675421a052425b893f7a9008'), ObjectId('675421a052425b893f7a9009'), ObjectId('675421a052425b893f7a900a'), ObjectId('675421a052425b893f7a900b'), ObjectId('675421a052425b893f7a900c'), ObjectId('675421a052425b893f7a900d'), ObjectId('675421a052425b893f7a900e'), ObjectId('675421a052425b893f7a900f'), ObjectId('675421a052425b893f7a9010'), ObjectId('675421a052425b893f7a9011'), ObjectId('675421a052425b893f7a9012'), ObjectId('675421a052425b893f7a9013'), ObjectId('675421a052425b893f7a9014'), ObjectId('675421a052425b893f7a9015'), ObjectId('675421a052425b893f7a9016'), ObjectId('675421a052425b893f7a9017'), ObjectId('675421a052425b893f7a9018'), ObjectId('675421a052425b893f7a9019'), ObjectId('675421a052425b893f7a901a'), ObjectId('675421a052425b893f7a901b'), ObjectId('675421a052425b893f7a901c'), ObjectId('675421a052425b893f7a901d'), ObjectId('675421a052425b893f7a901e'), ObjectId('675421a052425b893f7a901f'), ObjectId('675421a052425b893f7a90