## Preprocessing the Dataset Created
- Normalizing the dataset
- Converting the labels into one-hot encoded form

In [63]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import numpy as np
import os
from tqdm import tqdm
tqdm.pandas()
from pymongo import MongoClient
import pandas as pd

In [64]:
# File Path to store the landmark data

DATA_PATH = os.path.join("LANDMARK_DATA")

# Defining the actions 

actions = np.array(['hello' , 'thankyou' , 'iloveyou'])

# Defining the number of videos per action

no_of_videos = 30 

# Defining the number of frames per video

no_of_frames = 30


In [65]:
for num , label in enumerate(actions):
 print(num , label)

0 hello
1 thankyou
2 iloveyou


In [66]:
# Creating Label Map 

label_map = {label:num for num , label in enumerate(actions)}

In [67]:
label_map

{'hello': 0, 'thankyou': 1, 'iloveyou': 2}

In [68]:
 # importing the keypoint data stored as numpy arrays locally

video_data , labels = [] , []

for action in actions:
  for video in range(no_of_videos):

   window = []

   for frame_num in range(no_of_frames):
     res = np.load(os.path.join(DATA_PATH , action , str(video) , "{}.npy".format(frame_num)))
     window.append(res)
   
   video_data.append(window)
   labels.append(label_map[action])

NOTE : The key points extracted from mediapipe are already normalized between 0-1

In [69]:
np.array(video_data).shape

(90, 30, 1662)

In [70]:
video_data

[[array([ 0.48065224,  0.49961472, -2.0412364 , ...,  0.        ,
          0.        ,  0.        ]),
  array([ 0.548204  ,  0.49962679, -1.46068573, ...,  0.32158235,
          0.4842366 , -0.02871111]),
  array([ 0.56474423,  0.49930447, -1.46504617, ...,  0.29121   ,
          0.53220797, -0.00797502]),
  array([ 0.55826348,  0.49939868, -1.63277209, ...,  0.        ,
          0.        ,  0.        ]),
  array([ 0.55256563,  0.49950689, -1.56892347, ...,  0.3097041 ,
          0.82298547, -0.00171271]),
  array([ 0.55288625,  0.4994345 , -1.61392653, ...,  0.3425214 ,
          0.95327604, -0.04101617]),
  array([ 0.55006087,  0.49920294, -1.5357002 , ...,  0.3447842 ,
          0.9435519 , -0.04059755]),
  array([ 0.54760826,  0.49683374, -1.53707707, ...,  0.33501303,
          0.91033256, -0.03627473]),
  array([ 0.54662049,  0.49504429, -1.51402426, ...,  0.33440495,
          0.90888566, -0.03550041]),
  array([ 0.54551339,  0.4950456 , -1.57081664, ...,  0.33054742,
       

In [71]:
np.array(labels).shape

(90,)

In [72]:
X = np.array(video_data)

In [73]:
X.shape

(90, 30, 1662)

In [74]:
y = to_categorical(labels).astype(int)

In [75]:
y.shape

(90, 3)

In [76]:
len(X)

90

In [77]:
preprocessed_df = pd.DataFrame({"Landmarks" : video_data , "Labels" : labels})

In [78]:
preprocessed_df.head()

Unnamed: 0,Landmarks,Labels
0,"[[0.48065224289894104, 0.4996147155761719, -2....",0
1,"[[0.5581340789794922, 0.47436487674713135, -1....",0
2,"[[0.5391056537628174, 0.4753058850765228, -1.5...",0
3,"[[0.4901489317417145, 0.47468236088752747, -1....",0
4,"[[0.608124315738678, 0.4691307842731476, -1.36...",0


In [92]:
for x in (preprocessed_df["Landmarks"][0]):
 print(type(x))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [None]:
# Converting each frame which is array form to list form for each video to insert into MongoDB as mongoDB requires data to be in the form of lists
preprocessed_df['Landmarks'] = preprocessed_df["Landmarks"].progress_apply(lambda landmarks: [arr.tolist() if isinstance(arr , np.ndarray) else arr for arr in landmarks] )

100%|██████████| 90/90 [00:00<00:00, 510.16it/s]


In [107]:
type(preprocessed_df['Landmarks'][0][0])

list

In [108]:
records = preprocessed_df.to_dict(orient='records')

In [110]:
type(records[1]['Landmarks'])

list

In [111]:
# Estabishing MongoDB connection

connection = MongoClient('localhost' , 27017)
db = connection['mydb']
collection = db['Preprocessed_Landmark_Data']

In [112]:
collection.insert_many(records)

InsertManyResult([ObjectId('6750458bdc7ee572e313ee94'), ObjectId('6750458bdc7ee572e313ee95'), ObjectId('6750458bdc7ee572e313ee96'), ObjectId('6750458bdc7ee572e313ee97'), ObjectId('6750458bdc7ee572e313ee98'), ObjectId('6750458bdc7ee572e313ee99'), ObjectId('6750458bdc7ee572e313ee9a'), ObjectId('6750458bdc7ee572e313ee9b'), ObjectId('6750458bdc7ee572e313ee9c'), ObjectId('6750458bdc7ee572e313ee9d'), ObjectId('6750458bdc7ee572e313ee9e'), ObjectId('6750458bdc7ee572e313ee9f'), ObjectId('6750458bdc7ee572e313eea0'), ObjectId('6750458bdc7ee572e313eea1'), ObjectId('6750458bdc7ee572e313eea2'), ObjectId('6750458bdc7ee572e313eea3'), ObjectId('6750458bdc7ee572e313eea4'), ObjectId('6750458bdc7ee572e313eea5'), ObjectId('6750458bdc7ee572e313eea6'), ObjectId('6750458bdc7ee572e313eea7'), ObjectId('6750458bdc7ee572e313eea8'), ObjectId('6750458bdc7ee572e313eea9'), ObjectId('6750458bdc7ee572e313eeaa'), ObjectId('6750458bdc7ee572e313eeab'), ObjectId('6750458bdc7ee572e313eeac'), ObjectId('6750458bdc7ee572e313ee