In [1]:
import mediapipe as mp
import pickle as pk
from tqdm import tqdm
import cv2
import os
import pickle
import datetime as dt

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

### ISL Dataset

In [2]:
isl_dir = './data/isl_data/'
print(f'Number of signs included: {len(os.listdir(isl_dir))}')
isl_signs = os.listdir(isl_dir)
isl_signs.sort()
for d in isl_signs:
    if d == '.DS_Store':
        continue
    print(f'{d}: {len(os.listdir(os.path.join(isl_dir, d)))}')

Number of signs included: 36
1: 1200
2: 1200
3: 1200
4: 1200
5: 1200
6: 1200
7: 1200
8: 1200
9: 1200
A: 1200
B: 1200
C: 1200
D: 1200
E: 1200
F: 1200
G: 1200
H: 1200
I: 1200
J: 1200
K: 1200
L: 1200
M: 1200
N: 1200
O: 1200
P: 1200
Q: 1200
R: 1200
S: 1200
T: 1200
U: 1200
V: 1200
W: 1200
X: 1200
Y: 1200
Z: 1200


### Creating labelled dataset

In [3]:
# initiating mediapipe configurations

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [4]:
# initializing image directory

DATA_DIR = './data/gen_data'
data_prefix = f'prep_data/{DATA_DIR.split("/")[-1]}'

In [5]:
data = []
labels = []
max_count = 20

for dir_ in tqdm(os.listdir(DATA_DIR)):
    if dir_ == '.DS_Store':
        continue
    count = 0
    for img_path in os.listdir(os.path.join(DATA_DIR, dir_)):
        data_aux = []
        img = cv2.imread(os.path.join(DATA_DIR, dir_, img_path))
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        results = hands.process(img_rgb)
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                for i in range(len(hand_landmarks.landmark)):
                    x = hand_landmarks.landmark[i].x
                    y = hand_landmarks.landmark[i].y
                    data_aux.append(x)
                    data_aux.append(y)

            data.append(data_aux)
            labels.append(dir_)
        
        count += 1
        if count == max_count:
            break

100%|██████████| 4/4 [00:01<00:00,  2.19it/s]


In [6]:
now = dt.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
f = open(f'{data_prefix}_{now}.pickle', 'wb')
pickle.dump({'data': data, 'labels': labels}, f)
print(f'Dataset stored at {data_prefix}_{now}.pickle')
f.close()

Dataset stored at prep_data/gen_data_2023_10_21_11_32_16.pickle


### Training a model - Random Forest Classifier (Scikit-learn)

In [11]:
# initializing model directory

DATA_LOC = 'prep_data/gen_data_2023_10_21_11_32_16.pickle'
model_prefix = f'trained_models/{DATA_LOC.split("/")[-1].split("_")[0]}_model'

# loading labelled dataset

data_dict = pickle.load(open(DATA_LOC, 'rb'))

In [12]:
# initializing data & label variables

data = np.asarray(data_dict['data'])
labels = np.asarray(data_dict['labels'])

In [13]:
# train-test split

X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

In [14]:
# training Random Forest Classifier model

clf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
print('Training...')
clf.fit(X_train, y_train)

# testing the trained model

print('Testing...')
y_pred = clf.predict(X_test)
print(f'{accuracy_score(y_test, y_pred) * 100}% of samples classified correctly')

# storing the model

now = dt.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
f = open(f'{model_prefix}_{now}.pickle', 'wb')
pickle.dump({'model': clf}, f)
f.close()
print(f'Model stored at {model_prefix}_{now}.pickle')

Training...
Testing...
100.0% of samples classified correctly
Model stored at trained_models/gen_model_2023_10_21_11_35_07.pickle
