<a href="https://colab.research.google.com/github/Prothoma2001/Real-Time-Bangla-Sign-Language-Recognition-Thesis-/blob/main/Bangla_Continuous_Sign_Language_Recognition_using_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install mediapipe

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mediapipe
  Downloading mediapipe-0.9.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (33.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.0/33.0 MB[0m [31m42.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: mediapipe
Successfully installed mediapipe-0.9.1.0


In [None]:
# Attach Google Drive to the Colab filesystem; the dataset folder used further
# down is addressed through this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import os
import cv2
import mediapipe as mp
import pandas as pd

# Short aliases for the MediaPipe solution modules.
mp_hands = mp.solutions.hands            # hand-landmark detector used by extract_hand_landmarks
mp_drawing = mp.solutions.drawing_utils  # drawing helpers (not referenced in this cell)

def extract_hand_landmarks(video_path, max_frames=None):
    """Run MediaPipe Hands over a video and collect one flat row per detected hand.

    Args:
        video_path: Path of the video file to open with OpenCV.
        max_frames: Maximum number of frames to read from the start of the
            video. ``None`` reads until the video ends; ``0`` reads nothing.

    Returns:
        A list of rows ``[frame_id, hand_index, x, y, z, x, y, z, ...]``, one
        row per hand detected in a frame (frames without a detection add no
        row), or ``None`` if the video could not be opened.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return None

        landmarks_all = []
        with mp_hands.Hands(max_num_hands=2, min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands:
            frame_id = 0
            # The frame budget is checked *before* reading, so exactly
            # `max_frames` frames are processed (the previous post-processing
            # check let one extra frame through).
            while max_frames is None or frame_id < max_frames:
                ret, frame = cap.read()
                if not ret:
                    break

                # OpenCV decodes to BGR; the frame is converted to RGB before
                # being handed to MediaPipe.
                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = hands.process(image)

                # multi_hand_landmarks is falsy when no hand was detected.
                for hand_index, hand_landmarks in enumerate(results.multi_hand_landmarks or []):
                    row = [frame_id, hand_index]
                    for landmark in hand_landmarks.landmark:
                        row.extend((landmark.x, landmark.y, landmark.z))
                    landmarks_all.append(row)

                frame_id += 1

        return landmarks_all
    finally:
        # Always give the capture handle back, including on errors and on the
        # "could not open" early return.
        cap.release()


def extract_folder_landmarks(folder_path, landmarks_file, labels_file, max_frames=None):
    """Extract hand landmarks for every ``.mp4`` in each class sub-folder and append them to CSV.

    Every immediate sub-directory of ``folder_path`` is treated as one class.
    Classes are numbered 0..K-1 in sorted folder-name order, so the numbering
    is contiguous and repeatable regardless of directory listing order or of
    stray files next to the class folders.

    Args:
        folder_path: Directory containing one sub-directory per class.
        landmarks_file: CSV path opened in append mode; receives one line per
            landmark row, formatted as the class label followed by the values
            returned by ``extract_hand_landmarks`` for that row.
        labels_file: CSV path opened in append mode; receives one
            ``filename,label`` line per processed video.
        max_frames: Forwarded unchanged to ``extract_hand_landmarks``.

    Videos for which ``extract_hand_landmarks`` returns ``None`` are reported
    and skipped instead of aborting the whole run.
    """
    # Filter to directories *before* enumerating so that non-directory entries
    # cannot consume a label number.
    class_names = sorted(
        name for name in os.listdir(folder_path)
        if os.path.isdir(os.path.join(folder_path, name))
    )

    # Both files stay in append mode, so repeated runs keep adding rows.
    with open(landmarks_file, 'a') as landmarks_out, open(labels_file, 'a') as labels_out:
        for label, folder_name in enumerate(class_names):
            class_dir = os.path.join(folder_path, folder_name)

            for filename in sorted(os.listdir(class_dir)):
                # skip non-video files
                if not filename.endswith('.mp4'):
                    continue

                video_path = os.path.join(class_dir, filename)
                landmarks_all = extract_hand_landmarks(video_path, max_frames=max_frames)
                if landmarks_all is None:
                    print('Skipping unreadable video {}.'.format(video_path))
                    continue

                for landmarks in landmarks_all:
                    landmarks_str = ','.join(str(value) for value in landmarks)
                    # Exactly one newline per row (a doubled newline used to
                    # leave a blank line after every row).
                    landmarks_out.write('{},{}\n'.format(label, landmarks_str))

                labels_out.write('{},{}\n'.format(filename, label))

    print('Landmarks saved to {}.'.format(landmarks_file))
    print('Labels saved to {}.'.format(labels_file))
    

# Example run: walk the dataset folder on Drive and append the extracted rows
# to the two CSV files named below (relative paths, i.e. the working directory).
folder_path = '/content/drive/MyDrive/data'
landmarks_file, labels_file = 'hand_landmarks.csv', 'labels.csv'
max_frames = 200  # per-video frame budget handed to the extractor
extract_folder_landmarks(
    folder_path=folder_path,
    landmarks_file=landmarks_file,
    labels_file=labels_file,
    max_frames=max_frames,
)



In [None]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from keras.utils import np_utils

# Load the landmark features and the class labels, dropping every row that has
# a missing value. dropna() returns a new frame, so nothing is mutated in place.
# NOTE(review): the extraction cell of this notebook writes 'hand_landmarks.csv'
# and 'labels.csv' to the working directory, whereas this cell reads from
# /content/sample_data/ - confirm these are the intended input files.
landmarks_df = pd.read_csv('/content/sample_data/landmarks.csv').dropna()
labels_df = pd.read_csv('/content/sample_data/labels.csv').dropna()

# Save the cleaned tables as new CSV files in the working directory.
landmarks_df.to_csv('landmarks.csv', index=False)
labels_df.to_csv('labels.csv', index=False)

# Work on plain numpy arrays from here on.
landmarks = landmarks_df.to_numpy()
labels = labels_df.to_numpy()

# The two tables are cleaned independently, so make sure they still describe
# the same number of samples before they are paired up for training.
if landmarks.shape[0] != labels.shape[0]:
    raise ValueError(
        'landmarks and labels have different row counts after cleaning: '
        '{} vs {}'.format(landmarks.shape[0], labels.shape[0])
    )

# Map the raw label values onto contiguous class indices 0..K-1 before one-hot
# encoding. For labels that already are 0..K-1 this is the identity; for labels
# with gaps (e.g. 0, 2, 3) it avoids indexing past `num_classes`.
# `class_values[i]` is the original label value of class index i.
class_values, label_indices = np.unique(labels, return_inverse=True)
num_classes = class_values.shape[0]
# NOTE(review): newer Keras releases expose this helper as
# keras.utils.to_categorical - confirm np_utils is importable in your runtime.
labels = np_utils.to_categorical(label_indices.reshape(labels.shape), num_classes)

# Scale the feature values.
# NOTE(review): 255 is the usual divisor for 8-bit pixel intensities; if these
# columns are the landmark coordinates produced by the extraction cell they are
# not pixel values, so confirm this scaling is intended. Left unchanged so that
# results stay comparable with earlier runs.
landmarks = landmarks / 255.0

# Reshape each sample's 65 feature values into a 13 x 5 single-channel grid,
# which is the input layout the Conv2D layers below are built for.
expected_features = 13 * 5
if landmarks.shape[1] != expected_features:
    raise ValueError(
        'expected {} feature columns per row, found {}'.format(
            expected_features, landmarks.shape[1]
        )
    )
landmarks = landmarks.reshape(landmarks.shape[0], 13, 5, 1)


In [None]:
# Sanity check: feature tensor shape first, one-hot label shape second.
print(landmarks.shape, labels.shape, sep='\n')

(38383, 13, 5, 1)
(38383, 4)


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
# NOTE(review): rows are shuffled individually. If several rows come from the
# same video, frames of one video can land on both sides of the split - confirm
# whether a per-video split is needed.
X_train, X_test, y_train, y_test = train_test_split(
    landmarks,
    labels,
    test_size=0.3,
    random_state=42,
)

In [None]:
import time
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, LSTM
from keras.layers import Reshape

# CNN architecture: two 3x3 convolutions (32 filters each) -> 2x2 max-pool ->
# dropout, then a 512-unit dense layer and a softmax over the classes.
# Not included (previously present only as commented-out code): a second
# 64-filter convolution block, a Dropout(0.5) after the dense layer, and a
# Reshape((1, 512)) + LSTM(32) head.
cnn_layers = [
    Conv2D(32, (3, 3), padding='same', input_shape=X_train.shape[1:]),
    Activation('relu'),
    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2), padding='same'),
    Dropout(0.25),
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dense(num_classes),
    Activation('softmax'),
]
model = Sequential(cnn_layers)

# Multi-class setup: categorical cross-entropy loss, Adam optimizer, accuracy metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Training hyper-parameters.
batch_size = 8
epochs = 300

# The timer starts before fitting and is read after evaluation, so the reported
# duration covers both.
start_time = time.time()

# The held-out split doubles as the per-epoch validation data.
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_test, y_test),
)

# Final loss / accuracy on the held-out split.
score = model.evaluate(X_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

run_time1 = time.time() - start_time
print("-----%s seconds-----" % run_time1)

# Persist the trained network in HDF5 format.
model.save("CNN_model_videos_3.h5")

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [None]:
#   Accuracy = 90.16%