In [7]:
import os
from datetime import datetime

In [11]:
def get_unique_filename(base_path):
    """Build a time-stamped ``.keras`` path alongside ``base_path``.

    The last component of ``base_path`` is used as the file stem; the current
    local time formatted as ``HHMMSS`` is appended after an underscore and the
    ``.keras`` extension is added. The directory part is kept unchanged.

    Note that the stamp contains no date, so the same name can recur on a
    different day (or within the same second).

    Args:
        base_path: Path whose final component is the desired file stem.

    Returns:
        str: ``<directory>/<stem>_<HHMMSS>.keras``.
    """
    parent, stem = os.path.split(base_path)
    stamp = datetime.now().strftime("%H%M%S")
    return os.path.join(parent, f"{stem}_{stamp}.keras")

In [None]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

In [None]:
# Initializing MediaPipe Holistic
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

In [None]:
def landmarks_detection(image, holistic):
    """Run ``holistic.process`` on a BGR frame and return a BGR copy plus results.

    Args:
        image: Frame in BGR channel order (as delivered by OpenCV).
        holistic: Object exposing ``process(rgb_image)``; in this notebook a
            MediaPipe Holistic instance.

    Returns:
        tuple: ``(bgr_image, results)`` where ``bgr_image`` is the frame after
        the BGR -> RGB -> BGR round trip and ``results`` is whatever
        ``holistic.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is flagged read-only only for the duration of the process() call.
    rgb_frame.flags.writeable = False
    results = holistic.process(rgb_frame)
    rgb_frame.flags.writeable = True

    # Back to BGR so the frame can be displayed with OpenCV.
    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

In [None]:
def draw_landmarks(image, results):
    """Draw the left- and right-hand landmarks from ``results`` onto ``image``.

    Uses the default drawing style and ``mp_holistic.HAND_CONNECTIONS``.
    Face and pose landmarks are intentionally not drawn.

    Args:
        image: Frame passed to ``mp_drawing.draw_landmarks`` as drawing target.
        results: Object exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks``.

    Returns:
        None
    """
    for hand_attr in ("left_hand_landmarks", "right_hand_landmarks"):
        mp_drawing.draw_landmarks(
            image,
            getattr(results, hand_attr),
            mp_holistic.HAND_CONNECTIONS,
        )

In [None]:
def draw_styled_landmarks(image, results):
    """Draw both hands onto ``image`` with a distinct colour pair per hand.

    Each hand is drawn with ``mp_holistic.HAND_CONNECTIONS``; the fourth and
    fifth arguments to ``mp_drawing.draw_landmarks`` are ``DrawingSpec``
    objects (thickness 2, circle radius 3 and 2 respectively). Face and pose
    landmarks are intentionally not drawn.

    Args:
        image: Frame passed to ``mp_drawing.draw_landmarks`` as drawing target.
        results: Object exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks``.

    Returns:
        None
    """
    # (attribute on `results`, colour of first spec, colour of second spec)
    hand_styles = (
        ("left_hand_landmarks", (255, 20, 147), (255, 165, 0)),
        ("right_hand_landmarks", (0, 191, 255), (147, 112, 219)),
    )

    for hand_attr, first_color, second_color in hand_styles:
        mp_drawing.draw_landmarks(
            image,
            getattr(results, hand_attr),
            mp_holistic.HAND_CONNECTIONS,
            mp_drawing.DrawingSpec(color=first_color, thickness=2, circle_radius=3),
            mp_drawing.DrawingSpec(color=second_color, thickness=2, circle_radius=2),
        )

In [None]:
# cap = cv2.VideoCapture(0)
# # Set mediapipe model 
# with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
#     while cap.isOpened():
#         # Reading webcam feed
#         ret, frame = cap.read()
#         if not ret:
#             print("No frame to read. Exiting...")
#             break  
#         image, results = landmarks_detection(frame, holistic)       
#         draw_styled_landmarks(image, results)
#         cv2.imshow('Webcam Feed', image)
#         if cv2.waitKey(10) & 0xFF == 27: # Press Esc to exit the window
#             break
#     cap.release()
#     cv2.destroyAllWindows()

In [None]:
def extract_keypoints(results):
    """Flatten the landmarks of both hands into a single 1-D feature vector.

    For each hand the ``x``, ``y`` and ``z`` of every entry in ``.landmark``
    are laid out consecutively. A hand whose landmarks attribute is falsy is
    replaced by ``21 * 3`` zeros. Pose and face landmarks are not included.

    Args:
        results: Object exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks``; each is either falsy or has a
            ``landmark`` iterable of points with ``x``, ``y``, ``z``.

    Returns:
        numpy.ndarray: Left-hand values followed by right-hand values.
    """
    def _hand_vector(hand):
        # Missing hand -> zero block, so absent hands still occupy their slot.
        if not hand:
            return np.zeros(21 * 3)
        coords = [[point.x, point.y, point.z] for point in hand.landmark]
        return np.array(coords).flatten()

    left = _hand_vector(results.left_hand_landmarks)
    right = _hand_vector(results.right_hand_landmarks)
    return np.concatenate([left, right])

In [None]:
no_sequences = 30  # Number of sequences to capture for each sign
sequence_length = 30  # Number of frames in each sequence
DATA_PATH = './Sign_Language_Dataset'  # Path to save the keypoints data

# Class names are the entries of DATA_PATH whose name does not start with a dot.
# NOTE: the order is whatever os.listdir returns, and that order defines the
# label indices (see label_map), so it has to match the order that was in
# effect when any saved model was trained. It is deliberately left unsorted.
signs = np.array([entry for entry in os.listdir(DATA_PATH) if not entry.startswith('.')])

In [None]:
# Show the class names discovered in DATA_PATH.
signs

### Pre-Processing 

In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [None]:
# Map every class name to its position in `signs`; that position is the integer label.
label_map = dict(zip(signs, range(len(signs))))

In [None]:
# Show the class-name -> integer-label mapping.
label_map

In [None]:
from tqdm import tqdm

# Load every recorded sequence: one window of `sequence_length` keypoint arrays
# per sequence folder, plus the integer label of the sign it belongs to.
sequences, labels = [], []
for action in tqdm(signs, desc="Processing actions", ncols=100):
    action_dir = os.path.join(DATA_PATH, action)

    # Skip dot-files (same rule used to build `signs`) so a stray hidden entry
    # does not break the numeric conversion. Sorting numerically makes the
    # sample order deterministic, which the fixed random_state of the later
    # train/test split relies on. Non-numeric folder names still raise ValueError.
    sequence_dirs = sorted(
        (name for name in os.listdir(action_dir) if not name.startswith('.')),
        key=int,
    )

    for sequence in sequence_dirs:
        # Use the folder name as found on disk so zero-padded names resolve too.
        window = [
            np.load(os.path.join(action_dir, sequence, f"{frame_num}.npy"))
            for frame_num in range(sequence_length)
        ]
        sequences.append(window)
        labels.append(label_map[action])


In [None]:
# Stack all windows into one array; the model definition reads its input shape
# from axes 1 and 2 of this array.
X = np.array(sequences)

# One-hot encode the labels. num_classes is pinned to the number of signs so Y
# always has one column per class; otherwise to_categorical infers the width
# from the largest label present, which can be narrower than the model's
# output layer when the highest-index class has no samples.
Y = to_categorical(labels, num_classes=len(signs)).astype(int)

# Hold out 10% of the sequences for evaluation; fixed seed for a repeatable split.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, random_state=48)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping

In [None]:
# Number of sequences in the training and test splits.
len(X_train), len(X_test)

In [None]:
# TensorBoard logs are written to the "Logs" folder. While training, they can be
# inspected with:  tensorboard --logdir=.
log_dir = 'Logs'
tb_callback = TensorBoard(log_dir=log_dir)

# Early stopping: watch val_loss (lower is better), allow 20 epochs without
# improvement, then stop and restore the best weights seen.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=20,
    restore_best_weights=True,
    verbose=1,
)


In [None]:
# Classifier: three stacked LSTM layers followed by a small dense head.
# The input shape is taken from axes 1 and 2 of X_train, and the softmax
# output has one unit per entry in `signs`.
model = Sequential([
    LSTM(
        64,
        return_sequences=True,
        activation='tanh',
        input_shape=(X_train.shape[1], X_train.shape[2]),
    ),
    Dropout(0.2),  # regularization
    LSTM(128, return_sequences=True, activation='tanh'),
    Dropout(0.2),
    # Final recurrent layer emits only its last output, not the full sequence.
    LSTM(64, return_sequences=False, activation='tanh'),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(signs.shape[0], activation='softmax'),
])

In [None]:
# Multi-class setup: one-hot targets, so categorical cross-entropy and
# categorical accuracy are used.
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)

In [None]:
# model.fit(X_train, Y_train, epochs=500, validation_data=(X_test, Y_test), callbacks=[tb_callback, early_stopping_callback])

In [None]:
# model.save('saved_model/sign_language.keras')  

In [None]:
from tensorflow.keras.models import load_model

In [None]:
# Load the saved network from disk. This rebinds `model`, replacing any model
# object built earlier in the notebook.
model = load_model('saved_model/sign_language.keras')

### Realtime Testing

In [None]:
from scipy import stats
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
import tensorflow as tf

In [None]:
import cv2
import numpy as np
import mediapipe as mp
from collections import Counter

# New detection variables
sequence = []  # rolling window of the most recent per-frame keypoint vectors
sentence = []  # accepted signs shown in the banner (only the last 5 are kept)
predictions = []  # per-frame argmax of the smoothed probabilities
threshold = 0.8  # minimum smoothed probability required to accept a sign
smoothed_probabilities = None  # Initialize smoothed probabilities
alpha = 0.2  # Smoothing factor
consistency_check_window = 10  # number of recent predictions used for the majority vote
window_title = 'Realtime LSTM Sign Language Detection'

# Initialize camera
cap = cv2.VideoCapture(0)

# Initialize mediapipe model
mp_holistic = mp.solutions.holistic

# try/finally guarantees the camera and the OpenCV windows are released even if
# something inside the loop raises; otherwise the device stays locked until the
# kernel is restarted.
try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        # Display countdown before starting predictions
        for i in range(5, 0, -1):
            ret, frame = cap.read()
            if not ret:
                # Camera delivered no frame; skip the countdown instead of
                # drawing on a missing image.
                break
            # Centre of the frame ((320, 240) for a 640x480 capture).
            center = (frame.shape[1] // 2, frame.shape[0] // 2)
            cv2.putText(frame, str(i), center, cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4)
            cv2.imshow(window_title, frame)
            cv2.waitKey(1000)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("No frame to read. Exiting...")
                break

            # Make detections
            image, results = landmarks_detection(frame, holistic)

            # Draw landmarks
            draw_styled_landmarks(image, results)

            # Prediction logic: keep only the newest `sequence_length` frames,
            # the same window length the training sequences were loaded with.
            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-sequence_length:]

            if len(sequence) == sequence_length:
                # verbose=0: predict() is called once per frame, so the default
                # progress output would flood the cell output.
                res = model.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]

                # Apply exponential smoothing
                if smoothed_probabilities is None:
                    smoothed_probabilities = res
                else:
                    smoothed_probabilities = alpha * res + (1 - alpha) * smoothed_probabilities

                current_prediction = np.argmax(smoothed_probabilities)
                predictions.append(current_prediction)

                # Accept a sign only if it wins a strict majority of the recent
                # predictions and its smoothed probability clears the threshold.
                if len(predictions) >= consistency_check_window:
                    recent = predictions[-consistency_check_window:]
                    most_common_pred, num_occurrences = Counter(recent).most_common(1)[0]
                    if (num_occurrences > consistency_check_window / 2
                            and smoothed_probabilities[most_common_pred] > threshold):
                        current_sign = signs[most_common_pred]
                        # Do not repeat the sign that was appended last.
                        if not sentence or current_sign != sentence[-1]:
                            sentence.append(current_sign)

                # Keep only the five most recent signs
                sentence = sentence[-5:]

                # Display the sentence on a banner spanning the frame width
                cv2.rectangle(image, (0, 0), (image.shape[1], 40), (245, 117, 16), -1)
                cv2.putText(image, ' '.join(sentence), (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # Show to screen
            cv2.imshow(window_title, image)

            # Break gracefully (Esc key)
            if cv2.waitKey(10) & 0xFF == 27:
                break
finally:
    cap.release()
    cv2.destroyAllWindows()