In [12]:
import cv2
import mediapipe as mp
import csv
from tensorflow.keras.models import load_model
import pickle
import pandas as pd

In [2]:
# Short aliases for the two MediaPipe solution modules used by the webcam cells:
# `drawing_utils` supplies draw_landmarks, `holistic` supplies the Holistic
# tracker and the HAND_CONNECTIONS topology.
mp_drawing, mp_holistic = mp.solutions.drawing_utils, mp.solutions.holistic

In [6]:
# Location of the pickled classifier, relative to the notebook's working directory.
MODEL_PATH = "models/lr_alpha_ASL.pkl"

# SECURITY NOTE: unpickling executes arbitrary code embedded in the file, so
# only load model files that come from a trusted source.
with open(MODEL_PATH, "rb") as model_file:
    model = pickle.load(model_file)

In [25]:
import string

# Label vocabulary: upper-case letters followed by the digits 0-9, minus J and Z.
# NOTE(review): J and Z are presumably excluded because the classifier was not
# trained on them — confirm against the training notebook.
letters = [*string.ascii_uppercase]
numbers = list(map(str, range(10)))
class_names = [label for label in letters + numbers if label not in ('J', 'Z')]

## 1. Without Transformer for Text Correction

In [None]:
import time

# Device index handed to OpenCV; adjust if the webcam is enumerated differently.
CAMERA_INDEX = 1

cap = cv2.VideoCapture(CAMERA_INDEX)
reported_errors = set()  # prediction errors already printed, so a recurring one is shown only once

# Live preview: classify the tracked hand in every frame and overlay the
# predicted class name and its probability. Press 'q' in the window to stop.
try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; `frame` is unusable and flipping it would raise.
                break

            # Mirror the frame, then convert BGR -> RGB for MediaPipe.
            image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB)

            # The frame is read-only while it is processed, writable again for drawing.
            image.flags.writeable = False
            results = holistic.process(image)
            image.flags.writeable = True

            hand = results.left_hand_landmarks
            if hand is not None:  # None means no hand was found in this frame
                try:
                    # Flatten the landmarks into one feature row: x, y, z, visibility each.
                    row = []
                    for landmark in hand.landmark:
                        row += [landmark.x, landmark.y, landmark.z, landmark.visibility]

                    row = pd.DataFrame([row])
                    preds = model.predict(row)[0]
                    predict_proba = max(model.predict_proba(row)[0])

                    cv2.rectangle(image, (0, 0), (500, 100), (0, 0, 255), -1)
                    cv2.putText(image, class_names[preds], (5, 70), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 3)
                    cv2.putText(image, str(round(predict_proba, 2)), (100, 70), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 3)
                    mp_drawing.draw_landmarks(image, hand, mp_holistic.HAND_CONNECTIONS)
                except Exception as error:
                    # Keep the preview alive (best effort), but surface each
                    # distinct failure once instead of silently discarding it.
                    if repr(error) not in reported_errors:
                        reported_errors.add(repr(error))
                        print(f"Prediction/drawing failed: {error!r}")

            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            cv2.imshow('ASL', image)
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even on an error or interrupt.
    cap.release()
    cv2.destroyAllWindows()
    cv2.waitKey(1)


I0000 00:00:1714366360.623127       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M2


In [None]:
# Manual cleanup: releases the capture device and closes all OpenCV windows.
# Repeats the cleanup performed by the preceding cell — presumably kept so the
# camera can be freed after that cell is interrupted (TODO confirm).
cap.release()
cv2.destroyAllWindows()
# NOTE(review): the extra waitKey(1) presumably gives the GUI a chance to
# process the window-close event — confirm.
cv2.waitKey(1)

## 2. With Transformer for Text Correction

In [None]:
# Load nlp library for text correction
!pip3 install transformers
from transformers import pipeline

In [None]:
import time

# Device index handed to OpenCV; adjust if the webcam is enumerated differently.
CAMERA_INDEX = 1
HOLD_SECONDS = 1            # a sign must stay unchanged this long before its letter is appended
IDLE_SECONDS = 2            # hand-free pause after which the phrase is spell-corrected
MAX_CORRECTION_LENGTH = 20  # max_length passed to the spelling-correction call

cap = cv2.VideoCapture(CAMERA_INDEX)

phrase = ""            # text accumulated so far
last_letter = None     # letter currently being held; None while no hand is recognised
letter_added = False   # True when phrase changed since the last spelling correction
start = time.time()    # start of the current hold / idle period
reported_errors = set()  # prediction errors already printed, so a recurring one is shown only once

# Live preview that builds a phrase from held signs and spell-corrects it after
# a pause. Requires `fix_spelling` to be defined beforehand as a callable that
# returns [{'generated_text': ...}]. Press 'q' in the window to stop.
try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; `frame` is unusable and flipping it would raise.
                break

            # Mirror the frame, then convert BGR -> RGB for MediaPipe.
            image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB)

            # The frame is read-only while it is processed, writable again for drawing.
            image.flags.writeable = False
            results = holistic.process(image)
            image.flags.writeable = True

            letter = None  # letter recognised in this frame, if any
            hand = results.left_hand_landmarks
            if hand is not None:  # None means no hand was found in this frame
                try:
                    # Flatten the landmarks into one feature row: x, y, z, visibility each.
                    row = []
                    for landmark in hand.landmark:
                        row += [landmark.x, landmark.y, landmark.z, landmark.visibility]

                    row = pd.DataFrame([row])
                    # Use the classifier loaded from disk; `fit_models` is never
                    # defined in this notebook.
                    preds = model.predict(row)[0]
                    predict_proba = max(model.predict_proba(row)[0])
                    # Section 1 treats the prediction as an index into class_names;
                    # a model that already emits string labels is accepted as well.
                    letter = preds if isinstance(preds, str) else class_names[preds]

                    cv2.rectangle(image, (0, 0), (500, 100), (0, 0, 255), -1)
                    cv2.putText(image, letter, (5, 70), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 3)
                    cv2.putText(image, str(round(predict_proba, 2)), (100, 70), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 3)
                    mp_drawing.draw_landmarks(image, hand, mp_holistic.HAND_CONNECTIONS)
                except Exception as error:
                    # Best effort: treat the frame as "no letter", but surface each
                    # distinct failure once instead of silently discarding it.
                    letter = None
                    if repr(error) not in reported_errors:
                        reported_errors.add(repr(error))
                        print(f"Prediction/drawing failed: {error!r}")

            if letter is None:
                # Hand lost: start timing the idle period (once per loss).
                if last_letter is not None:
                    last_letter = None
                    start = time.time()
            elif letter == last_letter:
                now = time.time()
                if now - start > HOLD_SECONDS:
                    phrase += letter
                    letter_added = True
                    start = now  # holding the sign longer appends the letter again
            else:
                # A different sign: restart the hold timer.
                last_letter = letter
                start = time.time()

            cv2.rectangle(image, (700, 0), (1600, 100), (0, 0, 255), -1)
            if phrase:
                phrase = phrase.title()

                if last_letter is None and time.time() - start > IDLE_SECONDS and letter_added:
                    # Correct once per batch of new letters; this call blocks the preview.
                    letter_added = False
                    clean_text = fix_spelling(phrase, max_length=MAX_CORRECTION_LENGTH)
                    phrase = clean_text[0]['generated_text']
                    phrase = phrase.replace('.', '')

                cv2.putText(image, phrase, (705, 70), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 3)

            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            cv2.imshow('ASL', image)
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even on an error or interrupt.
    cap.release()
    cv2.destroyAllWindows()
    cv2.waitKey(1)


In [None]:
# Manual cleanup: releases the capture device and closes all OpenCV windows.
# Repeats the cleanup performed by the preceding cell — presumably kept so the
# camera can be freed after that cell is interrupted (TODO confirm).
cap.release()
cv2.destroyAllWindows()
# NOTE(review): the extra waitKey(1) presumably gives the GUI a chance to
# process the window-close event — confirm.
cv2.waitKey(1)