In [2]:
import os
import cv2
import pickle
import time
import numpy as np
import pandas as pd
import mediapipe as mp
import matplotlib.pyplot as plt


from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
import joblib

from language_processor_module import LanguageProcessor

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\shank\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\shank\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


KeyPoints using MP Holistic

In [3]:
# Short aliases for the two MediaPipe solution modules used throughout the notebook:
# the drawing helpers and the holistic (face + pose + hands) landmark solution.
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

In [4]:
def mediapipe_detections(image, model):
    """Run `model` on a BGR frame and return the frame together with the results.

    Args:
        image: Frame in BGR channel order (as produced by OpenCV capture).
        model: Object exposing ``process(rgb_image)``; here a MediaPipe
            Holistic instance.

    Returns:
        A ``(image, results)`` tuple: the frame converted BGR -> RGB -> BGR,
        and whatever ``model.process`` returned for the RGB version.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is made read-only only for the duration of the inference call.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

In [5]:
def draw_landmarks(image, results):
    """Draw face, pose and both hand landmark sets onto `image` with default styling.

    Args:
        image: Frame that ``mp_drawing.draw_landmarks`` draws onto.
        results: Holistic results exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    # (attribute on `results`, connection set on `mp_holistic`), in drawing order.
    layers = (
        ("face_landmarks", "FACEMESH_TESSELATION"),
        ("pose_landmarks", "POSE_CONNECTIONS"),
        ("left_hand_landmarks", "HAND_CONNECTIONS"),
        ("right_hand_landmarks", "HAND_CONNECTIONS"),
    )
    for landmarks_attr, connections_attr in layers:
        mp_drawing.draw_landmarks(
            image,
            getattr(results, landmarks_attr),
            getattr(mp_holistic, connections_attr),
        )

In [6]:

def draw_styled_landmarks(image, results):
    """Draw face, pose and hand landmarks onto `image` with per-part colours.

    Args:
        image: Frame that ``mp_drawing.draw_landmarks`` draws onto.
        results: Holistic results exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.

    Each part gets two ``DrawingSpec`` objects, passed to ``draw_landmarks`` as
    the landmark style followed by the connection style. Colour tuples are
    8-bit channel values (0-255); presumably BGR since the frame comes from
    OpenCV -- TODO confirm.
    """
    # (results attribute, connection set, landmark style, connection style)
    # where each style is (color, thickness, circle_radius).
    styles = (
        # Face connections
        ("face_landmarks", "FACEMESH_TESSELATION",
         ((80, 110, 10), 1, 1), ((80, 255, 121), 1, 1)),  # was 256: not a valid 8-bit channel value
        # Pose connections
        ("pose_landmarks", "POSE_CONNECTIONS",
         ((80, 22, 10), 2, 4), ((80, 44, 121), 2, 2)),
        # Left hand connections
        ("left_hand_landmarks", "HAND_CONNECTIONS",
         ((121, 22, 76), 2, 4), ((121, 44, 250), 2, 2)),
        # Right hand connections
        ("right_hand_landmarks", "HAND_CONNECTIONS",
         ((245, 117, 66), 2, 4), ((245, 66, 230), 2, 2)),
    )

    for landmarks_attr, connections_attr, point_style, line_style in styles:
        mp_drawing.draw_landmarks(
            image,
            getattr(results, landmarks_attr),
            getattr(mp_holistic, connections_attr),
            mp_drawing.DrawingSpec(
                color=point_style[0], thickness=point_style[1], circle_radius=point_style[2]
            ),
            mp_drawing.DrawingSpec(
                color=line_style[0], thickness=line_style[1], circle_radius=line_style[2]
            ),
        )

In [7]:
def extract_keypoints(results):
    """Flatten all landmark groups of `results` into one 1-D feature vector.

    The vector is the concatenation, in this order, of:
      * pose:       33 landmarks x (x, y, z, visibility)
      * face:       468 landmarks x (x, y, z)
      * left hand:  21 landmarks x (x, y, z)
      * right hand: 21 landmarks x (x, y, z)

    A group that is missing (falsy) on `results` contributes zeros of the
    size listed above, so the layout stays fixed when every present group
    has the listed number of landmarks.
    """
    def _flatten(group, expected_count, fields):
        # Missing group -> zero placeholder with the expected length.
        if not group:
            return np.zeros(expected_count * len(fields))
        rows = [[getattr(point, field) for field in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ("x", "y", "z")
    parts = [
        _flatten(results.pose_landmarks, 33, xyz + ("visibility",)),
        _flatten(results.face_landmarks, 468, xyz),
        _flatten(results.left_hand_landmarks, 21, xyz),
        _flatten(results.right_hand_landmarks, 21, xyz),
    ]
    return np.concatenate(parts)

In [8]:
# Load the pre-trained model
model_path = './TRAINED_MODELS/model_2o v2 LSTM_model__DateTime_2024_09_01__19_53_00__Loss_0.3630443811416626__Accuracy_0.8823529481887817.h5'
model = tf.keras.models.load_model(model_path)

# Load the pickled label mapping. The `with` block closes the file handle
# once the mapping has been read (previously it was left open).
# NOTE: pickle.load can execute arbitrary code embedded in the file -- only
# load a labels file that you produced yourself.
with open('labels_npy.pkl', 'rb') as labels_file:
    actions = pickle.load(labels_file)

Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.


In [9]:
# Swap keys and values of the loaded mapping so it can be looked up in the
# other direction. NOTE: this rebinds `actions` in place, so running this
# cell a second time swaps the mapping back again.
actions = dict((value, key) for key, value in actions.items())
actions

{0: 'GOOD',
 1: 'WELCOME',
 2: 'HOUSE',
 3: 'NO',
 4: 'NICE',
 5: 'HELLO',
 6: 'MORNING',
 7: 'WORK',
 8: 'THANK YOU',
 9: 'BYE',
 10: 'YES'}

Start capturing

In [11]:
# Create an instance of LanguageProcessor
processor = LanguageProcessor()

# Specify language code (e.g., 'hi' for Hindi)
language_code = "hi"

SEQUENCE_LENGTH = 20   # number of keypoint frames fed to the model per prediction
MAX_WORDS = 5          # number of most recent words kept in the banner
threshold = 0.9        # minimum model score required to accept a prediction

sequence = []          # latest keypoint vectors, newest first
sentence = []          # accepted words, oldest first

cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and the preview window are released even
# when something inside the loop raises. The recorded run of this cell ended
# with a LookupError for the NLTK resource 'averaged_perceptron_tagger_eng'
# (presumably raised inside the LanguageProcessor calls); that resource has to
# be installed via nltk.download('averaged_perceptron_tagger_eng') first.
try:
    # Set MediaPipe Model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic_model:
        while cap.isOpened():

            # Read Frames; stop when the camera delivers nothing, since the
            # frame is then unusable for the colour conversion below.
            ret, frame = cap.read()
            if not ret:
                break

            # Make detection (the converted image copy is not needed here)
            _, results = mediapipe_detections(frame, holistic_model)

            # Draw landmarks
            draw_styled_landmarks(frame, results)

            # Predict Logic
            keypoints = extract_keypoints(results)
            # NOTE(review): the newest frame is inserted at the front, so the
            # model receives frames in reverse chronological order -- confirm
            # this matches how the training sequences were built.
            sequence.insert(0, keypoints)
            sequence = sequence[:SEQUENCE_LENGTH]

            if len(sequence) == SEQUENCE_LENGTH:
                # verbose=0 suppresses the per-call Keras progress output.
                res = model.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]
                best_index = int(np.argmax(res))
                best_score = res[best_index]
                print(best_score)

                # Viz Logic: accept confident predictions, skipping immediate repeats
                if best_score > threshold:
                    word = actions[best_index]
                    if not sentence or word != sentence[-1]:
                        sentence.append(word)

            if len(sentence) > MAX_WORDS:
                sentence = sentence[-MAX_WORDS:]

            # NOTE(review): both calls run on every frame, even when `sentence`
            # has not changed -- consider calling them only on change if they
            # are expensive.
            corrected_sentence = processor.correct_sentence(sentence)
            processor.process_text(corrected_sentence, language_code)

            # Banner across the full frame width (previously fixed at 640 px)
            cv2.rectangle(frame, (0, 0), (frame.shape[1], 40), (245, 117, 16), -1)
            cv2.putText(frame, ' '.join(sentence), (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # Show Frame
            cv2.imshow("OpenCV Feeds", frame)

            # Press 'q' to quit
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    cap.release()
    cv2.destroyAllWindows()

LookupError: 
**********************************************************************
  Resource [93maveraged_perceptron_tagger_eng[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('averaged_perceptron_tagger_eng')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtaggers/averaged_perceptron_tagger_eng/[0m

  Searched in:
    - 'C:\\Users\\shank/nltk_data'
    - 'c:\\Python312\\nltk_data'
    - 'c:\\Python312\\share\\nltk_data'
    - 'c:\\Python312\\lib\\nltk_data'
    - 'C:\\Users\\shank\\AppData\\Roaming\\nltk_data'
    - 'C:\\nltk_data'
    - 'D:\\nltk_data'
    - 'E:\\nltk_data'
**********************************************************************
