In [None]:
import cv2
import numpy as np
import mediapipe as mp
from tensorflow.keras.models import load_model
import os

# Module-level aliases for the MediaPipe solution modules used below:
# `mp_holistic` supplies the Holistic tracker and the landmark connection sets,
# `mp_drawing` supplies the landmark rendering helpers (draw_landmarks, DrawingSpec).
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

class SignLanguageDetector:
    """Real-time sign-language recogniser built on MediaPipe Holistic keypoints.

    Each webcam frame is reduced to a flat keypoint vector (pose, face and both
    hands).  The most recent ``sequence_length`` vectors are fed to a Keras
    model, and a word is appended to ``sentence`` once the model's top class
    is both confident and stable across recent frames.
    """

    # How many of the most recent per-frame predictions must agree before a
    # word is accepted; also the maximum prediction history that is retained.
    PREDICTION_WINDOW = 10
    # Number of recognised words kept in (and displayed from) ``sentence``.
    SENTENCE_LENGTH = 3
    # Consecutive failed camera reads tolerated before the capture loop exits.
    MAX_FAILED_READS = 100

    def __init__(self, model_path='best_model.keras'):
        """Load the trained model and reset all detection state.

        Args:
            model_path: Path of the Keras model file to load.
        """
        self.model = load_model(model_path)
        self.actions = ['hello', 'my', 'name', 'is', 'i', 'study', 'in', 'department', 'am']
        self.sequence_length = 40  # Must match training

        # Detection state
        self.sequence = []      # rolling window of per-frame keypoint vectors
        self.sentence = []      # most recently accepted words
        self.predictions = []   # rolling window of per-frame argmax class indices
        self.threshold = 0.7    # minimum confidence for accepting a word

        # Set while run_real_time_detection() is active; process_frame() needs it.
        self.holistic = None

        # One BGR colour per action, used for the prediction banner.
        self.colors = [
            (245, 117, 16), (117, 245, 16), (16, 117, 245),
            (200, 100, 200), (100, 200, 100), (100, 100, 200),
            (200, 200, 100), (200, 100, 100), (100, 200, 200),
        ]

    @staticmethod
    def _flatten_landmarks(landmark_list, count, with_visibility=False):
        """Flatten one landmark group, or return zeros when it was not detected."""
        width = 4 if with_visibility else 3
        if not landmark_list:
            return np.zeros(count * width)
        if with_visibility:
            rows = [[lm.x, lm.y, lm.z, lm.visibility] for lm in landmark_list.landmark]
        else:
            rows = [[lm.x, lm.y, lm.z] for lm in landmark_list.landmark]
        return np.array(rows).flatten()

    def extract_keypoints(self, results):
        """Extract keypoints in the same format as training.

        Args:
            results: Holistic result object exposing ``pose_landmarks``,
                ``face_landmarks``, ``left_hand_landmarks`` and
                ``right_hand_landmarks`` (each may be ``None``).

        Returns:
            1-D array: pose (33*4) + face (468*3) + left hand (21*3) +
            right hand (21*3) values, with zeros for any missing group.
        """
        pose = self._flatten_landmarks(results.pose_landmarks, 33, with_visibility=True)
        face = self._flatten_landmarks(results.face_landmarks, 468)
        lh = self._flatten_landmarks(results.left_hand_landmarks, 21)
        rh = self._flatten_landmarks(results.right_hand_landmarks, 21)
        return np.concatenate([pose, face, lh, rh])

    def draw_landmarks(self, image, results):
        """Draw the detected face, pose and hand landmarks onto ``image`` in place."""
        # (landmarks, connections, landmark colour, connection colour, size)
        layers = [
            (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
             (80, 110, 10), (80, 255, 121), 1),
            (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
             (80, 22, 10), (80, 44, 121), 2),
            (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
             (121, 22, 76), (121, 44, 250), 2),
            (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
             (245, 117, 66), (245, 66, 230), 2),
        ]
        for landmarks, connections, point_color, line_color, size in layers:
            mp_drawing.draw_landmarks(
                image, landmarks, connections,
                mp_drawing.DrawingSpec(color=point_color, thickness=size, circle_radius=size),
                mp_drawing.DrawingSpec(color=line_color, thickness=size, circle_radius=size),
            )

    def draw_prediction_info(self, image, action, confidence):
        """Overlay the predicted action, its confidence and the recent sentence.

        Layout is derived from the frame size, so it matches the original
        fixed 640x480 layout on such frames and adapts to others.  Drawing is
        best-effort: any failure is reported and the frame is left as is.
        """
        try:
            height, width = image.shape[:2]
            # Modulo keeps the lookup valid if there are more actions than colours.
            color = self.colors[self.actions.index(action) % len(self.colors)]

            # Action name banner
            cv2.rectangle(image, (0, 0), (width, 40), color, -1)
            cv2.putText(image, f'{action.upper()}', (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # Confidence bar, scaled to the frame width
            cv2.rectangle(image, (0, 40), (int(confidence * width), 60), color, -1)
            cv2.putText(image, f'Confidence: {confidence:.2f}', (10, 55),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1, cv2.LINE_AA)

            # Sentence history along the bottom edge
            cv2.rectangle(image, (0, height - 40), (width, height), (245, 117, 16), -1)
            cv2.putText(image, ' '.join(self.sentence[-self.SENTENCE_LENGTH:]),
                        (10, height - 15),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 1, cv2.LINE_AA)
        except Exception as e:
            # Deliberately broad: a drawing problem must not stop the capture loop.
            print(f"Error in visualization: {e}")

    def _update_sentence(self, res):
        """Record one prediction and append its word when confident and stable.

        A word is accepted only if every retained prediction (at most the last
        ``PREDICTION_WINDOW`` frames) names the same class and that class's
        probability exceeds ``threshold``.  Consecutive duplicates are skipped.

        Args:
            res: 1-D sequence of class probabilities, one per entry in ``actions``.

        Returns:
            Index of the highest-probability class.
        """
        best = int(np.argmax(res))
        self.predictions.append(best)
        # Bound the history so it cannot grow for the lifetime of the process.
        self.predictions = self.predictions[-self.PREDICTION_WINDOW:]

        stable = all(previous == best for previous in self.predictions)
        if stable and res[best] > self.threshold:
            word = self.actions[best]
            if not self.sentence or word != self.sentence[-1]:
                self.sentence.append(word)
                self.sentence = self.sentence[-self.SENTENCE_LENGTH:]
        return best

    def process_frame(self, image):
        """Process a single BGR frame and return the (possibly annotated) frame.

        Raises:
            RuntimeError: If no Holistic tracker is active, i.e. the method is
                called outside ``run_real_time_detection``.
        """
        if self.holistic is None:
            raise RuntimeError(
                "Holistic tracker is not initialised; call run_real_time_detection()")

        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Marked read-only while MediaPipe processes it (performance hint
        # recommended by the MediaPipe examples).
        rgb.flags.writeable = False
        results = self.holistic.process(rgb)
        rgb.flags.writeable = True
        image = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

        # Extract keypoints and maintain the rolling sequence
        self.sequence.append(self.extract_keypoints(results))
        self.sequence = self.sequence[-self.sequence_length:]

        # Nothing to predict until a full sequence has been collected
        if len(self.sequence) < self.sequence_length:
            return image

        # verbose=0 suppresses the per-call progress bar Keras prints by default
        res = self.model.predict(np.expand_dims(self.sequence, axis=0), verbose=0)[0]

        if len(res) != len(self.actions):
            print(f"Warning: Model output dimension ({len(res)}) doesn't match actions count ({len(self.actions)})")
            return image

        best = self._update_sentence(res)

        # Visualize
        self.draw_landmarks(image, results)
        self.draw_prediction_info(image, self.actions[best], res[best])
        return image

    def run_real_time_detection(self, camera_index=0):
        """Run the real-time detection loop until 'q' is pressed.

        Args:
            camera_index: Index of the capture device passed to cv2.VideoCapture.
        """
        import time  # local import: only needed to pace retries after a failed read

        cap = cv2.VideoCapture(camera_index)
        if not cap.isOpened():
            print("Error: Could not open webcam")
            return

        failed_reads = 0
        try:
            with mp_holistic.Holistic(min_detection_confidence=0.5,
                                      min_tracking_confidence=0.5) as holistic:
                self.holistic = holistic
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        # Tolerate transient failures, but do not spin forever
                        # if the camera has stopped delivering frames.
                        failed_reads += 1
                        if failed_reads > self.MAX_FAILED_READS:
                            print("Error: Could not read frame from webcam")
                            break
                        time.sleep(0.01)
                        continue
                    failed_reads = 0

                    # Process frame
                    processed_frame = self.process_frame(frame)

                    # Show result
                    cv2.imshow('Sign Language Detection', processed_frame)

                    # Exit on 'q' key
                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        break
        finally:
            # Always release the device and windows, even if processing raised.
            self.holistic = None
            cap.release()
            cv2.destroyAllWindows()

if __name__ == "__main__":
    # Single source of truth for the model file so the existence check and the
    # error message cannot drift apart (the message previously named a
    # different file than the one actually checked).
    model_path = 'best_model.keras'
    if not os.path.exists(model_path):
        print(f"Error: Model file '{model_path}' not found")
    else:
        # The detector loads 'best_model.keras' by default, matching model_path.
        detector = SignLanguageDetector()
        print("Starting real-time detection. Press 'q' to quit.")
        detector.run_real_time_detection()

Starting real-time detection. Press 'q' to quit.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 795ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m1/1[0m [