In [5]:
import cv2
import os
import numpy as np
import mediapipe as mp
from mediapipe.python.solutions.holistic import Holistic
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score
from scipy import stats
from tensorflow.keras.models import load_model

In [17]:
from collections import Counter, deque

import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model

# Short aliases for the MediaPipe solution modules used below:
# `mp_holistic` supplies the Holistic tracker class and the landmark
# connection sets; `mp_drawing` supplies draw_landmarks / DrawingSpec.
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Extract keypoints
def extract_keypoints(results):
    """Flatten the landmarks in *results* into a single 1-D feature vector.

    The vector is the concatenation, in this order, of:
      * pose landmarks        (x, y, z, visibility) per landmark
      * face landmarks        (x, y, z) per landmark
      * left-hand landmarks   (x, y, z) per landmark
      * right-hand landmarks  (x, y, z) per landmark

    A group that is missing (falsy) on *results* is replaced by zeros sized
    for 33 pose, 468 face or 21 hand landmarks respectively, so a frame with
    nothing detected yields 33*4 + 468*3 + 21*3 + 21*3 = 1662 zeros.

    Args:
        results: object exposing ``pose_landmarks``, ``face_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``; each is
            either falsy or has a ``.landmark`` iterable of points.

    Returns:
        1-D ``numpy.ndarray`` with all groups laid end to end.
    """

    def _xyz(group, count):
        # Zero placeholder keeps the slot for a group that was not detected.
        if not group:
            return np.zeros(count * 3)
        return np.array([[pt.x, pt.y, pt.z] for pt in group.landmark]).flatten()

    body = results.pose_landmarks
    if body:
        # Pose is the only group that also carries a visibility score.
        pose = np.array(
            [[pt.x, pt.y, pt.z, pt.visibility] for pt in body.landmark]
        ).flatten()
    else:
        pose = np.zeros(33 * 4)

    face = _xyz(results.face_landmarks, 468)
    left_hand = _xyz(results.left_hand_landmarks, 21)
    right_hand = _xyz(results.right_hand_landmarks, 21)
    return np.concatenate([pose, face, left_hand, right_hand])

# Mediapipe detection
def mediapipe_detection(image, model):
    """Run *model* on a BGR frame and return the frame with its results.

    The frame is converted BGR -> RGB before ``model.process`` is called and
    converted back to BGR afterwards, so the returned image is a new array
    rather than the one passed in.

    Args:
        image: frame in BGR channel order (as produced by ``cv2``).
        model: object with a ``process(rgb_image)`` method.

    Returns:
        ``(bgr_image, results)`` where ``results`` is whatever
        ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Keep the buffer read-only for the duration of the model call.
    # NOTE(review): MediaPipe's examples do this as a performance hint.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

# Improved visualization
def _draw_hand_dots(image, hand_landmarks, color_for_index):
    """Draw one filled circle per hand landmark, coloured by landmark index."""
    height, width = image.shape[:2]
    for i, landmark in enumerate(hand_landmarks.landmark):
        # Landmark coordinates are scaled by the frame size to get pixels.
        center = (int(landmark.x * width), int(landmark.y * height))
        cv2.circle(image, center, 6, color_for_index(i), -1)


def draw_styled_landmarks(image, results):
    """Overlay the detected face, pose and hand landmarks on *image* in place.

    Face and pose are drawn with ``mp_drawing.draw_landmarks`` (points plus
    connections). Each hand is drawn as a set of filled dots whose colour
    shifts with the landmark index. Groups that are missing (falsy) on
    *results* are skipped.

    Args:
        image: frame to draw on; it is modified directly.
        results: object exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.

    Returns:
        None.
    """
    face_color = (0, 255, 255)
    pose_color = (0, 255, 0)

    if results.face_landmarks:
        mp_drawing.draw_landmarks(
            image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
            mp_drawing.DrawingSpec(color=face_color, thickness=1, circle_radius=1),
            mp_drawing.DrawingSpec(color=(0, 100, 255), thickness=1, circle_radius=1)
        )

    if results.pose_landmarks:
        mp_drawing.draw_landmarks(
            image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
            mp_drawing.DrawingSpec(color=pose_color, thickness=3, circle_radius=5),
            mp_drawing.DrawingSpec(color=(0, 150, 0), thickness=2, circle_radius=3)
        )

    # The two hands use mirrored gradients so they can be told apart.
    if results.left_hand_landmarks:
        _draw_hand_dots(
            image, results.left_hand_landmarks,
            lambda i: (255 - i * 10, 50 + i * 10, 255),
        )

    if results.right_hand_landmarks:
        _draw_hand_dots(
            image, results.right_hand_landmarks,
            lambda i: (50 + i * 10, 255 - i * 10, 255),
        )

# Smoothing class
class SmoothPrediction:
    """Majority-vote smoother over the most recent predictions.

    Keeps the last ``window_size`` values passed to :meth:`update` and
    reports the value that occurs most often among them.
    """

    def __init__(self, window_size=10):
        """Create a smoother that remembers the last *window_size* values."""
        self.window_size = window_size
        # deque(maxlen=...) drops the oldest entry automatically once full.
        self.buffer = deque(maxlen=window_size)

    def update(self, prediction):
        """Record *prediction* and return the most frequent buffered value.

        Ties are resolved deterministically in favour of the value seen most
        recently, rather than depending on set iteration order.

        Args:
            prediction: hashable label (e.g. a class index).

        Returns:
            The most common value currently in the window.
        """
        self.buffer.append(prediction)
        counts = Counter(self.buffer)
        # max() returns the first maximal item, so walking the window from
        # newest to oldest makes the newest value win a tie.
        return max(reversed(self.buffer), key=counts.__getitem__)

# Class labels; indexed below with the arg-max of the model's output.
actions = np.array(['hello', 'Indian', 'bye', 'my', 'you', 'man', 'girl', 'name', 'thank you', 'what', 'yes', 'no'])

# Load the trained model from disk.
model = load_model('test 12-30.h5')

SEQUENCE_LENGTH = 30  # frames of keypoints passed to the model per prediction
STABLE_FRAMES = 10    # recent smoothed predictions that must all agree (fewer at start-up)

sequence = []      # rolling window of per-frame keypoint vectors
sentence = []      # recognised words shown in the banner (last 5 kept)
predictions = []   # most recent smoothed predictions (last STABLE_FRAMES kept)
threshold = 0.7    # minimum model score for a word to be accepted

smooth_predictor = SmoothPrediction(window_size=10)

# Start video capture
cap = cv2.VideoCapture(0)
try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            image, results = mediapipe_detection(frame, holistic)
            draw_styled_landmarks(image, results)

            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-SEQUENCE_LENGTH:]

            if len(sequence) == SEQUENCE_LENGTH:
                # verbose=0: do not print a progress bar for every frame.
                res = model.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]
                prediction = np.argmax(res)
                stable_prediction = smooth_predictor.update(prediction)

                predictions.append(stable_prediction)
                # Only the last STABLE_FRAMES entries are ever inspected, so
                # drop older ones instead of letting the list grow forever.
                del predictions[:-STABLE_FRAMES]

                # Accept a word only when every recent smoothed prediction
                # agrees. (np.unique(...)[0] would merely be the smallest
                # recent label, which favours low class indices.)
                is_stable = all(p == stable_prediction for p in predictions)
                if is_stable and res[stable_prediction] > threshold:
                    # Do not repeat the word that was just added.
                    if len(sentence) == 0 or actions[stable_prediction] != sentence[-1]:
                        sentence.append(actions[stable_prediction])

                if len(sentence) > 5:
                    sentence = sentence[-5:]

            # Banner spans the full frame width whatever the camera resolution.
            cv2.rectangle(image, (0, 0), (image.shape[1], 40), (245, 117, 16), -1)
            cv2.putText(image, ' '.join(sentence), (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            cv2.imshow('OpenCV Feed', image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Release the camera and close windows even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()


