In [None]:
import pickle
import cv2
import mediapipe as mp
import numpy as np
from collections import Counter

# Live sign classifier: grabs webcam frames, extracts hand landmarks with
# MediaPipe, classifies them with the pickled model and overlays a label that
# is smoothed by a majority vote over the most recent predictions.

# Load the trained model from the pickle file; the context manager closes the
# file handle as soon as loading is done.
# NOTE(review): pickle can execute arbitrary code while loading — only open
# model files that come from a trusted source.
with open(r"artifacts/model.pkl", 'rb') as model_file:
    model_dict = pickle.load(model_file)
model = model_dict['model']

# Initialize the VideoCapture object for the camera
cap = cv2.VideoCapture(0)

# Initialize Mediapipe hands solution
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# NOTE(review): static_image_mode=True makes MediaPipe treat every frame as an
# unrelated image; for a live stream False (tracking) is usually the intended
# mode — confirm before changing, it may affect the landmarks the model sees.
hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

# Dictionary to map prediction labels to characters
# NOTE(review): the word-building cell of this notebook maps 26 -> 'space' and
# 27 -> 'del' for its own model file; confirm this ordering matches how
# artifacts/model.pkl was trained.
labels_dict = {
    0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G', 7: 'H', 8: 'I',
    9: 'J', 10: 'K', 11: 'L', 12: 'M', 13: 'N', 14: 'O', 15: 'P', 16: 'Q',
    17: 'R', 18: 'S', 19: 'T', 20: 'U', 21: 'V', 22: 'W', 23: 'X', 24: 'Y',
    25: 'Z', 26: 'del', 27: 'space', 28: 'wait'
}

SMOOTHING_WINDOW = 20  # Number of recent predictions that take part in the vote
BOX_PADDING = 10       # Pixels of margin drawn around the detected hand

frame_predictions = []  # Buffer for the last SMOOTHING_WINDOW predictions
last_error = None       # Last reported prediction error (avoids per-frame spam)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Error: Failed to grab frame.")
            break

        H, W, _ = frame.shape
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        predicted_character = "..."

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    frame, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style()
                )

            # Classify only the first detected hand. Concatenating the
            # landmarks of several hands changes the feature-vector length.
            # Assumes the model was trained on single-hand vectors — TODO confirm.
            landmarks = results.multi_hand_landmarks[0].landmark
            x_ = [landmark.x for landmark in landmarks]
            y_ = [landmark.y for landmark in landmarks]
            min_x, min_y = min(x_), min(y_)

            # Features are the landmark coordinates relative to the hand's
            # top-left corner, interleaved as x0, y0, x1, y1, ...
            data_aux = []
            for landmark in landmarks:
                data_aux.append(landmark.x - min_x)
                data_aux.append(landmark.y - min_y)

            # Landmark coordinates are multiplied by the frame size to obtain
            # pixel positions; the padding is subtracted on the top-left corner
            # and added on the bottom-right so the box surrounds the hand.
            x1 = int(min_x * W) - BOX_PADDING
            y1 = int(min_y * H) - BOX_PADDING
            x2 = int(max(x_) * W) + BOX_PADDING
            y2 = int(max(y_) * H) + BOX_PADDING

            try:
                prediction = model.predict([np.asarray(data_aux)])
                frame_predictions.append(int(prediction[0]))

                # Keep only the last SMOOTHING_WINDOW predictions
                if len(frame_predictions) > SMOOTHING_WINDOW:
                    frame_predictions.pop(0)

                # Select the most frequent prediction in the window
                most_common_prediction = Counter(frame_predictions).most_common(1)[0][0]
                predicted_character = labels_dict[most_common_prediction]

            except Exception as e:
                # Best effort: keep the video loop alive, but report each
                # distinct failure once instead of hiding it.
                error_message = f"Prediction skipped: {type(e).__name__}: {e}"
                if error_message != last_error:
                    print(error_message)
                    last_error = error_message

            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
            cv2.putText(frame, predicted_character, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3,
                        cv2.LINE_AA)

        cv2.imshow('frame', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):  # Press 'q' to exit the loop
            break

except Exception as e:
    print(f"Error: {e}")

finally:
    # Always free the camera, the MediaPipe graph and the preview window
    cap.release()
    hands.close()
    cv2.destroyAllWindows()
    cv2.waitKey(1)


In [None]:
import pickle
import cv2
import mediapipe as mp
import numpy as np
from collections import Counter

# Word builder: grabs webcam frames, extracts hand landmarks with MediaPipe,
# classifies them with the pickled model, and turns the majority vote of every
# VOTE_WINDOW predictions into an edit of the on-screen word.

# Load the trained model from the pickle file; the context manager closes the
# file handle as soon as loading is done.
# NOTE(review): pickle can execute arbitrary code while loading — only open
# model files that come from a trusted source.
with open(r"artifacts/modelspace.pkl", 'rb') as model_file:
    model_dict = pickle.load(model_file)
model = model_dict['model']

# Initialize the VideoCapture object for the camera
cap = cv2.VideoCapture(0)

# Initialize Mediapipe hands solution
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# NOTE(review): static_image_mode=True makes MediaPipe treat every frame as an
# unrelated image; for a live stream False (tracking) is usually the intended
# mode — confirm before changing, it may affect the landmarks the model sees.
hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

# Dictionary to map prediction labels to characters
# NOTE(review): the plain classifier cell of this notebook maps 26 -> 'del' and
# 27 -> 'space' for its own model file; confirm this ordering matches how
# artifacts/modelspace.pkl was trained.
labels_dict = {
    0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G', 7: 'H', 8: 'I',
    9: 'J', 10: 'K', 11: 'L', 12: 'M', 13: 'N', 14: 'O', 15: 'P', 16: 'Q',
    17: 'R', 18: 'S', 19: 'T', 20: 'U', 21: 'V', 22: 'W', 23: 'X', 24: 'Y',
    25: 'Z', 26: 'space', 27: 'del', 28: 'wait'
}

VOTE_WINDOW = 20  # Number of predictions collected before a character is voted
BOX_PADDING = 10  # Pixels of margin drawn around the detected hand


def apply_prediction(word_buffer, last_char, predicted_character):
    """Apply one voted label to the word being built.

    Args:
        word_buffer: Text formed so far.
        last_char: Letter appended most recently, or "" when the next letter
            must not be treated as a repeat.
        predicted_character: Voted label: a letter, "del", "space" or "wait".

    Returns:
        Tuple ``(word_buffer, last_char)`` with the updated state.
    """
    if predicted_character == "wait":
        # NOTE(review): "wait" leaves last_char untouched, so a doubled letter
        # still needs a different sign in between — confirm this is intended.
        return word_buffer, last_char
    if predicted_character == "del":
        # Slicing an empty string stays empty, so "del" is never typed as text.
        return word_buffer[:-1], ""
    if predicted_character == "space":
        # Never start the text with a space and never add two in a row.
        if word_buffer and not word_buffer.endswith(" "):
            word_buffer += " "
        return word_buffer, ""
    if predicted_character != last_char:
        # A letter held across several vote windows is appended only once.
        word_buffer += predicted_character
    return word_buffer, predicted_character


frame_predictions = []  # Buffer for the current window of VOTE_WINDOW predictions
word_buffer = ""  # Stores the formed word
last_char = ""  # Tracks the last added character
predicted_character = "..."  # Last voted label; shown until the next vote replaces it
last_error = None  # Last reported prediction error (avoids per-frame spam)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Error: Failed to grab frame.")
            break

        H, W, _ = frame.shape
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    frame, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style()
                )

            # Classify only the first detected hand. Concatenating the
            # landmarks of several hands changes the feature-vector length.
            # Assumes the model was trained on single-hand vectors — TODO confirm.
            landmarks = results.multi_hand_landmarks[0].landmark
            x_ = [landmark.x for landmark in landmarks]
            y_ = [landmark.y for landmark in landmarks]
            min_x, min_y = min(x_), min(y_)

            # Features are the landmark coordinates relative to the hand's
            # top-left corner, interleaved as x0, y0, x1, y1, ...
            data_aux = []
            for landmark in landmarks:
                data_aux.append(landmark.x - min_x)
                data_aux.append(landmark.y - min_y)

            # Landmark coordinates are multiplied by the frame size to obtain
            # pixel positions; the padding is subtracted on the top-left corner
            # and added on the bottom-right so the box surrounds the hand.
            x1 = int(min_x * W) - BOX_PADDING
            y1 = int(min_y * H) - BOX_PADDING
            x2 = int(max(x_) * W) + BOX_PADDING
            y2 = int(max(y_) * H) + BOX_PADDING

            try:
                prediction = model.predict([np.asarray(data_aux)])
                frame_predictions.append(int(prediction[0]))

                # Once VOTE_WINDOW frames are collected, take the most common prediction
                if len(frame_predictions) >= VOTE_WINDOW:
                    most_common_prediction = Counter(frame_predictions).most_common(1)[0][0]
                    # Start a fresh window before the lookup so a failure below
                    # cannot leave the buffer stuck past the vote threshold.
                    frame_predictions = []
                    predicted_character = labels_dict[most_common_prediction]
                    word_buffer, last_char = apply_prediction(
                        word_buffer, last_char, predicted_character
                    )
            except Exception as e:
                # Best effort: keep the video loop alive, but report each
                # distinct failure once instead of hiding it.
                error_message = f"Prediction skipped: {type(e).__name__}: {e}"
                if error_message != last_error:
                    print(error_message)
                    last_error = error_message

            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
            cv2.putText(frame, predicted_character, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3, cv2.LINE_AA)

        # Display the formed word
        cv2.putText(frame, f"Word: {word_buffer}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (255, 0, 0), 3, cv2.LINE_AA)

        cv2.imshow('frame', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):  # Press 'q' to exit the loop
            break

except Exception as e:
    print(f"Error: {e}")

finally:
    # Always free the camera, the MediaPipe graph and the preview window
    cap.release()
    hands.close()
    cv2.destroyAllWindows()
    cv2.waitKey(1)
