In [None]:
import cv2
import numpy as np
import mediapipe as mp
import pickle
import tkinter as tk
from tkinter import StringVar
import time

# Load the RandomForest model
# NOTE(security): pickle.load can execute arbitrary code stored in the file,
# so "sign_language_model.pkl" must only ever come from a trusted source.
with open("sign_language_model.pkl", "rb") as file:
    model = pickle.load(file)

# Initialize Mediapipe for hand tracking
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
# Video mode (static_image_mode=False), at most one hand, detection threshold 0.3
hands = mp_hands.Hands(static_image_mode=False, min_detection_confidence=0.3, max_num_hands=1)

# Initialize video capture for the webcam (capture device index 0)
cap = cv2.VideoCapture(0)

# Create the Tkinter window
root = tk.Tk()
root.title("ASL Prediction")
root.geometry("400x200")

# Initialize text buffer for displaying the recognized text
# (created after the Tk root above; bound to the entry widget further down)
recognized_text = StringVar()
recognized_text.set("")

# Timer for controlling the pace of writing: update_text and handle_action
# only edit the buffer when at least delay_between_additions seconds have
# passed since last_addition_time.
last_addition_time = time.time()
delay_between_additions = 0.7  # Minimum gap in seconds between two edits, to avoid repeated characters

# Determine the expected input length based on model's training;
# run_camera only runs a prediction when the feature vector has this length.
expected_input_length = len(model.feature_importances_)

# Function to update recognized text with a delay
def update_text(new_text):
    """Append ``new_text`` to the recognized text, rate-limited by the timer.

    Nothing happens when fewer than ``delay_between_additions`` seconds have
    passed since ``last_addition_time``. Otherwise the text is appended, the
    timer is reset to the current time, and the entry cursor is moved to the
    end of the text.
    """
    global last_addition_time
    now = time.time()
    elapsed = now - last_addition_time
    if elapsed < delay_between_additions:
        # Too soon after the previous edit: drop this update.
        return

    recognized_text.set(recognized_text.get() + new_text)
    last_addition_time = now
    # Keep the cursor behind the character that was just added
    text_entry.icursor('end')

# Function to handle space and delete actions
def handle_action(action):
    """Apply a "space" or "delete" edit to the recognized text, rate-limited.

    ``"space"`` appends one blank; ``"delete"`` removes the last character
    when the text is not empty. Any other value leaves the text unchanged.
    Calls arriving less than ``delay_between_additions`` seconds after
    ``last_addition_time`` are ignored; every call that gets past that check
    resets the timer and moves the entry cursor to the end.
    """
    global last_addition_time
    now = time.time()
    if now - last_addition_time < delay_between_additions:
        # Too soon after the previous edit: ignore the request.
        return

    text = recognized_text.get()
    if action == "space":
        updated = text + " "
    elif action == "delete" and text:
        updated = text[:-1]
    else:
        updated = None  # nothing to change

    if updated is not None:
        recognized_text.set(updated)

    last_addition_time = now
    # Keep the cursor at the end of the (possibly shortened) text
    text_entry.icursor('end')

# Function to clear the text
def clear_text():
    """Empty the recognized-text buffer and move the entry cursor to the end.

    Unlike ``update_text`` and ``handle_action`` this is not rate-limited and
    does not touch ``last_addition_time``.
    """
    recognized_text.set("")
    text_entry.icursor('end')

# Text entry (rather than a label) bound to the recognized-text buffer,
# so the insertion cursor is visible
text_entry = tk.Entry(
    root,
    textvariable=recognized_text,
    font=("Arial", 14),
    justify="left",
    width=50,
    insertwidth=2,  # wider insertion cursor so it is easier to see
)
text_entry.pack(fill="x", padx=10, pady=10)
text_entry.focus()  # the entry needs keyboard focus for the cursor to show

# Button row, left to right: Space, Delete, Clear Text
button_frame = tk.Frame(root)
space_button = tk.Button(button_frame, text="Space", command=lambda: handle_action("space"))
delete_button = tk.Button(button_frame, text="Delete", command=lambda: handle_action("delete"))
clear_button = tk.Button(button_frame, text="Clear Text", command=clear_text)
space_button.pack(side="left", padx=5)
delete_button.pack(side="left", padx=5)
clear_button.pack(side="left", padx=5)
button_frame.pack(pady=10)

# Run webcam feed and update recognized text
def run_camera():
    """Read webcam frames, classify the visible hand sign, and update the text.

    For every frame, the hand landmarks reported by Mediapipe are drawn onto
    the frame and converted into a feature vector of ``(x - min_x, y - min_y)``
    pairs. When that vector has ``expected_input_length`` entries it is passed
    to ``model``; the most probable class and its probability are drawn on the
    frame, and if the probability reaches the threshold for that class the
    result is applied through ``handle_action`` ("delete" / "space") or
    ``update_text`` (everything else).

    The loop ends when a frame cannot be read or when 'q' is pressed in the
    OpenCV window. On every exit path, including an unexpected exception, the
    capture device and the OpenCV windows are released and the Tkinter main
    loop is asked to quit.
    """
    try:
        while True:
            # Read a frame from the webcam
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV delivers BGR frames; convert to RGB before Mediapipe
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(frame_rgb)

            # Feature vector and the raw coordinates collected for this frame
            data_aux, x_, y_ = [], [], []

            # multi_hand_landmarks is falsy when no hand was detected
            for hand_landmarks in results.multi_hand_landmarks or ():
                # Draw hand landmarks on the frame
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Collect landmarks for prediction
                points = [(lm.x, lm.y) for lm in hand_landmarks.landmark]
                if points:
                    x_.extend(px for px, _ in points)
                    y_.extend(py for _, py in points)
                    # The minima are fixed while the features are built, so
                    # compute them once instead of once per landmark.
                    min_x, min_y = min(x_), min(y_)
                    for px, py in points:
                        data_aux.extend([px - min_x, py - min_y])

                # Predict only if the landmarks match the expected input shape
                if len(data_aux) != expected_input_length:
                    continue

                # A single predict_proba call yields both the label (most
                # probable class) and its confidence.
                probabilities = model.predict_proba([np.asarray(data_aux)])[0]
                best = int(np.argmax(probabilities))
                # str() keeps .lower() and text concatenation working even if
                # the model was trained with non-string labels.
                predicted_class = str(model.classes_[best])
                confidence = probabilities[best]

                # Different confidence thresholds for different actions
                action = predicted_class.lower()
                if action == "delete" and confidence >= 0.3:  # Lower threshold for delete
                    handle_action("delete")
                elif action == "space" and confidence >= 0.5:  # Regular threshold for space
                    handle_action("space")
                elif confidence >= 0.5:  # Regular threshold for letters/numbers
                    update_text(predicted_class)

                # Display the predicted class and confidence over the frame
                cv2.putText(frame, f"Predicted: {predicted_class} ({confidence:.2f})", (50, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

            # Display the webcam frame
            cv2.imshow("ASL Recognition", frame)

            # Exit on pressing 'q'
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release resources even when the loop is left through an exception
        cap.release()
        cv2.destroyAllWindows()
        root.quit()  # Close Tkinter window after exiting OpenCV

# Run the camera in a separate thread to keep Tkinter responsive.
# The thread is a daemon so that closing the Tkinter window ends the program
# instead of leaving the process alive on the capture loop with no GUI to
# update.
import threading
camera_thread = threading.Thread(target=run_camera, daemon=True)
camera_thread.start()

# Start Tkinter main loop (blocks until the window is closed or run_camera
# calls root.quit())
root.mainloop()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
