In [2]:
from flask import Flask, render_template, request
from flask_socketio import SocketIO, emit
import cv2
import mediapipe as mp
import numpy as np
import pickle
from collections import deque
from autocorrect import Speller
import base64

app = Flask(__name__)
socketio = SocketIO(app)

# Load the trained model
model_path = 'model.p'  # Replace with the correct model path
with open(model_path, 'rb') as f:
    model_dict = pickle.load(f)
model = model_dict['model']

# Initialize MediaPipe Hands for hand detection
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, min_detection_confidence=0.3)

# Speller for autocorrecting words
spell = Speller(lang='en')

# Define a deque to store detected letters
detected_letters = deque(maxlen=20)

@app.route('/')
def index():
    return render_template('index.html')

# WebSocket connection to receive frames from the front end
@socketio.on('frame')
def handle_frame(data):
    frame = decode_base64_image(data['frame'])  # Assume the image is sent in base64
    letter = process_frame_with_model(frame)
    
    if letter:
        detected_letters.append(letter)
        word = ''.join(detected_letters)
        corrected_word = spell(word)
        emit('letter', {'letter': letter, 'corrected_word': corrected_word})

def process_frame_with_model(frame):
    img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(img_rgb)
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            data_aux = []
            x_ = []
            y_ = []
            for landmark in hand_landmarks.landmark:
                x_.append(landmark.x)
                y_.append(landmark.y)

            for landmark in hand_landmarks.landmark:
                data_aux.append(landmark.x - min(x_))
                data_aux.append(landmark.y - min(y_))
            
            data_aux = np.array(data_aux).reshape(1, -1)
            prediction = model.predict(data_aux)
            alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
            return alphabet[prediction[0]]
    return None

def decode_base64_image(data):
    # Convert base64 to a NumPy array (Image)
    img_data = base64.b64decode(data.split(",")[1])
    np_arr = np.frombuffer(img_data, np.uint8)
    return cv2.imdecode(np_arr, cv2.IMREAD_COLOR)

if __name__ == '__main__':
    socketio.run(app, debug=True, allow_unsafe_werkzeug=True)


Werkzeug appears to be used in a production deployment. Consider switching to a production web server instead.


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with watchdog (windowsapi)


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [3]:
import cv2
import mediapipe as mp
import numpy as np
import pickle
from collections import deque
from autocorrect import Speller

# Load the trained model
model_path = 'model.p'  # Update with the correct path to your model
with open(model_path, 'rb') as f:
    model_dict = pickle.load(f)
model = model_dict['model']

# Initialize MediaPipe Hands for hand detection
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, min_detection_confidence=0.3)

# Speller for autocorrecting words
spell = Speller(lang='en')

# Define a deque to store detected letters
detected_letters = deque(maxlen=20)  # Adjust maxlen based on how many letters you want to store

# Function to combine letters into words and autocorrect them
def form_words_and_autocorrect(detected_letters):
    word = ''.join(detected_letters)
    corrected_word = spell(word)
    return corrected_word

# Function to process the frame with the trained model and detect a letter
def process_frame_with_model(frame):
    img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Convert to RGB for MediaPipe processing
    results = hands.process(img_rgb)

    # If hand landmarks are detected, predict the letter using the model
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            data_aux = []
            x_ = []
            y_ = []

            # Collect x and y coordinates
            for landmark in hand_landmarks.landmark:
                x = landmark.x
                y = landmark.y
                x_.append(x)
                y_.append(y)

            # Normalize the coordinates
            for landmark in hand_landmarks.landmark:
                data_aux.append(landmark.x - min(x_))
                data_aux.append(landmark.y - min(y_))

            # Convert the landmarks to a numpy array
            data_aux = np.array(data_aux).reshape(1, -1)

            # Predict using the model
            prediction = model.predict(data_aux)

            # Get the predicted letter (assuming it's mapped as an alphabet letter)
            alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
            return alphabet[prediction[0]]

    return None

# Capture video from webcam
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

detected_letters = deque(maxlen=20)

while True:
    ret, frame = cap.read()

    if not ret:
        print("Error: Could not read frame.")
        break

    # Process the frame with the model
    predicted_letter = process_frame_with_model(frame)

    if predicted_letter:
        detected_letters.append(predicted_letter)
        corrected_word = form_words_and_autocorrect(detected_letters)
        print(f"Detected Letter: {predicted_letter}, Corrected Word: {corrected_word}")

    # Display the frame
    cv2.imshow('Webcam Feed', frame)

    # Press 'q' to quit the program
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


Werkzeug appears to be used in a production deployment. Consider switching to a production web server instead.


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with watchdog (windowsapi)


SystemExit: 1