In [1]:
!pip install mediapipe


Defaulting to user installation because normal site-packages is not writeable
Collecting mediapipe
  Using cached mediapipe-0.10.3-cp311-cp311-win_amd64.whl (50.2 MB)
Collecting opencv-contrib-python (from mediapipe)
  Using cached opencv_contrib_python-4.8.0.76-cp37-abi3-win_amd64.whl (44.8 MB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Using cached sounddevice-0.4.6-py3-none-win_amd64.whl (199 kB)
Installing collected packages: opencv-contrib-python, sounddevice, mediapipe
Successfully installed mediapipe-0.10.3 opencv-contrib-python-4.8.0.76 sounddevice-0.4.6


In [1]:
import cv2
import numpy as np
import tensorflow as tf
import mediapipe as mp

# Initialize MediaPipe Hand solution
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Load the trained model
model = tf.keras.models.load_model('asl_classifier.h5')

# Prepare a list of symbols (the order should correspond to the class indices used during training)
symbols = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
           'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
           'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']

# Start video capture
cap = cv2.VideoCapture(0)

with mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5) as hands:
    while True:
        # Read a frame from the webcam
        ret, frame = cap.read()

        if not ret:
            break

        # Flip the image horizontally
        frame = cv2.flip(frame, 1)
    
        # Convert the image color to RGB
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Process the frame with MediaPipe Hand
        results = hands.process(rgb_frame)

        # Draw hand landmarks
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Get the region of interest
            coords = None
            for hand_landmarks in results.multi_hand_landmarks:
                coords = tuple(np.multiply(
                    np.array(
                        (hand_landmarks.landmark[mp_hands.HandLandmark.WRIST].x,
                         hand_landmarks.landmark[mp_hands.HandLandmark.WRIST].y)),
                    [640, 480]).astype(int))

                if coords[0] + 200 <= 640 and coords[1] + 200 <= 480:
                    break

            if coords is not None and len(coords) == 2:
                x, y = coords
                roi = cv2.resize(rgb_frame[y:y + 200, x:x + 200], (200, 200))

                # Resize ROI to match the model input shape
                roi = tf.image.resize(roi, (100, 100))

                # Make prediction
                prediction = model.predict(tf.expand_dims(roi, axis=0))

                # Show prediction
                cv2.putText(frame, symbols[np.argmax(prediction)], (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)

        # Show the frame
        cv2.imshow('Sign Language Recognition', frame)

        # Quit with 'q' key
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

# Release the video capture
cap.release()

# Close all OpenCV windows
cv2.destroyAllWindows()

ValueError: in user code:

    File "C:\Users\cyril\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\training.py", line 2341, in predict_function  *
        return step_function(self, iterator)
    File "C:\Users\cyril\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\training.py", line 2327, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\cyril\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\training.py", line 2315, in run_step  **
        outputs = model.predict_step(data)
    File "C:\Users\cyril\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\training.py", line 2283, in predict_step
        return self(x, training=False)
    File "C:\Users\cyril\AppData\Roaming\Python\Python311\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\cyril\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_2" is incompatible with the layer: expected shape=(None, 128, 128, 1), found shape=(None, 100, 100, 3)


In [3]:
import numpy as np
import tensorflow as tf
from PIL import Image
import cv2

# Load the trained model
model = tf.keras.models.load_model('model_with_regularization.keras')

# Prepare a list of symbols (the order should correspond to the class indices used during training)
symbols = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
           'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
           'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']

# Load and preprocess the uploaded image
uploaded_image_path = 'C:\\Users\\cyril\\PycharmProjects\\Sign2Speech\\processed_data\\A\\4.jpg'
img = Image.open(uploaded_image_path)
img = img.resize((100, 100))
img_array = np.array(img) / 255.0
roi = np.expand_dims(img_array, axis=0)

# Make prediction
prediction = model.predict(roi)

# Get the predicted sign label
predicted_label = np.argmax(prediction)
predicted_sign = symbols[predicted_label]

# Display the prediction result on the uploaded image
img = np.array(img)  # Convert PIL Image to numpy array
cv2.putText(img, predicted_sign, (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)

# Resize the image for better visualization
resized_img = cv2.resize(img, (800, 600))

# Display the resized image with the prediction result
cv2.imshow('Uploaded Image with Prediction', resized_img)
cv2.waitKey(0)
cv2.destroyAllWindows()


