In [None]:
import numpy as np
import cv2
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image

# Load the model and define labels.
# `labels` is indexed with the argmax of the model output, so its order has to
# match the order of the model's output units.
model_path = "/Users/omar/Documents/Gradution_project/ASL Hand/98,97 Sign Language ALS Classifier.h5"
model = load_model(model_path)
labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'del', 'nothing', 'space']

# OpenCV Video Capture (change to 1 for external camera like iPhone)
cap = cv2.VideoCapture(0)

# Font settings for text display
font = cv2.FONT_HERSHEY_SIMPLEX
bottomLeftCornerOfText = (100, 300)
fontScale = 2
fontColor = (255, 255, 255)
# Passed as the 7th positional argument of cv2.putText, which is the stroke
# thickness (the name is kept because it is a notebook-level variable).
lineType = 2

# Capture, classify and display frames until the stream ends or 'q' is pressed.
# The try/finally guarantees the camera and the preview window are released
# even when the loop is left through an exception or a kernel interrupt.
try:
    while True:
        ret, frame = cap.read()  # Capture frame
        if not ret:
            # No frame could be read (camera missing or stream ended).
            break

        # Determine the frame dimensions
        height, width, _ = frame.shape

        # Half-extent of the centred square; the box reaches `box_size` pixels
        # to each side of the centre, so it spans the smaller frame dimension.
        box_size = min(height, width) // 2
        center_x, center_y = width // 2, height // 2

        # Corner coordinates of the bounding box as (x, y)
        box_top_left = (center_x - box_size, center_y - box_size)
        box_bottom_right = (center_x + box_size, center_y + box_size)

        # Draw the bounding box (cv2.rectangle draws onto `frame` itself)
        original = cv2.rectangle(frame, box_top_left, box_bottom_right, (0, 255, 0), 2)

        # Extract the image within the bounding box.
        # NOTE(review): the crop is taken after the outline was drawn, so its
        # border pixels may contain the green line, which also influences the
        # min/max used by the normalisation below. Left as is because changing
        # it would alter the model input - confirm against the training
        # preprocessing before moving the crop ahead of the drawing.
        img = frame[box_top_left[1]:box_bottom_right[1], box_top_left[0]:box_bottom_right[0]]

        # Resize and rescale the crop to the [-1, 1] range for prediction
        img_resized = cv2.resize(img, (224, 224))
        img_array = image.img_to_array(img_resized)
        img_normalized = cv2.normalize(img_array, None, alpha=-1, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)

        # Get the prediction from the model (batch of one image).
        # verbose=0 keeps Keras from printing a progress line for every frame.
        result_probabilities = model.predict([np.array([img_normalized])], verbose=0)
        letter_index = np.argmax(result_probabilities)
        letter = labels[letter_index]

        # Display the predicted letter on the frame
        cv2.putText(original, letter, bottomLeftCornerOfText, font, fontScale, fontColor, lineType)

        # Show the frame with the bounding box and prediction
        cv2.imshow("ASL Prediction", original)

        # Exit on 'q' key press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture and close all OpenCV windows
    cap.release()
    cv2.destroyAllWindows()


In [None]:
import tkinter as tk
from tkinter import Frame, Label, Button
import cv2
from PIL import Image, ImageTk
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
import numpy as np

# Trained ASL classifier (Keras .h5 file) used by process_frame().
model_path = "/Users/omar/Documents/Gradution_project/ASL Hand/98,97 Sign Language ALS Classifier.h5"
model = load_model(model_path)

# Class names in model-output order: the 26 lowercase letters followed by the
# three special classes.
labels = [chr(code) for code in range(ord('a'), ord('z') + 1)] + ['del', 'nothing', 'space']

# Video source shared by the GUI callbacks. Index 1 is what this notebook uses
# for an external camera; switch to 0 for the default device.
cap = cv2.VideoCapture(1)

def update_video():
    """Read one camera frame, show the annotated result and reschedule itself.

    The frame returned by ``cap.read()`` is passed through ``process_frame``,
    converted from BGR to RGBA and displayed in the ``lmain`` label. The
    function then asks Tk to call it again after 10 ms, which forms the video
    loop.

    If the capture is not open (it could not be opened, or it has been
    released), the loop stops instead of polling a dead device forever, and a
    short message is shown in the label. A single failed read on an open
    capture is treated as transient and simply retried on the next tick.
    """
    if not cap.isOpened():
        # Clear any previous image so the text becomes visible.
        lmain.configure(image="", text="Camera is not available")
        return

    ret, frame = cap.read()
    if ret:
        frame = process_frame(frame)
        cv2image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGBA)
        img = Image.fromarray(cv2image)
        imgtk = ImageTk.PhotoImage(image=img)
        # Keep a reference on the widget; otherwise the PhotoImage can be
        # garbage-collected and the label would show nothing.
        lmain.imgtk = imgtk
        lmain.configure(image=imgtk)
    lmain.after(10, update_video)

def process_frame(frame):
    """Annotate a camera frame with the detection box and the predicted label.

    A centred square whose side is about half of the smaller frame dimension
    is outlined in green, the pixels inside it are resized to 224x224, rescaled
    to the [-1, 1] range and classified with ``model``. The name of the most
    probable class is written near the top-left corner of the frame.

    Args:
        frame: image array with three dimensions (rows, columns, channels),
            as returned by ``cap.read()``. It is drawn on in place.

    Returns:
        The frame with the rectangle and the label text drawn on it.
    """
    rows, cols, _ = frame.shape

    # Half of the square's side length, measured from the frame centre.
    half_side = (min(rows, cols) // 2) // 2
    mid_x = cols // 2
    mid_y = rows // 2
    left, top = mid_x - half_side, mid_y - half_side
    right, bottom = mid_x + half_side, mid_y + half_side

    frame = cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)

    # NOTE(review): the region is cropped after the outline has been drawn, so
    # its border pixels may include the green line - confirm this matches the
    # preprocessing the model was trained with.
    roi = frame[top:bottom, left:right]
    model_input = cv2.normalize(
        image.img_to_array(cv2.resize(roi, (224, 224))),
        None,
        alpha=-1,
        beta=1,
        norm_type=cv2.NORM_MINMAX,
        dtype=cv2.CV_32F,
    )

    # Batch of one image; the argmax over the output selects the class name.
    probabilities = model.predict([np.array([model_input])])
    predicted = labels[np.argmax(probabilities)]

    cv2.putText(frame, predicted, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 2)
    return frame

def start_video():
    """Start the video loop; repeated calls after the first one are ignored.

    ``update_video`` reschedules itself through ``lmain.after``, so every
    additional call would start another independent loop reading from the same
    camera. A flag stored on the function object makes the start one-shot.
    """
    if getattr(start_video, "_started", False):
        return
    start_video._started = True
    update_video()

def stop_video():
    """Shut the application down: free the camera, close OpenCV windows, end the Tk loop."""
    shutdown_steps = (
        cap.release,             # give the camera device back
        cv2.destroyAllWindows,   # close any windows opened through OpenCV
        root.quit,               # make root.mainloop() return
    )
    for step in shutdown_steps:
        step()

# Build the main window: a video area on top and start/stop buttons below it.
root = tk.Tk()
root.title("ASL Prediction GUI")

frame_video = Frame(root)
frame_video.pack()


# Label that update_video() writes each processed frame into.
lmain = Label(frame_video)
lmain.pack()

btn_start = Button(root, text="start webcam", command=start_video)
btn_start.pack(side=tk.LEFT)

btn_stop = Button(root, text="Stop Webcam", command=stop_video)
btn_stop.pack(side=tk.RIGHT)

# Run the event loop. Whatever ends it (the stop button, closing the window
# with the title-bar button, or an error), release the camera and remove the
# window so that nothing is left behind in a long-lived kernel.
try:
    root.mainloop()
finally:
    cap.release()
    try:
        root.destroy()
    except tk.TclError:
        # The window was already destroyed (e.g. closed by the user).
        pass
