# In [None]:  (Jupyter cell marker left over from the notebook export)
import cv2
import numpy as np
import pyttsx3
import threading
from tensorflow.keras.models import model_from_json
import time

# Load model
# The network architecture is stored as JSON and the trained weights in a
# separate HDF5 file; the architecture is rebuilt first, then the weights are
# loaded into it.  The `with` block closes the file even if reading fails.
with open(r'C:\Users\mehak\signlanguagedetectionmodel48x48.json', "r") as json_file:
    model_json = json_file.read()
model = model_from_json(model_json)
model.load_weights(r'C:\Users\mehak\signlanguagedetectionmodel48x48.h5')

# Class names indexed by the model's output position: the 26 letters followed
# by 'blank'.  The main loop maps np.argmax(pred) through this list.
labels = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ") + ['blank']

def extract_features(image):
    """Turn a 48x48 single-channel image into a scaled one-sample batch.

    Args:
        image: Array-like holding exactly 48 * 48 values.

    Returns:
        Array of shape (1, 48, 48, 1) containing the input values divided
        by 255.0.

    Raises:
        ValueError: If `image` cannot be reshaped to (1, 48, 48, 1).
    """
    batch = np.reshape(image, (1, 48, 48, 1))
    scaled = batch / 255.0
    return scaled

def speak_text(text):
    """Speak `text` on a background daemon thread.

    Empty or whitespace-only text is ignored.  The call returns immediately;
    the actual speech is produced by `_speak` on the new thread.
    """
    if not text.strip():
        return
    worker = threading.Thread(target=_speak, args=(text,), daemon=True)
    worker.start()

# speak_text() starts one thread per utterance, so several _speak() calls can
# be alive at once.  Per the pyttsx3 documentation, init() returns the engine
# instance already in use for a driver, so concurrent callers could end up
# driving the same engine's run loop.  The lock lets only one utterance run at
# a time; later ones wait their turn instead of failing.
_speak_lock = threading.Lock()


def _speak(text):
    """Speak `text` synchronously with pyttsx3 (blocks until finished).

    Intended to run on a worker thread started by `speak_text`.  Calls are
    serialised through `_speak_lock`.
    """
    with _speak_lock:
        engine = pyttsx3.init()
        engine.say(text)
        engine.runAndWait()

def vertical_gradient(height, width, top_color, bottom_color):
    """Build an image that blends from `top_color` down to `bottom_color`.

    Row `y` uses the blend factor ``y / height``, so the first row is exactly
    `top_color` and the last row stops one step short of `bottom_color`.

    Args:
        height: Number of rows.
        width: Number of columns.
        top_color: 3-component colour for the top row.
        bottom_color: 3-component colour the gradient moves towards.

    Returns:
        uint8 array of shape (height, width, 3); every pixel in a row has the
        same colour.  Fractional channel values are truncated by the uint8
        cast.
    """
    top = np.asarray(top_color)
    bottom = np.asarray(bottom_color)

    # One blend factor per row, shaped (height, 1) so it broadcasts against
    # the colour components.  Same arithmetic as a per-row loop, done once.
    alpha = (np.arange(height) / height)[:, np.newaxis]
    row_colors = (top * (1 - alpha) + bottom * alpha).astype(np.uint8)

    gradient = np.empty((height, width, 3), dtype=np.uint8)
    # Repeat each row's colour across all columns.
    gradient[:] = row_colors[:, np.newaxis, :]
    return gradient

# --- Capture device and recognition state ---------------------------------
cap = cv2.VideoCapture(0)          # camera at device index 0
sentence = ''                      # text assembled from recognised letters
last_label = ''                    # most recent letter appended to `sentence`
frame_count = 0                    # frames processed so far
cooldown = 10                      # run the model once every `cooldown` frames
repeat_cooldown = 15               # consecutive identical predictions needed
                                   # before the same letter is appended again
repeat_counter = repeat_cooldown
building_sentence = False          # toggled with the 's' key
pred_label = ''                    # latest predicted label (shown on screen)
confidence = 0                     # latest top prediction score
prev_sentences = []                # sentences saved with the 'c' key

# --- Layout (pixels) ------------------------------------------------------
# Size of the composed UI canvas.
frame_width = 1280
frame_height = 720
# Size the camera image is resized to before being pasted onto the canvas.
cam_width = 720
cam_height = 540

# Top-left corner of the camera image on the canvas.
x_offset = 50
y_offset = 50

# --- Blinking text cursor -------------------------------------------------
cursor_on = True
last_cursor_toggle = time.time()
cursor_interval = 0.5              # seconds between cursor toggles

# namedWindow() takes window *flags*; WINDOW_NORMAL is the intended flag here
# (the property id WND_PROP_FULLSCREEN has the same numeric value, 0, so this
# does not change what is passed).  Fullscreen is then requested through
# setWindowProperty().
cv2.namedWindow("Sign Language Recognition", cv2.WINDOW_NORMAL)
cv2.setWindowProperty("Sign Language Recognition", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

# ---------------------------------------------------------------------------
# Main loop: grab a frame, segment the hand region, classify it every
# `cooldown` frames, render the UI and react to key presses.
# ---------------------------------------------------------------------------

# Values that do not change between frames are computed once, up front.
# The gradient in particular is built once and copied per frame instead of
# being regenerated for every frame.
background = vertical_gradient(frame_height, frame_width, (240, 240, 255), (180, 210, 255))

# ROI rectangle - a bit larger: 250x250 (coordinates are in canvas space)
roi_size = 250
roi_x1, roi_y1 = x_offset + 10, y_offset + 30
roi_x2, roi_y2 = roi_x1 + roi_size, roi_y1 + roi_size

# HSV bounds used for skin colour segmentation and the kernel used to clean
# up the resulting mask.
lower_skin = np.array([0, 20, 70], dtype=np.uint8)
upper_skin = np.array([20, 255, 255], dtype=np.uint8)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

instructions = "Press  's'=Start/Stop  |  'b'=Space  |  'p'=Speak  |  'c'=Clear+Save  |  ESC=Exit"

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame could be read (camera missing or stream ended).
            break

        frame = cv2.resize(frame, (cam_width, cam_height))
        full_frame = background.copy()

        # Paste the camera image onto the canvas (this copies the pixels).
        full_frame[y_offset:y_offset + cam_height, x_offset:x_offset + cam_width] = frame

        cv2.rectangle(full_frame, (roi_x1, roi_y1), (roi_x2, roi_y2), (0, 120, 255), 3)

        # Extract ROI from frame for processing (convert canvas coordinates
        # back to camera-image coordinates).
        roi = frame[roi_y1 - y_offset:roi_y2 - y_offset, roi_x1 - x_offset:roi_x2 - x_offset]

        # Skin color segmentation to detect hand
        hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
        mask = cv2.inRange(hsv_roi, lower_skin, upper_skin)

        mask = cv2.erode(mask, kernel, iterations=1)
        mask = cv2.dilate(mask, kernel, iterations=2)
        mask = cv2.GaussianBlur(mask, (3, 3), 0)

        # Find contours on the mask
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Draw green dots on contour points if contours found.
        # NOTE(review): `roi` is a view of `frame`, and `frame` was already
        # copied into `full_frame` above, so these dots never reach the
        # display; they do end up in the grayscale image fed to the model.
        # Behaviour is kept as-is - confirm whether that is intended.
        if contours:
            largest_contour = max(contours, key=cv2.contourArea)
            for point in largest_contour:
                x, y = point[0]
                if 0 <= x < roi.shape[1] and 0 <= y < roi.shape[0]:
                    cv2.circle(roi, (int(x), int(y)), 3, (0, 255, 0), -1)  # green dot

        frame_count += 1
        if frame_count % cooldown == 0:
            # Prepare grayscale and resize for prediction.  Only done on
            # frames that are actually classified.
            gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
            resized = cv2.resize(gray, (48, 48))
            input_img = extract_features(resized)

            # verbose=0 keeps Keras from printing a progress bar per call.
            pred = model.predict(input_img, verbose=0)
            pred_label = labels[np.argmax(pred)]
            confidence = np.max(pred)

            if building_sentence:
                if pred_label == 'blank':
                    # No sign shown: forget the last letter so the same
                    # letter can be entered again right away.
                    last_label = ''
                    repeat_counter = repeat_cooldown
                elif pred_label == last_label:
                    # Same sign still held: only repeat the letter after
                    # `repeat_cooldown` consecutive identical predictions.
                    repeat_counter += 1
                    if repeat_counter >= repeat_cooldown:
                        sentence += pred_label
                        speak_text(pred_label)
                        repeat_counter = 0
                else:
                    # A new letter: append and speak it immediately.
                    sentence += pred_label
                    speak_text(pred_label)
                    last_label = pred_label
                    repeat_counter = 0

        # Heading (drawn twice, offset by 2 px, for a drop-shadow effect)
        text_pos = (frame_width // 2 - 110, 45)
        cv2.putText(full_frame, "ASL Model", (text_pos[0] + 2, text_pos[1] + 2), cv2.FONT_HERSHEY_SIMPLEX, 1.4, (50, 50, 50), 5)
        cv2.putText(full_frame, "ASL Model", text_pos, cv2.FONT_HERSHEY_SIMPLEX, 1.4, (110, 0, 190), 5)

        # Prediction label
        label_bg_top_left = (roi_x1, roi_y1 - 45)
        label_bg_bottom_right = (roi_x1 + 220, roi_y1 - 5)
        cv2.rectangle(full_frame, label_bg_top_left, label_bg_bottom_right, (0, 120, 255), -1)
        label_text = f"{pred_label}   {confidence * 100:.2f}%"
        cv2.putText(full_frame, label_text, (roi_x1 + 10, roi_y1 - 15),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), 2)

        # Sentence Box
        sent_box_tl = (50, 580)
        sent_box_br = (880, 640)
        cv2.rectangle(full_frame, sent_box_tl, sent_box_br, (245, 245, 245), -1)
        cv2.rectangle(full_frame, sent_box_tl, sent_box_br, (0, 150, 50), 3)

        # Toggle the blinking cursor every `cursor_interval` seconds.
        current_time = time.time()
        if current_time - last_cursor_toggle > cursor_interval:
            cursor_on = not cursor_on
            last_cursor_toggle = current_time

        display_sentence = sentence
        if building_sentence and cursor_on:
            display_sentence += '|'

        cv2.putText(full_frame, "Sentence:", (sent_box_tl[0] + 15, sent_box_tl[1] + 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.85, (0, 120, 0), 2)
        cv2.putText(full_frame, display_sentence, (sent_box_tl[0] + 150, sent_box_tl[1] + 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.85, (0, 90, 0), 2)

        # Previous Sentences on Right
        rect_top_left = (910, 50)
        rect_bottom_right = (frame_width - 30, frame_height - 60)
        cv2.rectangle(full_frame, rect_top_left, rect_bottom_right, (230, 230, 230), -1)
        cv2.rectangle(full_frame, rect_top_left, rect_bottom_right, (100, 100, 100), 2)

        cv2.putText(full_frame, "Previous Sentences:", (rect_top_left[0] + 15, rect_top_left[1] + 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 50, 130), 2)

        # Newest first, at most 18 entries, skipping any that would fall
        # below the panel.
        for i, prev_sent in enumerate(prev_sentences[-18:][::-1]):
            y_pos = rect_top_left[1] + 60 + i * 25
            if y_pos < rect_bottom_right[1] - 10:
                cv2.putText(full_frame, f"- {prev_sent}", (rect_top_left[0] + 15, y_pos),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (50, 50, 50), 1)

        # Instructions Bar
        cv2.rectangle(full_frame, (0, frame_height - 40), (frame_width, frame_height), (100, 100, 100), -1)
        cv2.putText(full_frame, instructions, (50, frame_height - 12), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

        cv2.imshow("Sign Language Recognition", full_frame)

        # Keyboard handling: ESC quits, 's' toggles sentence building,
        # 'p' speaks the sentence, 'c' saves and clears it, 'b' adds a space.
        key = cv2.waitKey(1) & 0xFF
        if key == 27:
            break
        elif key == ord('s'):
            building_sentence = not building_sentence
        elif key == ord('p'):
            if sentence.strip():
                speak_text(sentence)
        elif key == ord('c'):
            if sentence.strip():
                prev_sentences.append(sentence)
            sentence = ''
            last_label = ''
            repeat_counter = repeat_cooldown
        elif key == ord('b'):
            sentence += ' '
finally:
    # Always give the camera back and close the window, even if the loop
    # exits through an exception (including Ctrl+C).
    cap.release()
    cv2.destroyAllWindows()
