# Testing Both Models with Real-Time Inference Without Floating Keyboard

In [None]:
import pickle
import cv2
import mediapipe as mp
import numpy as np
import time
from collections import deque, Counter
import arabic_reshaper
from bidi.algorithm import get_display
from PIL import ImageFont, ImageDraw, Image

# ------------------------------
# Load English and Arabic models
# ------------------------------
# Each pickle file holds a dict whose 'model' entry is the classifier.
# The files are opened with a context manager so the handles are closed
# as soon as the model has been read.
# NOTE(security): pickle.load can execute arbitrary code embedded in the
# file; only load model files that come from a trusted source.
with open('./En_Model.p', 'rb') as model_file:
    model_en = pickle.load(model_file)['model']
with open('./Ar_Model.p', 'rb') as model_file:
    model_ar = pickle.load(model_file)['model']

# ------------------------------
# Labels for both languages
# ------------------------------
# Each table maps a class index to the text it stands for: the alphabet
# first, then the two control gestures "Space" and "Backspace".
english_letters = [chr(code) for code in range(ord("A"), ord("Z") + 1)]  # A-Z
labels_dict_en = dict(enumerate(english_letters + ["Space", "Backspace"]))

arabic_letters = ["ا", "ب", "ت", "ث", "ج", "ح", "خ",
                  "د", "ذ", "ر", "ز", "س", "ش", "ص", "ض",
                  "ط", "ظ", "ع", "غ", "ف", "ق", "ك", "ل",
                  "م", "ن", "ه", "و", "ي"]
labels_dict_ar = dict(enumerate(arabic_letters + ["Space", "Backspace"]))

# ------------------------------
# Camera (device index 0)
# ------------------------------
cap = cv2.VideoCapture(0)

# ------------------------------
# Mediapipe hand tracking (video mode, not static images)
# ------------------------------
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
hands = mp_hands.Hands(min_detection_confidence=0.5, static_image_mode=False)

# ------------------------------
# Sentence tracking
# ------------------------------
# One sentence buffer per language; the deque keeps the most recent 20
# per-frame predictions.
sentence_en = sentence_ar = ""
predictions_queue = deque(maxlen=20)
last_added_char = ""
ADD_LETTER_DELAY = 3.0  # seconds before the same character may be added again
last_time_added = time.time()

# ------------------------------
# Scanning effect variables
# ------------------------------
scanning = False
scan_start_time, scan_duration = 0, 0.6  # duration in seconds

# ------------------------------
# Current language ("EN" or "AR"); start with English
# ------------------------------
current_language = "EN"

# ------------------------------
# Helpers
# ------------------------------
def render_arabic_box(frame, text, position, font_size=40):
    """Draw Arabic `text` on a semi-transparent white box and return a new frame.

    Args:
        frame: BGR image (as produced by OpenCV); it is not modified.
        text: Arabic string in logical order; it is reshaped and bidi-reordered
            here before drawing.
        position: (x, y) where x is the RIGHT edge of the text and y its top.
        font_size: Size passed to the "arial.ttf" TrueType font.

    Returns:
        A new BGR image with the box and the text drawn on it.
    """
    reshaped = arabic_reshaper.reshape(text)
    bidi_text = get_display(reshaped)

    img_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    # "RGBA" drawing mode lets the box fill blend with the image (translucent background).
    draw = ImageDraw.Draw(img_pil, "RGBA")
    font = ImageFont.truetype("arial.ttf", font_size)

    try:
        left, top, right, bottom = draw.textbbox((0, 0), bidi_text, font=font)
    except AttributeError:
        # Pillow versions without textbbox only expose textsize (no offsets).
        left, top = 0, 0
        right, bottom = draw.textsize(bidi_text, font=font)
    text_width = right - left
    text_height = bottom - top

    x, y = position
    x = x - text_width  # right-align: the text ends at the requested x

    padding = 10
    rect_x1, rect_y1 = x - padding, y - padding
    rect_x2, rect_y2 = x + text_width + padding, y + text_height + padding

    draw.rectangle([rect_x1, rect_y1, rect_x2, rect_y2], fill=(255, 255, 255, 180))

    # textbbox reports the ink extent relative to the drawing origin, and its
    # left/top are usually non-zero. Shift the origin by that offset so the
    # glyphs sit inside the box with equal padding on every side.
    draw.text((x - left, y - top), bidi_text, font=font, fill=(0, 0, 0, 255))
    return cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)

def render_english_box(frame, text, position, font_size=1.2):
    """Draw `text` on a half-transparent white box and return a new frame.

    Args:
        frame: BGR image; it is not modified (a blended copy is returned).
        text: String drawn with OpenCV's Hershey Simplex font.
        position: (x, y) of the text's bottom-left corner (baseline origin).
        font_size: OpenCV font scale factor.

    Returns:
        A new BGR image with the box and the text drawn on it.
    """
    font_face = cv2.FONT_HERSHEY_SIMPLEX
    # Measure with the same stroke thickness that is used for drawing, so the
    # box is sized for the text as it is actually rendered.
    thickness = 3
    (text_width, text_height), _ = cv2.getTextSize(text, font_face, font_size, thickness)

    x, y = position
    padding = 10
    rect_x1, rect_y1 = x - padding, y - text_height - padding
    rect_x2, rect_y2 = x + text_width + padding, y + padding

    # Paint a solid white box on a copy, then blend the copy with the
    # original to get a 50% translucent background.
    overlay = frame.copy()
    cv2.rectangle(overlay, (rect_x1, rect_y1), (rect_x2, rect_y2), (255, 255, 255), -1)
    alpha = 0.5
    frame = cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0)

    cv2.putText(frame, text, (x, y),
                font_face, font_size, (0, 0, 0), thickness, cv2.LINE_AA)
    return frame


def put_arabic_text(frame, text, position, font_size=32, color=(0,0,0)):
    """Return a copy of `frame` with Arabic `text` drawn at `position`.

    The text is reshaped and bidi-reordered, then drawn with PIL using the
    "arial.ttf" font. `position` is the (x, y) passed to PIL's draw.text.
    `color` is applied to the RGB-converted image, so PIL reads it in
    RGB order rather than OpenCV's BGR order.
    """
    display_text = get_display(arabic_reshaper.reshape(text))

    # OpenCV frames are BGR; PIL works on RGB, so convert both ways.
    canvas = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    painter = ImageDraw.Draw(canvas)
    typeface = ImageFont.truetype("arial.ttf", font_size)
    painter.text(position, display_text, font=typeface, fill=color)

    rgb_pixels = np.array(canvas)
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)

def draw_camera_box(img, x1, y1, x2, y2, color=(0, 0, 255), thickness=3):
    """Draw a rectangle from (x1, y1) to (x2, y2) on `img`, in place."""
    top_left = (x1, y1)
    bottom_right = (x2, y2)
    cv2.rectangle(img, top_left, bottom_right, color, thickness)

# ------------------------------
# Main loop
# ------------------------------
# Capture frames until 'q' is pressed or the camera stops delivering frames.
# Keys: q = quit, c = clear both sentences, space = switch language,
# z = delete the last character of the current language's sentence.
# try/finally guarantees the camera and windows are released even if an
# exception is raised while processing a frame.
try:
    while True:
        data_aux = []
        ret, frame = cap.read()
        if not ret or frame is None:
            # The camera failed or the stream ended; frame.shape below
            # would raise on a None frame.
            break
        H, W, _ = frame.shape
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)
        current_time = time.time()

        if results.multi_hand_landmarks:
            # Only the first detected hand is used.
            hand_landmarks = results.multi_hand_landmarks[0]
            mp_drawing.draw_landmarks(
                frame, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                mp_drawing_styles.get_default_hand_landmarks_style(),
                mp_drawing_styles.get_default_hand_connections_style()
            )

            x_ = [landmark.x for landmark in hand_landmarks.landmark]
            y_ = [landmark.y for landmark in hand_landmarks.landmark]
            min_x, min_y = min(x_), min(y_)
            # Features: landmark coordinates relative to the hand's
            # top-left corner, interleaved as x0, y0, x1, y1, ...
            for landmark in hand_landmarks.landmark:
                data_aux.append(landmark.x - min_x)
                data_aux.append(landmark.y - min_y)

            # Bounding box of the hand in pixels, with a 20 px margin.
            x1, y1 = int(min_x * W) - 20, int(min_y * H) - 20
            x2, y2 = int(max(x_) * W) + 20, int(max(y_) * H) + 20

            if current_language == "EN":
                model, labels = model_en, labels_dict_en
            else:
                model, labels = model_ar, labels_dict_ar
            prediction = model.predict([np.asarray(data_aux)])
            predicted_character = labels[int(prediction[0])]

            predictions_queue.append(predicted_character)
            most_common_char, count = Counter(predictions_queue).most_common(1)[0]

            # Accept a character once it fills more than 15 entries of the
            # prediction window, and either differs from the last accepted
            # one or the repeat delay has elapsed.
            if count > 15 and (most_common_char != last_added_char
                               or current_time - last_time_added > ADD_LETTER_DELAY):
                sentence = sentence_en if current_language == "EN" else sentence_ar
                if most_common_char == "Space":
                    sentence += " "
                elif most_common_char == "Backspace":
                    sentence = sentence[:-1]
                else:
                    sentence += most_common_char
                if current_language == "EN":
                    sentence_en = sentence
                else:
                    sentence_ar = sentence
                last_added_char = most_common_char
                last_time_added = current_time
                scan_start_time = current_time
                scanning = True

            draw_camera_box(frame, x1, y1, x2, y2)

            # Scan-line animation: a green line sweeps down the hand box
            # for scan_duration seconds after a character is accepted.
            if scanning and current_time - scan_start_time < scan_duration:
                progress = (current_time - scan_start_time) / scan_duration
                scan_y = int(y1 + progress * (y2 - y1))
                cv2.line(frame, (x1, scan_y), (x2, scan_y), (0, 255, 0), 2)
            else:
                scanning = False

            # ---------------- Show detected char above hand ----------------
            if current_language == "EN":
                cv2.putText(frame, most_common_char, (x1, y1 - 15),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 0), 3)
            else:
                display_char = most_common_char
                if most_common_char == "Space":
                    display_char = "مسافة"
                elif most_common_char == "Backspace":
                    display_char = "حذف"

                frame = put_arabic_text(frame, display_char, (x1, y1 - 40),
                                        font_size=36, color=(0,0,0))

        # -------- Show sentences --------
        if sentence_en.strip():
            frame = render_english_box(frame, sentence_en, position=(30, 70))  # top-left quarter

        if sentence_ar.strip():
            frame = render_arabic_box(frame, sentence_ar, position=(W//2 - 20, 120))  # right-aligned to mid-frame, below the English line

        # Show current language
        cv2.putText(frame, f"Language: {current_language}", (30, H-30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 2)

        cv2.imshow("frame", frame)

        # Mask to the low byte: some backends set extra high bits in the
        # key code, which would make the comparisons below never match.
        key = cv2.waitKey(1) & 0xFF
        if key == ord("q"):
            break
        elif key == ord("c"):
            sentence_en = ""
            sentence_ar = ""
            last_added_char = ""
            predictions_queue.clear()
        elif key == 32:  # spacebar → switch language
            current_language = "AR" if current_language == "EN" else "EN"
            predictions_queue.clear()
        elif key == ord("z"):  # backspace
            if current_language == "EN" and sentence_en:
                sentence_en = sentence_en[:-1]
            elif current_language == "AR" and sentence_ar:
                sentence_ar = sentence_ar[:-1]
            last_added_char = ""
            predictions_queue.clear()
finally:
    cap.release()
    cv2.destroyAllWindows()


#

#

#

#

#

#

# Adding Floating Keyboard

In [1]:
import pickle
import cv2
import mediapipe as mp
import numpy as np
import time
from collections import deque, Counter
import arabic_reshaper
from bidi.algorithm import get_display
from PIL import ImageFont, ImageDraw, Image
import tkinter as tk

# Load English and Arabic models
# Each pickle file holds a dict whose 'model' entry is the classifier.
# A context manager closes each file handle once the model has been read.
# NOTE(security): pickle.load can execute arbitrary code embedded in the
# file; only load model files that come from a trusted source.
with open('./En_Model.p', 'rb') as model_file:
    model_en = pickle.load(model_file)['model']
with open('./Ar_Model.p', 'rb') as model_file:
    model_ar = pickle.load(model_file)['model']

In [2]:
# Labels for both languages
# Class index -> text: the alphabet first, then the "Space" and "Backspace"
# control gestures as the last two indices.
english_letters = [chr(code) for code in range(ord("A"), ord("Z") + 1)]  # A-Z
labels_dict_en = dict(enumerate(english_letters + ["Space", "Backspace"]))

arabic_letters = ["ا", "ب", "ت", "ث", "ج", "ح", "خ",
                  "د", "ذ", "ر", "ز", "س", "ش", "ص", "ض",
                  "ط", "ظ", "ع", "غ", "ف", "ق", "ك", "ل",
                  "م", "ن", "ه", "و", "ي"]
labels_dict_ar = dict(enumerate(arabic_letters + ["Space", "Backspace"]))

In [3]:
# Camera (device index 0)
cap = cv2.VideoCapture(0)

# Mediapipe hand tracking (video mode, not static images)
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
hands = mp_hands.Hands(min_detection_confidence=0.5, static_image_mode=False)

# Sentence tracking: one buffer per language, plus the most recent 20
# per-frame predictions.
sentence_en = sentence_ar = ""
predictions_queue = deque(maxlen=20)
last_added_char = ""
ADD_LETTER_DELAY = 3.0  # seconds before the same character may be added again
last_time_added = time.time()

# Scanning effect variables
scanning = False
scan_start_time, scan_duration = 0, 0.6  # duration in seconds

# Current language ("EN" or "AR")
current_language = "EN"

In [4]:
def render_arabic_box(frame, text, position, font_size=40):
    """Draw Arabic `text` on a semi-transparent white box and return a new frame.

    Args:
        frame: BGR image (as produced by OpenCV); it is not modified.
        text: Arabic string in logical order; it is reshaped and bidi-reordered
            here before drawing.
        position: (x, y) where x is the RIGHT edge of the text and y its top.
        font_size: Size passed to the "arial.ttf" TrueType font.

    Returns:
        A new BGR image with the box and the text drawn on it.
    """
    reshaped = arabic_reshaper.reshape(text)
    bidi_text = get_display(reshaped)

    img_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    # "RGBA" drawing mode lets the box fill blend with the image (translucent background).
    draw = ImageDraw.Draw(img_pil, "RGBA")
    font = ImageFont.truetype("arial.ttf", font_size)

    try:
        left, top, right, bottom = draw.textbbox((0, 0), bidi_text, font=font)
    except AttributeError:
        # Pillow versions without textbbox only expose textsize (no offsets).
        left, top = 0, 0
        right, bottom = draw.textsize(bidi_text, font=font)
    text_width = right - left
    text_height = bottom - top

    x, y = position
    x = x - text_width  # right-align: the text ends at the requested x

    padding = 10
    rect_x1, rect_y1 = x - padding, y - padding
    rect_x2, rect_y2 = x + text_width + padding, y + text_height + padding

    draw.rectangle([rect_x1, rect_y1, rect_x2, rect_y2], fill=(255, 255, 255, 180))

    # textbbox reports the ink extent relative to the drawing origin, and its
    # left/top are usually non-zero. Shift the origin by that offset so the
    # glyphs sit inside the box with equal padding on every side.
    draw.text((x - left, y - top), bidi_text, font=font, fill=(0, 0, 0, 255))
    return cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)


In [5]:
def render_english_box(frame, text, position, font_size=1.2):
    """Draw `text` on a half-transparent white box and return a new frame.

    Args:
        frame: BGR image; it is not modified (a blended copy is returned).
        text: String drawn with OpenCV's Hershey Simplex font.
        position: (x, y) of the text's bottom-left corner (baseline origin).
        font_size: OpenCV font scale factor.

    Returns:
        A new BGR image with the box and the text drawn on it.
    """
    font_face = cv2.FONT_HERSHEY_SIMPLEX
    # Measure with the same stroke thickness that is used for drawing, so the
    # box is sized for the text as it is actually rendered.
    thickness = 3
    (text_width, text_height), _ = cv2.getTextSize(text, font_face, font_size, thickness)

    x, y = position
    padding = 10
    rect_x1, rect_y1 = x - padding, y - text_height - padding
    rect_x2, rect_y2 = x + text_width + padding, y + padding

    # Paint a solid white box on a copy, then blend the copy with the
    # original to get a 50% translucent background.
    overlay = frame.copy()
    cv2.rectangle(overlay, (rect_x1, rect_y1), (rect_x2, rect_y2), (255, 255, 255), -1)
    alpha = 0.5
    frame = cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0)

    cv2.putText(frame, text, (x, y),
                font_face, font_size, (0, 0, 0), thickness, cv2.LINE_AA)
    return frame

In [6]:
def put_arabic_text(frame, text, position, font_size=32, color=(0,0,0)):
    """Return a copy of `frame` with Arabic `text` drawn at `position`.

    The text is reshaped and bidi-reordered, then drawn with PIL using the
    "arial.ttf" font. `position` is the (x, y) passed to PIL's draw.text.
    `color` is applied to the RGB-converted image, so PIL reads it in
    RGB order rather than OpenCV's BGR order.
    """
    display_text = get_display(arabic_reshaper.reshape(text))

    # OpenCV frames are BGR; PIL works on RGB, so convert both ways.
    canvas = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    painter = ImageDraw.Draw(canvas)
    typeface = ImageFont.truetype("arial.ttf", font_size)
    painter.text(position, display_text, font=typeface, fill=color)

    rgb_pixels = np.array(canvas)
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)

In [7]:
def draw_camera_box(img, x1, y1, x2, y2, color=(0, 0, 255), thickness=3):
    """Draw a rectangle from (x1, y1) to (x2, y2) on `img`, in place."""
    top_left = (x1, y1)
    bottom_right = (x2, y2)
    cv2.rectangle(img, top_left, bottom_right, color, thickness)

In [8]:
# Floating Keyboard Function
def draw_floating_keyboard(frame, current_language, highlighted_key=None):
    """Draw an on-screen keyboard over the bottom 30% of `frame`.

    Args:
        frame: BGR image. Key rectangles and OpenCV-drawn labels are painted
            onto it in place.
        current_language: "EN" selects the English layout (14 keys per row);
            any other value selects the Arabic layout (10 keys per row).
        highlighted_key: Key label to paint green (a letter, "Space" or
            "Backspace"); None highlights nothing.

    Returns:
        The frame with the keyboard drawn. In Arabic mode this is a new
        array, because the letter labels are rendered through PIL.
    """
    height, width = frame.shape[:2]

    # Keyboard dimensions
    keyboard_height = int(height * 0.3)
    keyboard_y = height - keyboard_height
    keyboard_width = width

    # # Draw keyboard background
    # cv2.rectangle(frame, (0, keyboard_y), (keyboard_width, height), (240, 240, 240), -1)
    # cv2.rectangle(frame, (0, keyboard_y), (keyboard_width, height), (180, 180, 180), 2)

    # Define keys
    if current_language == "EN":
        keys = english_letters + ["Space", "Backspace"]
        keys_per_row = 14
    else:
        keys = arabic_letters + ["Space", "Backspace"]
        keys_per_row = 10

    # Key sizes
    key_width = int((keyboard_width - 20) / keys_per_row)
    key_height = int(keyboard_height / 3)
    padding = 5
    text_color = (0, 0, 0)
    hershey = cv2.FONT_HERSHEY_SIMPLEX

    # Arabic letters cannot be drawn with OpenCV's Hershey fonts, so they are
    # collected here and rendered together after the loop. This needs one
    # BGR<->RGB round trip and one font load per frame instead of one per key.
    arabic_labels = []

    # Draw keys
    for i, key in enumerate(keys):
        row, col = divmod(i, keys_per_row)

        x1 = col * key_width + 10
        y1 = keyboard_y + row * key_height
        x2 = x1 + key_width - padding
        y2 = y1 + key_height - padding

        # Highlight
        key_color = (100, 255, 100) if key == highlighted_key else (200, 200, 200)
        cv2.rectangle(frame, (x1, y1), (x2, y2), key_color, -1)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (100, 100, 100), 1)

        # Labels
        is_control_key = key in ("Space", "Backspace")
        if is_control_key:
            label = "__" if key == "Space" else "<-"
            font_scale = 0.6
        else:
            label = key
            font_scale = 0.7

        # NOTE(review): for Arabic letters this measures the label with the
        # Hershey font even though it is drawn with Arial, so the centring
        # is presumably only approximate. Positions are kept as they were.
        text_size = cv2.getTextSize(label, hershey, font_scale, 2)[0]
        text_x = x1 + (key_width - text_size[0]) // 2
        text_y = y1 + (key_height + text_size[1]) // 2

        if is_control_key or current_language == "EN":
            cv2.putText(frame, label, (text_x, text_y),
                        hershey, font_scale, text_color, 2, cv2.LINE_AA)
        else:
            arabic_labels.append((key, (text_x, text_y - 15)))

    if arabic_labels:
        img_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        draw = ImageDraw.Draw(img_pil)
        font = ImageFont.truetype("arial.ttf", 24)
        for letter, label_pos in arabic_labels:
            shaped = get_display(arabic_reshaper.reshape(letter))
            draw.text(label_pos, shaped, font=font, fill=text_color)
        frame = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)

    return frame


In [None]:
# Camera and window settings

# Request a 640x480 capture resolution from the camera.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

# Create a resizable named window and give it its initial size.
win_w, win_h = 1000, 600
cv2.namedWindow("Sign Language Translator", cv2.WINDOW_NORMAL)
cv2.resizeWindow("Sign Language Translator", win_w, win_h)

# Centre the window: a temporary Tk root is created only to query the
# screen dimensions and is destroyed straight away.
root = tk.Tk()
screen_w, screen_h = root.winfo_screenwidth(), root.winfo_screenheight()
root.destroy()
x = (screen_w - win_w) // 2
y = (screen_h - win_h) // 2
cv2.moveWindow("Sign Language Translator", x, y)

In [10]:
# Main loop
# Capture frames until 'q' is pressed or the camera stops delivering frames.
# Keys: q = quit, c = clear both sentences, space = switch language,
# z = delete the last character of the current language's sentence.
# try/finally guarantees the camera and windows are released even if an
# exception is raised while processing a frame.
try:
    while True:
        data_aux = []
        ret, frame = cap.read()
        if not ret or frame is None:
            # The camera failed or the stream ended; cv2.flip/frame.shape
            # below would raise on a None frame.
            break
        frame = cv2.flip(frame, 1)  # horizontal flip (mirror view)
        H, W, _ = frame.shape
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)
        current_time = time.time()

        # Stays None when no hand is visible, so no key is highlighted.
        predicted_character = None
        if results.multi_hand_landmarks:
            # Only the first detected hand is used.
            hand_landmarks = results.multi_hand_landmarks[0]
            mp_drawing.draw_landmarks(
                frame, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                mp_drawing_styles.get_default_hand_landmarks_style(),
                mp_drawing_styles.get_default_hand_connections_style()
            )

            x_ = [landmark.x for landmark in hand_landmarks.landmark]
            y_ = [landmark.y for landmark in hand_landmarks.landmark]
            min_x, min_y = min(x_), min(y_)
            # Features: landmark coordinates relative to the hand's
            # top-left corner, interleaved as x0, y0, x1, y1, ...
            for landmark in hand_landmarks.landmark:
                data_aux.append(landmark.x - min_x)
                data_aux.append(landmark.y - min_y)

            # Bounding box of the hand in pixels, with a 20 px margin.
            x1, y1 = int(min_x * W) - 20, int(min_y * H) - 20
            x2, y2 = int(max(x_) * W) + 20, int(max(y_) * H) + 20

            if current_language == "EN":
                model, labels = model_en, labels_dict_en
            else:
                model, labels = model_ar, labels_dict_ar
            prediction = model.predict([np.asarray(data_aux)])
            predicted_character = labels[int(prediction[0])]

            predictions_queue.append(predicted_character)
            most_common_char, count = Counter(predictions_queue).most_common(1)[0]

            # Accept a character once it fills more than 15 entries of the
            # prediction window, and either differs from the last accepted
            # one or the repeat delay has elapsed.
            if count > 15 and (most_common_char != last_added_char
                               or current_time - last_time_added > ADD_LETTER_DELAY):
                sentence = sentence_en if current_language == "EN" else sentence_ar
                if most_common_char == "Space":
                    sentence += " "
                elif most_common_char == "Backspace":
                    sentence = sentence[:-1]
                else:
                    sentence += most_common_char
                if current_language == "EN":
                    sentence_en = sentence
                else:
                    sentence_ar = sentence
                last_added_char = most_common_char
                last_time_added = current_time
                scan_start_time = current_time
                scanning = True

            draw_camera_box(frame, x1, y1, x2, y2)

            # Scan-line animation: a green line sweeps down the hand box
            # for scan_duration seconds after a character is accepted.
            if scanning and current_time - scan_start_time < scan_duration:
                progress = (current_time - scan_start_time) / scan_duration
                scan_y = int(y1 + progress * (y2 - y1))
                cv2.line(frame, (x1, scan_y), (x2, scan_y), (0, 255, 0), 2)
            else:
                scanning = False

            # ---------------- Show detected char above hand ----------------
            if current_language == "EN":
                cv2.putText(frame, most_common_char, (x1, y1 - 15),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 0), 3)
            else:
                display_char = most_common_char
                if most_common_char == "Space":
                    display_char = "مسافة"
                elif most_common_char == "Backspace":
                    display_char = "حذف"

                frame = put_arabic_text(frame, display_char, (x1, y1 - 40),
                                        font_size=36, color=(0,0,0))

        # Draw floating keyboard; the key matching this frame's raw
        # prediction (if any) is highlighted.
        frame = draw_floating_keyboard(frame, current_language, predicted_character)

        # -------- Show sentences --------
        if sentence_en.strip():
            frame = render_english_box(frame, sentence_en, position=(30, 70))  # top-left quarter

        if sentence_ar.strip():
            frame = render_arabic_box(frame, sentence_ar, position=(W//2 - 20, 120))  # right-aligned to mid-frame, below the English line

        # Show current language
        # cv2.putText(frame, f"Language: {current_language}", (30, 30),
        #             cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 2)

        cv2.imshow("Sign Language Translator", frame)

        # Mask to the low byte: some backends set extra high bits in the
        # key code, which would make the comparisons below never match.
        key = cv2.waitKey(1) & 0xFF
        if key == ord("q"):
            break
        elif key == ord("c"):
            sentence_en = ""
            sentence_ar = ""
            last_added_char = ""
            predictions_queue.clear()
        elif key == 32:  # spacebar → switch language
            current_language = "AR" if current_language == "EN" else "EN"
            predictions_queue.clear()
        elif key == ord("z"):  # backspace
            if current_language == "EN" and sentence_en:
                sentence_en = sentence_en[:-1]
            elif current_language == "AR" and sentence_ar:
                sentence_ar = sentence_ar[:-1]
            last_added_char = ""
            predictions_queue.clear()
finally:
    cap.release()
    cv2.destroyAllWindows()