# In [None]:
import cv2
import mediapipe as mp
import os
import numpy as np
from tensorflow.keras.models import load_model


# Keras classifier loaded from disk; it is fed flattened hand-landmark
# coordinates further down and its argmax indexes into `alphabet`.
model = load_model("model_landmarks.h5")

# MediaPipe handles: the drawing helpers and a hand tracker limited to two
# hands, with 0.5 as the threshold for both detection and tracking.
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    max_num_hands=2,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

# Class labels, in the order of the model's output indices (A..Z).
alphabet = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")

# Open the default camera and request a 450x450 capture size.
# NOTE(review): the driver may not honour this exact size -- confirm by
# inspecting frame.shape at runtime.
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 450)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 450)

MAX_LINE_LEN = 10  # maximum characters per rendered sentence line


def _wrap_words(text, max_len):
    """Greedily pack the whitespace-separated words of *text* into lines.

    A word joins the current line while the joined length (including the
    separating space) stays within *max_len*; otherwise it starts a new
    line. A single word longer than *max_len* is kept whole on its own line.

    Returns:
        A list of line strings, empty when *text* contains no words.
    """
    tokens = text.split()
    if not tokens:
        return []
    wrapped = []
    current = tokens[0]
    for token in tokens[1:]:
        if len(current) + 1 + len(token) <= max_len:
            current += ' ' + token
        else:
            wrapped.append(current)
            current = token
    wrapped.append(current)
    return wrapped


sentence = ""           # everything typed so far, words separated by spaces
word = ""               # the word currently being spelled
predicted_class = None  # index into `alphabet`; None until a hand has been classified
scaling_factor = 1.5    # grow the landmark bounding box by 50% around its centre

# Main capture loop. Keys: 'q' quit, 'c' commit the last predicted letter,
# 'd' delete the last character, 's' insert a space (ends the current word).
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered (camera missing, busy or disconnected);
            # continuing would pass None to cvtColor and crash.
            break

        # MediaPipe expects RGB input, OpenCV delivers BGR.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)

        cv2.putText(frame, f"Word: {word}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0))

        # Render the sentence wrapped to MAX_LINE_LEN, one row every 30 px.
        # The loop variables deliberately do not reuse the name `word`, which
        # holds the in-progress word state.
        lines = _wrap_words(sentence, MAX_LINE_LEN)
        if lines:
            for row, text_line in enumerate(lines):
                cv2.putText(frame, f"Sentence: {text_line}", (10, 90 + 30 * row),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255))
        else:
            cv2.putText(frame, f"Sentence: {sentence}", (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255))

        if results.multi_hand_landmarks is not None:
            frame_h, frame_w = frame.shape[:2]
            # NOTE: when two hands are visible both labels are drawn at the
            # same position and the last hand's prediction is the one that
            # 'c' commits.
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    frame, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(0, 117, 128), thickness=2, circle_radius=4),
                    mp_drawing.DrawingSpec(color=(53, 101, 77), thickness=2, circle_radius=2),
                )

                # Landmarks are normalised to [0, 1]; scale them to pixel
                # coordinates of the actual frame (one (x, y) row per landmark).
                points = np.array([(lm.x * frame_w, lm.y * frame_h)
                                   for lm in hand_landmarks.landmark])

                # Tight bounding box of the landmarks, then enlarged around
                # its centre by `scaling_factor`.
                x_min, y_min = points.min(axis=0)
                x_max, y_max = points.max(axis=0)
                box_w = x_max - x_min
                box_h = y_max - y_min
                left = int(x_min - box_w * (scaling_factor - 1) / 2)
                top = int(y_min - box_h * (scaling_factor - 1) / 2)
                right = left + int(box_w * scaling_factor)
                bottom = top + int(box_h * scaling_factor)
                cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 0), 2)

                # The model takes one row of flattened pixel coordinates
                # (x0, y0, x1, y1, ...). verbose=0 suppresses the per-call
                # progress bar that would otherwise be printed every frame.
                predictions = model.predict(points.reshape(1, -1), verbose=0)
                predicted_class = int(np.argmax(predictions))
                confidence = predictions[0, predicted_class]

                cv2.putText(frame, f"Predicted: {alphabet[predicted_class]} ({confidence:.2f})", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        cv2.imshow('frame', frame)

        # Mask to the low byte so the comparison with ord(...) also works on
        # platforms where waitKey returns extra modifier bits.
        key = cv2.waitKey(1) & 0xFF

        if key == ord('q'):
            break
        elif key == ord('c'):
            # Ignore the key until at least one prediction exists; the most
            # recent prediction is kept even if the hand has left the frame.
            if predicted_class is not None:
                letter = alphabet[predicted_class]
                sentence += letter
                word += letter
        elif key == ord('d'):
            sentence = sentence[:-1]
            # Re-derive the current word from the sentence so that deleting a
            # space makes the previous word the one being edited again.
            word = sentence.rpartition(' ')[2]
        elif key == ord('s'):
            sentence += ' '
            word = ""
finally:
    # Always release the camera, the MediaPipe graph and the windows, even
    # when the loop exits through an exception.
    cap.release()
    hands.close()
    cv2.destroyAllWindows()
