In [None]:
import torch
import torchvision.transforms as transforms
import torch.nn.functional as F
import numpy as np
import serial
from PIL import Image
import cv2
import mediapipe as mp
import json
import time
import sys
sys.path.append('../')
from model import model as nn_model

# Run inference on the GPU when PyTorch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Using device: {device}")

# Open the serial link to the Arduino. The link is optional: when the port
# cannot be opened, `arduino` stays None and the rest of the script runs
# without it.
arduino = None
try:
    arduino = serial.Serial('COM6', 9600, timeout=1)
except serial.SerialException as e:
    print(f"Failed to connect to Arduino: {e}")
else:
    time.sleep(2)  # Wait for connection to establish
    print("Arduino connected successfully")

# Restore the trained classifier weights and switch the network to inference
# mode. Any failure here is fatal: the script cannot do anything useful
# without the model, so it reports the error and exits with a non-zero status.
model_path = 'model/best_model.pth'
try:
    # NOTE(review): torch.load unpickles the checkpoint, which can execute
    # arbitrary code. Only load files from a trusted source, or pass
    # weights_only=True if the installed torch version supports it.
    state_dict = torch.load(model_path, map_location=device)

    # Checkpoints saved from a torch.compile()-wrapped module carry an
    # '_orig_mod.' marker in their parameter names; strip it so the keys
    # match the plain (uncompiled) model built below.
    if any(k.startswith('_orig_mod.') for k in state_dict):
        state_dict = {k.replace('_orig_mod.', ''): v for k, v in state_dict.items()}

    model = nn_model.EfficientNetB0(num_classes=36).to(device)
    model.load_state_dict(state_dict)
    model.eval()
    print('Model loaded successfully')
except Exception as e:
    print(f"Error loading model: {e}")
    # sys.exit(1) instead of the interactive-only exit() helper, so callers
    # and shells see a failure status rather than 0.
    sys.exit(1)

# MediaPipe hand detector configured for a video stream
# (static_image_mode=False), reporting up to two hands per frame.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=2,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)

# Lookup table from the classifier's class index (as a string key) to the
# label shown on screen and sent to the Arduino.
try:
    # JSON is UTF-8 by specification; do not rely on the platform default
    # encoding, which differs between operating systems.
    with open('model/class_labels.json', 'r', encoding='utf-8') as f:
        class_labels = json.load(f)
    print('Class labels loaded')
except (OSError, ValueError) as e:
    # OSError: file missing or unreadable. ValueError: malformed JSON
    # (json.JSONDecodeError) or undecodable bytes (UnicodeDecodeError).
    print(f"Error loading class labels: {e}")
    sys.exit(1)

# Preprocessing applied to each hand crop before inference: resize to
# 128x128 and convert the PIL image to a tensor.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

# Open the default camera (index 0). Without a camera there is nothing to
# process, so report the problem and exit with a failure status.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Error opening camera")
    sys.exit(1)

# Mutable UI state shared by the capture loop.
current_word = []          # labels accepted so far, in order
current_prediction = None  # label of the most recent accepted prediction
confidence = 0             # softmax probability reported alongside the prediction
last_update_time = 0       # time.time() of the last LETTER message sent
update_interval = 0.5      # minimum seconds between LETTER messages

def send_to_arduino(command):
    """Send one newline-terminated command to the Arduino, if it is connected.

    Does nothing when no serial port was opened or the port is closed.
    After a successful write the function pauses for 50 ms. Serial errors
    are printed rather than raised.
    """
    if not (arduino and arduino.is_open):
        return

    payload = f"{command}\n".encode('utf-8')
    try:
        arduino.write(payload)
        time.sleep(0.05)
    except serial.SerialException as e:
        print(f"Error sending to Arduino: {e}")


# Minimum softmax probability for a prediction to be accepted and displayed.
CONFIDENCE_THRESHOLD = 0.7
# Margin added around the landmark extent, as a fraction of the frame size.
BBOX_PADDING = 0.05
ESC_KEY = 27
GREEN = (0, 255, 0)
WHITE = (255, 255, 255)


def _hand_bbox(hand_landmarks, frame_w, frame_h, padding=BBOX_PADDING):
    """Return the padded pixel box (x0, y0, x1, y1) around a hand's landmarks.

    Landmark x/y values are treated as fractions of the frame size. The
    padded extent is clamped to [0, 1] before scaling so the box never
    extends past the frame edges.
    """
    points = np.array([[lm.x, lm.y] for lm in hand_landmarks.landmark])
    x_min, y_min = np.maximum(points.min(axis=0) - padding, 0)
    x_max, y_max = np.minimum(points.max(axis=0) + padding, 1)
    return (int(x_min * frame_w), int(y_min * frame_h),
            int(x_max * frame_w), int(y_max * frame_h))


def _classify_hand(rgb_crop):
    """Classify an RGB hand crop.

    Returns a (class_key, probability) pair: the top-scoring class index as
    a string (the key format used by `class_labels`) and its softmax
    probability as a float.
    """
    pil_img = Image.fromarray(np.ascontiguousarray(rgb_crop))
    tensor_img = transform(pil_img).unsqueeze(0).to(device)
    with torch.no_grad():
        prob = F.softmax(model(tensor_img), dim=1)
    top_prob, top_idx = torch.max(prob, 1)
    return str(top_idx.item()), top_prob.item()


# Main capture loop: detect hands, classify each crop, overlay the result,
# and react to keyboard commands until ESC is pressed or the camera closes.
send_to_arduino("CLEAR")

try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            print("Ignoring empty frame")
            # Keep polling the keyboard so ESC still works while the camera
            # is not delivering frames.
            if cv2.waitKey(1) & 0xFF == ESC_KEY:
                break
            continue

        # Mirror the image so on-screen movement matches the user's movement.
        frame = cv2.flip(frame, 1)

        # `frame_rgb` is never drawn on: it feeds both the hand detector and
        # the classifier, so annotations added to `frame` for one hand cannot
        # leak into the crop of another.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)
        h, w = frame.shape[:2]

        for hand_landmarks in results.multi_hand_landmarks or ():
            try:
                bbox = _hand_bbox(hand_landmarks, w, h)
                hand_rgb = frame_rgb[bbox[1]:bbox[3], bbox[0]:bbox[2]]
                if hand_rgb.size == 0:
                    continue

                class_key, hand_confidence = _classify_hand(hand_rgb)
                if class_key not in class_labels or hand_confidence <= CONFIDENCE_THRESHOLD:
                    continue

                # Update prediction and confidence together so the value shown
                # (and checked by the 'a' key) always belongs to the label.
                current_prediction = class_labels[class_key]
                confidence = hand_confidence

                # Rate-limit LETTER messages to one per `update_interval`.
                current_time = time.time()
                if current_time - last_update_time > update_interval:
                    send_to_arduino(f"LETTER:{current_prediction}")
                    last_update_time = current_time

                cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), GREEN, 2)
                label = f"{current_prediction} ({confidence:.2f})"
                # Keep the caption on-screen when the box touches the top edge.
                label_y = max(bbox[1] - 10, 20)
                cv2.putText(frame, label, (bbox[0], label_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, GREEN, 2)

            except Exception as e:
                # Best effort: a failure on one hand must not stop the stream.
                print(f"Error processing hand: {e}")

        word_text = f"Current word: {''.join(current_word)}"
        cv2.putText(frame, word_text, (20, 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, WHITE, 2)

        if current_prediction:
            pred_text = f"Current prediction: {current_prediction} ({confidence:.2f})"
            cv2.putText(frame, pred_text, (20, 80),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, WHITE, 2)

        instructions = "Press: 'a'=add, 'b'=backspace, 's'=save, 'c'=clear, ESC=exit"
        cv2.putText(frame, instructions, (20, h - 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, WHITE, 1)

        cv2.imshow('ASL Detection', frame)

        key = cv2.waitKey(1) & 0xFF
        if key == ord('a'):  # Add current prediction to word
            if current_prediction and confidence > CONFIDENCE_THRESHOLD:
                current_word.append(current_prediction)
                print(f"Added '{current_prediction}' to word")
                send_to_arduino(f"WORD:{''.join(current_word)}")

        elif key == ord('b'):  # Backspace
            if current_word:
                removed = current_word.pop()
                print(f"Removed last letter: '{removed}'")
                send_to_arduino(f"WORD:{''.join(current_word)}")

        elif key == ord('s'):  # Save/print word
            if current_word:
                final_word = ''.join(current_word)
                print(f"\nFinal Word: {final_word}")
                send_to_arduino(f"SAVE:{final_word}")
                current_word = []

        elif key == ord('c'):  # Clear word
            current_word = []
            current_prediction = None
            confidence = 0
            print("Word cleared")
            send_to_arduino("CLEAR")

        elif key == ESC_KEY:  # ESC to exit
            break
finally:
    # Release hardware and native resources even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()
    if arduino and arduino.is_open:
        arduino.close()
    hands.close()