## Preprocessing

In [None]:
import cv2
import os
import mediapipe as mp
import numpy as np
from tqdm import tqdm
import random

# =============== MediaPipe Hands ===================
# Module-level hand detector used by process_image() below.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=True,  # inputs are unrelated still images, not a video stream
    max_num_hands=1,  # process_image() only ever reads the first detected hand
    min_detection_confidence=0.4
)

def detect_hand_bbox(landmarks, img_w, img_h, extra=60):
    """Return a padded pixel bounding box around normalized landmarks.

    Each landmark's ``x`` / ``y`` attribute is multiplied by the image width /
    height and truncated to an int. The extremes are widened by ``extra``
    pixels on every side and clamped to ``[0, img_w]`` and ``[0, img_h]``.

    Args:
        landmarks: sequence of objects exposing ``x`` and ``y`` attributes.
        img_w: image width in pixels.
        img_h: image height in pixels.
        extra: padding added around the landmark extremes, in pixels.

    Returns:
        Tuple ``(x_min, y_min, x_max, y_max)``.
    """
    cols = [int(point.x * img_w) for point in landmarks]
    rows = [int(point.y * img_h) for point in landmarks]

    left, right = min(cols) - extra, max(cols) + extra
    top, bottom = min(rows) - extra, max(rows) + extra

    # Clamp the padded box so it never leaves the image.
    return max(0, left), max(0, top), min(img_w, right), min(img_h, bottom)

def random_augment(img):
    """Randomly distort a BGR image with independent, probabilistic steps.

    Six steps are attempted in a fixed order, each gated by its own draw from
    ``random.random()``:

    1. (p=0.4) scale intensities by a gain drawn from [0.7, 1.3];
    2. (p=0.4) histogram-equalize the L channel in LAB colour space;
    3. (p=0.3) add uniform integer noise drawn from [0, 20);
    4. (p=0.3) 5x5 Gaussian blur;
    5. (p=0.4) rotate about the image centre by an angle in [-20, 20];
    6. (p=0.3) perspective warp that moves both top corners inwards by the
       same 0..40 px shift.

    Returns:
        The augmented image. When no step fires, the input object itself is
        returned.
    """
    def _fires(probability):
        # One gate == one random.random() draw, in the same order as the steps.
        return random.random() < probability

    # 1) brightness / contrast gain
    if _fires(0.4):
        gain = random.uniform(0.7, 1.3)
        img = cv2.convertScaleAbs(img, alpha=gain)

    # 2) equalize lightness only, leaving the colour channels untouched
    if _fires(0.4):
        lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2LAB))
        equalized = cv2.merge((cv2.equalizeHist(lightness), chan_a, chan_b))
        img = cv2.cvtColor(equalized, cv2.COLOR_LAB2BGR)

    # 3) additive noise
    if _fires(0.3):
        img = cv2.add(img, np.random.randint(0, 20, img.shape, dtype='uint8'))

    # 4) blur
    if _fires(0.3):
        img = cv2.GaussianBlur(img, (5, 5), 0)

    # 5) rotation, output keeps the input size
    if _fires(0.4):
        rows, cols = img.shape[:2]
        degrees = random.uniform(-20, 20)
        rotation = cv2.getRotationMatrix2D((cols//2, rows//2), degrees, 1)
        img = cv2.warpAffine(img, rotation, (cols, rows))

    # 6) perspective: bottom corners stay fixed, top corners move inwards
    if _fires(0.3):
        rows, cols = img.shape[:2]
        source = np.float32([[0,0],[cols,0],[0,rows],[cols,rows]])
        inset = random.randint(0, 40)
        target = np.float32([[inset,0],[cols-inset,0],[0,rows],[cols,rows]])
        warp = cv2.getPerspectiveTransform(source, target)
        img = cv2.warpPerspective(img, warp, (cols,rows))

    return img

def process_image(image_path, size=(128, 128)):
    """Load an image, crop it to the detected hand and resize it.

    Args:
        image_path: path of the image file to read with ``cv2.imread``.
        size: target ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        The resized BGR image, or ``None`` when the file cannot be read.
        When no hand is detected, or the computed crop is empty, the whole
        image is resized instead of a crop.
    """
    bgr = cv2.imread(image_path)
    if bgr is None:
        return None

    detection = hands.process(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))

    # Default to the full frame; narrow to the hand only if a usable box exists.
    region = bgr
    if detection.multi_hand_landmarks:
        height, width, _ = bgr.shape
        first_hand = detection.multi_hand_landmarks[0].landmark
        left, top, right, bottom = detect_hand_bbox(first_hand, width, height)
        candidate = bgr[top:bottom, left:right]
        if candidate.size != 0:
            region = candidate

    return cv2.resize(region, size)

def clean_dataset(old_dir, new_dir, limit_per_class=600):
    """Build a hand-cropped copy of a class-per-folder image dataset.

    For every class sub-directory of ``old_dir`` the first ``limit_per_class``
    files (by sorted name) are run through ``process_image`` and written under
    the same file name to ``new_dir/<class>``. Files that already exist in the
    destination are skipped, so an interrupted run can be resumed.

    Args:
        old_dir: source root containing one sub-directory per class.
        new_dir: destination root; created if missing.
        limit_per_class: maximum number of files considered per class.
    """
    os.makedirs(new_dir, exist_ok=True)

    # Only directories are classes. A stray file in the root (Thumbs.db,
    # .DS_Store, a README, ...) would otherwise make os.listdir() fail below.
    classes = sorted(
        entry for entry in os.listdir(old_dir)
        if os.path.isdir(os.path.join(old_dir, entry))
    )
    print("Classes:", classes)

    skipped = 0
    for cls in tqdm(classes):
        old_path = os.path.join(old_dir, cls)
        new_path = os.path.join(new_dir, cls)
        os.makedirs(new_path, exist_ok=True)

        # os.listdir() returns entries in arbitrary order; sorting makes the
        # per-class limit select the same files on every run.
        images = sorted(os.listdir(old_path))[:limit_per_class]
        for img_name in images:
            dst = os.path.join(new_path, img_name)
            if os.path.exists(dst):
                continue
            src = os.path.join(old_path, img_name)
            processed = process_image(src)
            if processed is None:
                # Unreadable / non-image file: count it instead of hiding it.
                skipped += 1
                continue
            if not cv2.imwrite(dst, processed):
                skipped += 1

    if skipped:
        print(f"Skipped {skipped} file(s) that could not be read or written.")
    print("DONE preprocessing.")

# Run
# OLD: raw dataset root with one sub-folder per class (input of clean_dataset).
# NEW: output root for the cropped copies. It is a relative path, so it is
#      resolved against the current working directory.
# NOTE(review): the training cell reads an absolute
# "...\Project\processed_dataset_final_2" path - presumably this cell is run
# from the Project folder; confirm.
OLD = r"D:\Gemy Study\FAI\Deep Learning\Project\data\Train_Alphabet"
NEW = r"processed_dataset_final_2"

clean_dataset(OLD, NEW)


## Model

In [None]:
# ============================================
# 1) IMPORTS
# ============================================
import torch
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

# ============================================
# 2) DEVICE
# ============================================
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using Device: {device}")

# ============================================
# 3) DATASET PATH
# ============================================
# Root of the preprocessed (hand-cropped) dataset, one sub-folder per class.
TRAIN_DIR = r"D:\Gemy Study\FAI\Deep Learning\Project\processed_dataset_final_2"
# Side length of the square network input; also sizes the model's first linear layer.
IMAGE_SIZE = 128

# ============================================
# 4) FUNCTION TO CALCULATE MEAN & STD
# ============================================
def compute_mean_std(dataset_path):
    """Estimate per-channel normalization statistics of an ImageFolder dataset.

    Every image is resized to ``IMAGE_SIZE`` x ``IMAGE_SIZE`` and converted
    with ``ToTensor``. For each image the per-channel mean and std over all
    pixels are taken; those per-image values are then averaged over the
    whole dataset.

    NOTE: the returned std is therefore the mean of per-image stds, not the
    std of all pixels pooled together.

    Args:
        dataset_path: root directory in ``ImageFolder`` layout.

    Returns:
        ``(mean, std)`` tensors with one entry per channel.
    """
    print("Calculating dataset mean & std ...")

    to_tensor = transforms.Compose([
        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        transforms.ToTensor()
    ])
    loader = DataLoader(
        datasets.ImageFolder(dataset_path, transform=to_tensor),
        batch_size=64,
        shuffle=False,
    )

    mean, std, n_images = 0.0, 0.0, 0
    for batch_images, _ in loader:
        count = batch_images.size(0)
        # Collapse H and W so statistics are taken over all pixels of a channel.
        per_channel = batch_images.view(count, batch_images.size(1), -1)
        mean += per_channel.mean(2).sum(0)
        std += per_channel.std(2).sum(0)
        n_images += count

    mean /= n_images
    std /= n_images

    print(f"Mean  = {mean}")
    print(f"Std   = {std}")
    return mean, std

# Compute mean/std once; every transform below normalizes with these values.
mean, std = compute_mean_std(TRAIN_DIR)

# ============================================
# 5) TRANSFORMS
# ============================================
# Training pipeline: resize, random geometric / photometric augmentation,
# then tensor conversion and normalization.
train_transform = transforms.Compose(
    [
        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        transforms.RandomRotation(20),
        transforms.RandomPerspective(distortion_scale=0.3, p=0.5),
        transforms.ColorJitter(brightness=0.15, contrast=0.15),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]
)

# Validation pipeline: deterministic, no augmentation.
val_transform = transforms.Compose(
    [
        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]
)

# ============================================
# 6) LOAD DATASET WITH TRANSFORMS
# ============================================
# Two ImageFolder views of the same directory, one per transform, so the
# training and validation subsets can be preprocessed differently.
train_dataset = datasets.ImageFolder(root=TRAIN_DIR, transform=train_transform)
val_dataset = datasets.ImageFolder(root=TRAIN_DIR, transform=val_transform)

train_size = int(0.8 * len(train_dataset))
val_size   = len(train_dataset) - train_size


def _seeded_split(dataset):
    # A fresh generator with the same seed and the same lengths is used for
    # each call, so both views are split with identically seeded generators.
    return random_split(
        dataset,
        [train_size, val_size],
        generator=torch.Generator().manual_seed(42),
    )


train_subset = _seeded_split(train_dataset)[0]
val_subset = _seeded_split(val_dataset)[1]

train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
val_loader   = DataLoader(val_subset, batch_size=32, shuffle=False)

# ============================================
# 7) CNN MODEL
# ============================================
class ASL_CNN(nn.Module):
    """Four-stage convolutional classifier for square 3-channel hand crops.

    ``features`` is four Conv(3x3, padding=1) -> BatchNorm -> ReLU ->
    MaxPool(2) stages with 32/64/128/256 output channels. ``classifier``
    flattens the feature map and applies 512 -> 256 -> ``num_classes`` linear
    layers with ReLU and dropout in between. The layer order is kept exactly
    as before so existing ``state_dict`` checkpoints still load.

    Args:
        num_classes: number of output logits.
        image_size: side length of the square input, used to size the first
            linear layer. Defaults to the module-level ``IMAGE_SIZE``.
    """

    def __init__(self, num_classes=27, image_size=None):
        super().__init__()
        if image_size is None:
            image_size = IMAGE_SIZE

        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),  # 3 channels after the RGB conversion
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

        # Derive the flattened feature size from a dummy forward pass.
        # The probe runs in eval mode: in training mode every BatchNorm layer
        # would fold the all-zero dummy batch into its running statistics.
        self.features.eval()
        with torch.no_grad():
            probe = self.features(torch.zeros(1, 3, image_size, image_size))
        self.features.train()
        self.flatten_dim = probe.view(1, -1).shape[1]

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(self.flatten_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.2),

            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        """Return class logits for a batch of images."""
        return self.classifier(self.features(x))

# One output logit per class folder found by ImageFolder.
model = ASL_CNN(len(train_dataset.classes)).to(device)

# ============================================
# 8) TRAIN SETTINGS
# ============================================
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)
# mode='max' because train_model() steps the scheduler with validation accuracy.
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=2)

# Per-epoch history, appended to by train_model() and plotted afterwards.
train_losses = []
val_losses = []
val_accuracies = []

# ============================================
# 9) TRAINING LOOP
# ============================================
def train_model(epochs):
    """Train the module-level ``model`` and restore its best checkpoint.

    Uses the module-level ``train_loader``, ``val_loader``, ``criterion``,
    ``optimizer``, ``scheduler`` and ``device``. After every epoch the average
    train loss, average validation loss and validation accuracy are appended
    to ``train_losses``, ``val_losses`` and ``val_accuracies``; the scheduler
    is stepped with the validation accuracy. The weights with the best
    validation accuracy are written to ``best_model_2.pth`` and loaded back
    into ``model`` when training finishes.

    Args:
        epochs: number of passes over the training loader.
    """
    best_acc = 0.0
    # Tracks whether THIS run wrote a checkpoint, so a missing or stale file
    # from an earlier run is never loaded at the end.
    saved_checkpoint = False
    print(f"Starting Training for {epochs} epochs...")

    for epoch in range(epochs):
        model.train()
        train_loss = 0

        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}", leave=False):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        # ------------------- VALIDATION ------------------
        model.eval()
        val_loss = 0
        all_preds, all_labels = [], []

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)

                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, preds = torch.max(outputs, 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        # Losses are averaged per batch, accuracy per sample.
        avg_train_loss = train_loss / len(train_loader)
        avg_val_loss = val_loss   / len(val_loader)
        val_acc = (np.array(all_preds) == np.array(all_labels)).mean()

        train_losses.append(avg_train_loss)
        val_losses.append(avg_val_loss)
        val_accuracies.append(val_acc)

        print(f"Epoch {epoch+1}: Train={avg_train_loss:.4f} | Val={avg_val_loss:.4f} | Acc={val_acc*100:.2f}%")

        scheduler.step(val_acc)

        # The first epoch is always checkpointed, even at 0% accuracy, so the
        # reload below always has a file produced by this run.
        if val_acc > best_acc or not saved_checkpoint:
            best_acc = val_acc
            torch.save(model.state_dict(), "best_model_2.pth")
            saved_checkpoint = True

    print(f"Training Done! Best Accuracy = {best_acc*100:.2f}%")
    if saved_checkpoint:
        model.load_state_dict(
            torch.load("best_model_2.pth", map_location=device, weights_only=True)
        )

# ============================================
# 10) RUN TRAINING
# ============================================
train_model(epochs=10)

# ============================================
# 11) PLOTS
# ============================================
# Training vs. validation loss, one point per epoch.
plt.figure(figsize=(8,5))
for _history, _legend in ((train_losses, 'Train Loss'), (val_losses, 'Val Loss')):
    plt.plot(_history, label=_legend)
plt.legend()
plt.title("Loss Curve")
plt.show()

# Validation accuracy, one point per epoch.
plt.figure(figsize=(8,5))
plt.plot(val_accuracies, label='Val Accuracy')
plt.legend()
plt.title("Validation Accuracy")
plt.show()


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import mediapipe as mp
import cv2
import os
from PIL import Image
# ============================================
# 1) TEST DATASET PATH
# ============================================
# Raw (uncropped) test images, one sub-folder per class.
TEST_DIR = r"D:\Gemy Study\FAI\Deep Learning\Project\data\Test_Alphabet"
# ============================================
# 2) MEDIAPIPE HAND DETECTOR (same settings as the preprocessing cell)
# ============================================
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,
    min_detection_confidence=0.4
)

def detect_hand_bbox(landmarks, img_w, img_h, extra=60):
    """Compute a padded, image-clamped pixel box enclosing the landmarks.

    Landmark ``x`` / ``y`` attributes are scaled by ``img_w`` / ``img_h`` and
    truncated to ints before the extremes are taken.

    Returns:
        ``(x_min, y_min, x_max, y_max)`` with ``extra`` pixels of padding on
        each side, limited to ``0..img_w`` horizontally and ``0..img_h``
        vertically.
    """
    def _padded_span(values, upper_limit):
        # Widen [min, max] by the padding, then clamp to [0, upper_limit].
        low = max(0, min(values) - extra)
        high = min(upper_limit, max(values) + extra)
        return low, high

    pixel_x = [int(mark.x * img_w) for mark in landmarks]
    pixel_y = [int(mark.y * img_h) for mark in landmarks]

    x_lo, x_hi = _padded_span(pixel_x, img_w)
    y_lo, y_hi = _padded_span(pixel_y, img_h)
    return x_lo, y_lo, x_hi, y_hi

# ============================================
# 3) CUSTOM DATASET WITH HAND CROP
# ============================================
class HandCropTestDataset(Dataset):
    """Class-per-folder image dataset that crops each image to the hand.

    Class names are the sorted sub-directory names of ``root_dir`` and a
    sample's label is the index of its class in that list. Images are read
    with OpenCV, converted to RGB and, when a hand is detected, cropped to
    its padded bounding box before ``transform`` is applied.

    Args:
        root_dir: dataset root containing one sub-directory per class.
        transform: optional callable applied to the crop as a PIL image.
            Without it the raw RGB numpy array is returned.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # Only directories are classes. A stray file in the root would
        # otherwise become a class, shift every following label index and
        # make os.listdir() fail below.
        self.classes = sorted(
            name for name in os.listdir(root_dir)
            if os.path.isdir(os.path.join(root_dir, name))
        )
        self.img_paths = []
        self.labels = []

        for idx, cls in enumerate(self.classes):
            cls_path = os.path.join(root_dir, cls)
            # Sorted so the sample order does not depend on the filesystem.
            for img_name in sorted(os.listdir(cls_path)):
                self.img_paths.append(os.path.join(cls_path, img_name))
                self.labels.append(idx)

    def __len__(self):
        """Return the number of image files found."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        Raises:
            ValueError: if the image file cannot be read.
        """
        path = self.img_paths[idx]
        img = cv2.imread(path)
        if img is None:
            raise ValueError(f"Image not found: {path}")

        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        h, w, _ = img.shape
        result = hands.process(img_rgb)

        if result.multi_hand_landmarks:
            lm = result.multi_hand_landmarks[0].landmark
            x1, y1, x2, y2 = detect_hand_bbox(lm, w, h)
            cropped = img_rgb[y1:y2, x1:x2]
            # Same fallback as the training preprocessing: an empty crop
            # keeps the full image instead of failing in the transform.
            if cropped.size != 0:
                img_rgb = cropped

        if self.transform:
            img_rgb = Image.fromarray(img_rgb)
            img_rgb = self.transform(img_rgb)

        label = self.labels[idx]
        return img_rgb, label

# ============================================
# 4) TRANSFORMS (like validation)
# ============================================
# Same resize + normalization as the validation pipeline; `mean` and `std`
# are the values produced by the training cell.
test_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# ============================================
# 5) LOAD TEST DATASET
# ============================================
test_dataset = HandCropTestDataset(TEST_DIR, transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# ============================================
# 6) LOAD MODEL
# ============================================
model = ASL_CNN(num_classes=len(test_dataset.classes)).to(device)
model.load_state_dict(torch.load("best_model_2.pth", map_location=device,weights_only=True))
model.eval()

# ============================================
# 7) EVALUATE ON TEST
# ============================================
all_preds = []
all_labels = []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# ============================================
# 8) ACCURACY
# ============================================
accuracy = (np.array(all_preds) == np.array(all_labels)).mean()
print(f"Test Accuracy: {accuracy*100:.2f}%")

# ============================================
# 9) CONFUSION MATRIX
# ============================================
# Pin the label set explicitly. Without it scikit-learn infers the classes
# from the values that actually occur, so a class that is never seen nor
# predicted would shrink the matrix and misalign it with the class names.
class_ids = list(range(len(test_dataset.classes)))
cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
plt.figure(figsize=(12,10))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=test_dataset.classes,
    yticklabels=test_dataset.classes,
)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()

# ============================================
# 10) CLASSIFICATION REPORT
# ============================================
# `labels` keeps the rows aligned with `target_names` even when a class is
# absent; `zero_division=0` reports 0 for such classes instead of warning.
report = classification_report(
    all_labels,
    all_preds,
    labels=class_ids,
    target_names=test_dataset.classes,
    zero_division=0,
)
print(report)


## Test

In [None]:
import torch
import cv2
from torchvision import transforms
import numpy as np
import matplotlib.pyplot as plt
import torch.nn.functional as F
import mediapipe as mp

# ============================================
# 1) PATH OF YOUR IMAGE
# ============================================
image_path = r"D:\Gemy Study\FAI\Deep Learning\Project\test.jpg"  # edit this path

# ============================================
# 2) PREPROCESSING
# ============================================
mp_hands = mp.solutions.hands
# Use the same detector settings as the preprocessing and evaluation cells
# (min_detection_confidence=0.4). The threshold was previously left at the
# library default, so an image could be cropped during training but fall back
# to the full frame here.
hands = mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,
    min_detection_confidence=0.4
)

def mediapipe_crop(img):
    """Crop a BGR image to the first detected hand, padded by 60 pixels.

    Returns:
        The cropped region, or the input image unchanged when no hand is
        detected or the padded box turns out empty.
    """
    detection = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    if not detection.multi_hand_landmarks:
        return img  # fallback

    pad = 60
    height, width, _ = img.shape
    points = detection.multi_hand_landmarks[0].landmark
    cols = [int(p.x * width) for p in points]
    rows = [int(p.y * height) for p in points]

    # Padded extremes, clamped to the image borders.
    left = max(0, min(cols) - pad)
    top = max(0, min(rows) - pad)
    right = min(width, max(cols) + pad)
    bottom = min(height, max(rows) + pad)

    region = img[top:bottom, left:right]
    if region.size == 0:
        return img
    return region

# Same resize + normalization as validation. ToPILImage comes first because
# the input is an RGB numpy array rather than a PIL image.
preprocess = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# ============================================
# 3) LOAD IMAGE
# ============================================
img = cv2.imread(image_path)
if img is None:
    raise ValueError("Image not found at the given path!")
img = mediapipe_crop(img)
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_tensor = preprocess(img_rgb).unsqueeze(0).to(device)

# ============================================
# 4) LOAD MODEL
# ============================================
model = ASL_CNN(num_classes=len(train_dataset.classes)).to(device)
# weights_only=True (as in the evaluation cell) restricts unpickling to
# tensors and plain containers, which is all a state_dict needs.
model.load_state_dict(torch.load("best_model_2.pth", map_location=device, weights_only=True))
model.eval()

# ============================================
# 5) PREDICTION
# ============================================
with torch.no_grad():
    output = model(img_tensor)
    probs = F.softmax(output, dim=1)  # turn logits into class probabilities
    conf, pred = torch.max(probs, 1)

predicted_class = train_dataset.classes[pred.item()]
confidence = conf.item() * 100

# ============================================
# 6) DISPLAY IMAGE WITH PREDICTION
# ============================================
plt.figure(figsize=(5,5))
plt.imshow(img_rgb)
plt.title(f"Prediction: {predicted_class} ({confidence:.2f}%)", fontsize=14)
plt.axis('off')
plt.show()


In [None]:
import torch
import cv2
from torchvision import transforms
import numpy as np
import matplotlib.pyplot as plt
import torch.nn.functional as F
import mediapipe as mp

# ============================================
# 1) PATH OF YOUR IMAGE
# ============================================
image_path = r"D:\Gemy Study\FAI\Deep Learning\Project\test.jpg"  # edit this path

# ============================================
# 2) PREPROCESSING
# ============================================
mp_hands = mp.solutions.hands
# Detector settings match the preprocessing and evaluation cells
# (min_detection_confidence=0.4) so that inference crops images under the
# same conditions the training data was cropped with. The threshold was
# previously left at the library default.
hands = mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,
    min_detection_confidence=0.4
)

def mediapipe_crop(img):
    """Return the hand region of a BGR image, or the image itself.

    The region is the bounding box of the first detected hand's landmarks,
    enlarged by 60 pixels on each side and clamped to the image. The input is
    returned unchanged when no hand is found or the region is empty.
    """
    as_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    found = hands.process(as_rgb)

    if found.multi_hand_landmarks:
        img_h, img_w, _ = img.shape
        marks = found.multi_hand_landmarks[0].landmark
        xs = [int(mark.x * img_w) for mark in marks]
        ys = [int(mark.y * img_h) for mark in marks]

        margin = 60
        x_from, y_from = max(0, min(xs)-margin), max(0, min(ys)-margin)
        x_to, y_to = min(img_w, max(xs)+margin), min(img_h, max(ys)+margin)

        hand = img[y_from:y_to, x_from:x_to]
        return hand if hand.size != 0 else img

    return img  # fallback

# Validation-style preprocessing for a single RGB numpy image: convert to
# PIL, resize, convert to a tensor and normalize with the training mean/std.
preprocess = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# ============================================
# 3) LOAD IMAGE
# ============================================
img = cv2.imread(image_path)
if img is None:
    raise ValueError("Image not found at the given path!")
img = mediapipe_crop(img)
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_tensor = preprocess(img_rgb).unsqueeze(0).to(device)

# ============================================
# 4) LOAD MODEL
# ============================================
model = ASL_CNN(num_classes=len(train_dataset.classes)).to(device)
# Load with weights_only=True, consistent with the evaluation cell: only
# tensors and plain containers are unpickled from the checkpoint file.
model.load_state_dict(torch.load("best_model_2.pth", map_location=device, weights_only=True))
model.eval()

# ============================================
# 5) PREDICTION
# ============================================
with torch.no_grad():
    output = model(img_tensor)
    probs = F.softmax(output, dim=1)  # compute the class probabilities
    conf, pred = torch.max(probs, 1)

predicted_class = train_dataset.classes[pred.item()]
confidence = conf.item() * 100

# ============================================
# 6) DISPLAY IMAGE WITH PREDICTION
# ============================================
plt.figure(figsize=(5,5))
plt.imshow(img_rgb)
plt.title(f"Prediction: {predicted_class} ({confidence:.2f}%)", fontsize=14)
plt.axis('off')
plt.show()


## Real-time test with the webcam

In [None]:
## Model setup and loading trained weights
import torch
from torchvision import datasets
from torchvision import transforms
import torch.nn as nn

# ============================================
# 1) SET DEVICE
# ============================================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using Device: {device}")

# ============================================
# 2) SAME PATH OF TRAINED DATASET
# ============================================
TRAIN_DIR = r"D:\Gemy Study\FAI\Deep Learning\Project\processed_dataset_final_2"
IMAGE_SIZE = 128

# ============================================
# 3) LOAD mean & std (the values computed before training)
# ============================================
# Hard-coded so this cell can run without recomputing the dataset statistics.
# NOTE(review): presumably copied from a compute_mean_std() run on TRAIN_DIR -
# confirm they match the values the loaded checkpoint was trained with.
mean = torch.tensor([0.4822, 0.4417, 0.3973])   # change these if your values differ
std  = torch.tensor([0.2228, 0.2257, 0.2268])

# ============================================
# 4) LOAD CLASSES (for prediction)
# ============================================
# Only the class-name list is needed here, so no transform is passed.
full_dataset = datasets.ImageFolder(TRAIN_DIR)
classes = full_dataset.classes
print("Loaded Classes:", classes)

# ============================================
# 5) DEFINE SAME CNN MODEL
# ============================================
class ASL_CNN(nn.Module):
    """CNN classifier: four conv stages followed by a three-layer MLP head.

    Each stage in ``features`` is Conv(3x3, padding=1) -> BatchNorm -> ReLU ->
    MaxPool(2); the stages output 32, 64, 128 and 256 channels. ``classifier``
    is Flatten -> Linear(512) -> ReLU -> Dropout(0.3) -> Linear(256) -> ReLU
    -> Dropout(0.2) -> Linear(num_classes). Sub-module order (and therefore
    every ``state_dict`` key) matches the training definition, so trained
    checkpoints load unchanged.

    Args:
        num_classes: number of output logits.
        image_size: side length of the square input used to size the first
            linear layer. Defaults to the module-level ``IMAGE_SIZE``.
    """

    def __init__(self, num_classes=27, image_size=None):
        super().__init__()
        if image_size is None:
            image_size = IMAGE_SIZE

        # Build the four identical-shape stages from their channel widths.
        stages = []
        in_channels = 3
        for out_channels in (32, 64, 128, 256):
            stages.extend([
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ])
            in_channels = out_channels
        self.features = nn.Sequential(*stages)

        # Measure the flattened feature size with a dummy input. Eval mode
        # keeps the BatchNorm running statistics untouched by the dummy
        # batch; training mode is restored right after.
        self.features.eval()
        with torch.no_grad():
            probe = self.features(torch.zeros(1, 3, image_size, image_size))
        self.features.train()
        self.flatten_dim = probe.view(1, -1).shape[1]

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(self.flatten_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.2),

            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        """Return class logits for a batch of images."""
        x = self.features(x)
        x = self.classifier(x)
        return x

# ============================================
# 6) LOAD MODEL + WEIGHTS
# ============================================
model = ASL_CNN(num_classes=len(classes)).to(device)
# weights_only=True matches the evaluation cell: the checkpoint is a plain
# state_dict, so nothing beyond tensors needs to be unpickled.
model.load_state_dict(torch.load("best_model_2.pth", map_location=device, weights_only=True))
# Inference mode: dropout disabled, BatchNorm uses its running statistics.
model.eval()

print("Model Loaded Successfully!")

In [None]:
## Code for Real-time Hand Sign Recognition using Webcam
import cv2
from PIL import Image
import torch
from torchvision import transforms
import mediapipe as mp
import torch.nn.functional as F

# ============================================
# 1) SETUP MEDIA PIPE HANDS
# ============================================
# Unlike the dataset cells, this detector runs with static_image_mode=False
# because it is fed consecutive webcam frames.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)

# ============================================
# 2) TRANSFORMS (like validation)
# ============================================
# Applied to the cropped hand region as a PIL image; `mean` / `std` are the
# tensors defined in the model-setup cell.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# ============================================
# 3) OPEN WEBCAM
# ============================================
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Cannot open webcam")
    exit()

print("Webcam started. Press 'q' to quit.")

# ============================================
# 4) REALTIME LOOP
# ============================================
# Switching to inference mode is loop-invariant, so do it once up front.
model.eval()
pad = 60

# try/finally guarantees the camera and the preview window are released even
# if a frame raises (bad crop, CUDA error, Ctrl+C, ...).
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(img_rgb)
        h, w, _ = frame.shape

        if results.multi_hand_landmarks:
            # Get bounding box
            x_min, y_min, x_max, y_max = w, h, 0, 0
            for lm in results.multi_hand_landmarks[0].landmark:
                x, y = int(lm.x * w), int(lm.y * h)
                x_min, y_min = min(x_min, x), min(y_min, y)
                x_max, y_max = max(x_max, x), max(y_max, y)

            x_min, y_min = max(0, x_min - pad), max(0, y_min - pad)
            x_max, y_max = min(w, x_max + pad), min(h, y_max + pad)

            # Crop hand region
            hand_img = img_rgb[y_min:y_max, x_min:x_max]

            # Landmarks may fall outside the frame, in which case the clamped
            # box is empty; skip prediction for that frame instead of crashing.
            if hand_img.size != 0:
                pil_img = Image.fromarray(hand_img)
                img_tensor = transform(pil_img).unsqueeze(0).to(device)

                # Prediction
                with torch.no_grad():
                    output = model(img_tensor)
                    probs = F.softmax(output, dim=1)
                    conf, pred_idx = torch.max(probs, 1)

                pred_label = full_dataset.classes[pred_idx.item()]
                conf_score = conf.item() * 100

                # Draw bounding box
                cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

                # Put prediction text above the box; colour encodes confidence
                # (green > 70, orange > 40, red otherwise; BGR order).
                text = f"{pred_label} ({conf_score:.1f}%)"
                text_y = max(30, y_min - 10)  # above the box, but never off-screen
                color = (0, 255, 0) if conf_score > 70 else (0, 165, 255) if conf_score > 40 else (0, 0, 255)
                cv2.putText(frame, text, (x_min, text_y), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)

        # Show frame
        cv2.imshow("Hand Sign Recognition", frame)

        # Quit with 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


## Model (variant trained on `processed_dataset_final`, saved as `best_model.pth`)

In [None]:
# ============================================
# 1) IMPORTS
# ============================================
import torch
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

# ============================================
# 2) DEVICE
# ============================================
# Prefer the GPU when CUDA is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using Device: {device}")

# ============================================
# 3) DATASET PATH
# ============================================
# NOTE: this cell trains on "processed_dataset_final", not the
# "processed_dataset_final_2" folder used by the other cells.
TRAIN_DIR = r"D:\Gemy Study\FAI\Deep Learning\Project\processed_dataset_final"
# Side length of the square network input.
IMAGE_SIZE = 128

# ============================================
# 4) FUNCTION TO CALCULATE MEAN & STD
# ============================================
def compute_mean_std(dataset_path):
    """Return per-channel ``(mean, std)`` used to normalize the dataset.

    Images are loaded through ``ImageFolder``, resized to ``IMAGE_SIZE`` x
    ``IMAGE_SIZE`` and converted to tensors. Per-channel mean and std are
    computed for each image over its pixels and then averaged across all
    images.

    NOTE: the std returned is the average of per-image stds rather than the
    std of all pixels pooled together.

    Args:
        dataset_path: dataset root in ``ImageFolder`` layout.
    """
    print("Calculating dataset mean & std ...")

    resize_only = transforms.Compose([
        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        transforms.ToTensor()
    ])
    images_on_disk = datasets.ImageFolder(dataset_path, transform=resize_only)
    batches = DataLoader(images_on_disk, batch_size=64, shuffle=False)

    mean = 0.0
    std = 0.0
    seen = 0

    for images, _labels in batches:
        in_batch = images.size(0)
        seen += in_batch
        # (batch, channels, H, W) -> (batch, channels, H*W)
        pixels = images.view(in_batch, images.size(1), -1)
        mean += pixels.mean(2).sum(0)
        std += pixels.std(2).sum(0)

    mean /= seen
    std /= seen

    for label, value in (("Mean  = ", mean), ("Std   = ", std)):
        print(f"{label}{value}")
    return mean, std

mean, std = compute_mean_std(TRAIN_DIR)

# ============================================
# 5) TRANSFORMS
# ============================================
# Training: resize + random augmentation + normalization.
train_transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.RandomRotation(20),
    transforms.RandomPerspective(distortion_scale=0.3, p=0.5),
    transforms.ColorJitter(brightness=0.15, contrast=0.15),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# Validation: deterministic resize + normalization only.
val_transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# ============================================
# 6) LOAD DATASET WITH TRANSFORMS
# ============================================
# Both subsets returned by random_split() share ONE underlying dataset, so
# assigning `val_subset.dataset.transform = val_transform` also replaced the
# transform used by train_subset and silently disabled all augmentation.
# Two ImageFolder views over the same files keep the transforms separate.
full_dataset = datasets.ImageFolder(root=TRAIN_DIR, transform=train_transform)
val_dataset = datasets.ImageFolder(root=TRAIN_DIR, transform=val_transform)

train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size

# Same seed and same lengths for both splits: the two calls draw the same
# permutation, so the train and validation indices stay disjoint.
train_subset, _ = random_split(
    full_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42)
)
_, val_subset = random_split(
    val_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42)
)

train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
val_loader   = DataLoader(val_subset, batch_size=32, shuffle=False)

# ============================================
# 7) CNN MODEL
# ============================================
class ASL_CNN(nn.Module):
    """Convolutional hand-sign classifier (4 conv stages + MLP head).

    ``features`` stacks four Conv(3x3, padding=1) -> BatchNorm -> ReLU ->
    MaxPool(2) stages (32, 64, 128, 256 channels); ``classifier`` flattens
    the result and maps it through 512 and 256 hidden units to
    ``num_classes`` logits, with dropout after each hidden layer. Sub-module
    indices inside both ``nn.Sequential`` containers are unchanged, so
    existing checkpoints remain loadable.

    Args:
        num_classes: number of output logits.
        image_size: side length of the square input used to size the first
            linear layer. Defaults to the module-level ``IMAGE_SIZE``.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Return the four layers of one conv stage, in order."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]

    def __init__(self, num_classes=27, image_size=None):
        super().__init__()
        if image_size is None:
            image_size = IMAGE_SIZE

        self.features = nn.Sequential(
            *self._conv_stage(3, 32),
            *self._conv_stage(32, 64),
            *self._conv_stage(64, 128),
            *self._conv_stage(128, 256),
        )

        # Size the first linear layer from a dummy forward pass. Run it in
        # eval mode so the all-zero dummy batch does not update the BatchNorm
        # running mean / variance, then switch back to training mode.
        self.features.eval()
        with torch.no_grad():
            probe = self.features(torch.zeros(1, 3, image_size, image_size))
        self.features.train()
        self.flatten_dim = probe.view(1, -1).shape[1]

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(self.flatten_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.2),

            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        """Return class logits for a batch of images."""
        x = self.features(x)
        x = self.classifier(x)
        return x

# One output logit per class folder found by ImageFolder.
model = ASL_CNN(len(full_dataset.classes)).to(device)

# ============================================
# 8) TRAIN SETTINGS
# ============================================
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)
# mode='max': the scheduler is stepped with validation accuracy in train_model().
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=2)

# Per-epoch history filled by train_model() and plotted after training.
train_losses = []
val_losses = []
val_accuracies = []

# ============================================
# 9) TRAINING LOOP
# ============================================
def train_model(epochs):
    """Run the training / validation loop and reload the best weights.

    Operates on the module-level ``model``, ``train_loader``, ``val_loader``,
    ``criterion``, ``optimizer``, ``scheduler`` and ``device``. Each epoch
    appends the average train loss, average validation loss and validation
    accuracy to ``train_losses``, ``val_losses`` and ``val_accuracies`` and
    steps the scheduler with the validation accuracy. The best-scoring
    weights are saved to ``best_model.pth`` and loaded back into ``model``
    at the end.

    Args:
        epochs: number of passes over the training loader.
    """
    best_acc = 0.0
    # True once this run has written best_model.pth; prevents loading a
    # missing file, or a stale one left behind by an earlier run.
    checkpoint_written = False
    print(f"Starting Training for {epochs} epochs...")

    for epoch in range(epochs):
        model.train()
        train_loss = 0

        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}", leave=False):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        # ------------------- VALIDATION ------------------
        model.eval()
        val_loss = 0
        all_preds, all_labels = [], []

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)

                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, preds = torch.max(outputs, 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        # Losses are per-batch averages; accuracy is per-sample.
        avg_train_loss = train_loss / len(train_loader)
        avg_val_loss = val_loss   / len(val_loader)
        val_acc = (np.array(all_preds) == np.array(all_labels)).mean()

        train_losses.append(avg_train_loss)
        val_losses.append(avg_val_loss)
        val_accuracies.append(val_acc)

        print(f"Epoch {epoch+1}: Train={avg_train_loss:.4f} | Val={avg_val_loss:.4f} | Acc={val_acc*100:.2f}%")

        scheduler.step(val_acc)

        # Always checkpoint the first epoch (even at 0% accuracy) so the
        # final reload has a file written by this run.
        if val_acc > best_acc or not checkpoint_written:
            best_acc = val_acc
            torch.save(model.state_dict(), "best_model.pth")
            checkpoint_written = True

    print(f"Training Done! Best Accuracy = {best_acc*100:.2f}%")
    if checkpoint_written:
        model.load_state_dict(
            torch.load("best_model.pth", map_location=device, weights_only=True)
        )

# ============================================
# 10) RUN TRAINING
# ============================================
train_model(epochs=10)

# ============================================
# 11) PLOTS
# ============================================
# Figure 1: train and validation loss per epoch.
plt.figure(figsize=(8,5))
for _curve, _name in zip((train_losses, val_losses), ('Train Loss', 'Val Loss')):
    plt.plot(_curve, label=_name)
plt.legend()
plt.title("Loss Curve")
plt.show()

# Figure 2: validation accuracy per epoch.
plt.figure(figsize=(8,5))
plt.plot(val_accuracies, label='Val Accuracy')
plt.legend()
plt.title("Validation Accuracy")
plt.show()
