In [1]:
# train_sl_mnist_fixed_labelencode.py
import os
import argparse
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pickle
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

class SLmniDataset(Dataset):
    """
    Dataset over a Sign-Language-MNIST style CSV.

    Expects a CSV where a label column exists and the remaining 784 columns are
    pixels (0-255). Each item is ``(image, label)`` where ``image`` is a float32
    tensor shaped (1, 28, 28) holding the pixel values divided by 255, and
    ``label`` is a Python ``int``.
    """

    # Side length of the square images; every row must hold IMG_SIDE**2 pixels.
    IMG_SIDE = 28

    def __init__(self, csv_file, label_col, transform=None):
        """
        Load the whole CSV into memory.

        Args:
            csv_file: Path of the CSV to read.
            label_col: Name of the column holding integer class labels.
            transform: Optional callable applied to each (1, 28, 28) image tensor.

        Raises:
            KeyError: If ``label_col`` is not a column of the CSV.
            ValueError: If the number of non-label columns is not 28 * 28.
        """
        df = pd.read_csv(csv_file)
        if label_col not in df.columns:
            raise KeyError(f"Label column '{label_col}' not found in {csv_file}. Columns: {df.columns.tolist()}")
        self.labels = df[label_col].values.astype('int64')
        pixel_cols = [c for c in df.columns if c != label_col]
        # Fail fast here: otherwise a malformed CSV only surfaces later as a
        # reshape error from inside the DataLoader.
        expected_pixels = self.IMG_SIDE * self.IMG_SIDE
        if len(pixel_cols) != expected_pixels:
            raise ValueError(
                f"Expected {expected_pixels} pixel columns in {csv_file}, found {len(pixel_cols)}"
            )
        self.X = df[pixel_cols].values.astype('float32') / 255.0
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the CSV."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image_tensor, label)`` for row ``idx``."""
        img = self.X[idx].reshape(self.IMG_SIDE, self.IMG_SIDE)
        img_tensor = torch.from_numpy(img).unsqueeze(0)  # (1,28,28)
        # Compare against None so a callable that happens to be falsy is still applied.
        if self.transform is not None:
            img_tensor = self.transform(img_tensor)
        return img_tensor, int(self.labels[idx])

class SimpleCNN(nn.Module):
    """
    Small convolutional classifier for single-channel 28x28 images.

    Two conv -> ReLU -> 2x2 max-pool stages (28 -> 14 -> 7 spatially, ending
    with 64 channels) feed a 128-unit hidden layer and an ``nclass``-way
    output layer that produces raw logits.
    """

    def __init__(self, nclass):
        super().__init__()
        # Layers are created in the same order and end up at the same indices
        # of ``self.net``, so parameter names ("net.0.weight", ...) are stable.
        conv_stages = [
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        ]
        head = [
            nn.Flatten(),
            nn.Linear(in_features=64 * 7 * 7, out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128, out_features=nclass),
        ]
        self.net = nn.Sequential(*conv_stages, *head)

    def forward(self, x):
        """Return class logits for a batch ``x`` of images."""
        logits = self.net(x)
        return logits

def _train_one_epoch(model, loader, criterion, opt, device, desc):
    """Run one optimisation pass over ``loader``.

    Returns ``(mean_loss, accuracy)`` averaged per sample, or ``(0.0, 0.0)``
    when the loader yields no samples.
    """
    model.train()
    seen = 0
    correct = 0
    loss_sum = 0.0
    for X, y in tqdm(loader, desc=desc):
        X = X.to(device)
        y = y.to(device)
        opt.zero_grad()
        logits = model(X)
        loss = criterion(logits, y)
        loss.backward()
        opt.step()
        batch_n = y.size(0)
        seen += batch_n
        correct += (logits.argmax(1) == y).sum().item()
        # Assumes the criterion uses mean reduction (the nn.CrossEntropyLoss
        # default), so weight by batch size before the per-sample average.
        loss_sum += loss.item() * batch_n
    if seen == 0:
        return 0.0, 0.0
    return loss_sum / seen, correct / seen


def _evaluate(model, loader, device):
    """Return the accuracy of ``model`` over ``loader`` (0.0 if it is empty)."""
    model.eval()
    seen = 0
    correct = 0
    with torch.no_grad():
        for X, y in loader:
            X = X.to(device)
            y = y.to(device)
            preds = model(X).argmax(1)
            seen += y.size(0)
            correct += (preds == y).sum().item()
    return correct / seen if seen > 0 else 0.0


def main(csv_path, epochs=10, batch=64, lr=1e-3, test_size=0.15, seed=42, label_col=None):
    """
    Train ``SimpleCNN`` on a labelled pixel CSV and save the resulting weights.

    Labels are re-encoded to contiguous ints, the data is split (stratified)
    into train/validation parts, and the model is trained with Adam and
    cross-entropy loss, printing metrics after every epoch.

    Side effects (all in the current working directory): writes ``train.csv``,
    ``val.csv``, ``label_encoder.pkl`` and ``sl_mnist_cnn.pt``.

    Args:
        csv_path: Path to the input CSV (label column plus pixel columns).
        epochs: Number of training epochs.
        batch: Batch size for both loaders.
        lr: Adam learning rate.
        test_size: Fraction of rows held out for validation.
        seed: Seed for the split and, when not None, for torch's RNG so that
            weight initialisation and batch shuffling are repeatable too.
        label_col: Name of the label column; defaults to the first column.

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
        KeyError: If ``label_col`` is not a column of the CSV.
        ValueError: If the CSV has no rows or encoded labels are out of range.
    """
    if not os.path.isfile(csv_path):
        raise FileNotFoundError(f"CSV file not found: {csv_path}")

    # read full CSV to fit LabelEncoder and detect label column
    df = pd.read_csv(csv_path)
    if label_col is None:
        label_col = df.columns[0]  # default to first column
    if label_col not in df.columns:
        raise KeyError(f"Label column '{label_col}' not found. Columns: {df.columns.tolist()}")
    if df.empty:
        raise ValueError(f"CSV file contains no rows: {csv_path}")

    # Seed torch as well as the split; otherwise ``seed`` only pins the split
    # while weight init and shuffling still vary from run to run.
    if seed is not None:
        torch.manual_seed(seed)

    # encode labels to contiguous ints 0..C-1
    le = LabelEncoder()
    df[label_col] = le.fit_transform(df[label_col].values)
    n_classes = len(le.classes_)
    print(f"Detected {n_classes} classes. Encoded labels to 0..{n_classes-1}")

    train_df, val_df = train_test_split(df, test_size=test_size, stratify=df[label_col], random_state=seed)

    # quick sanity check before anything is written or built: labels must be
    # valid indices into the model's output layer
    train_label_vals = np.unique(train_df[label_col].values)
    if train_label_vals.max() >= n_classes or train_label_vals.min() < 0:
        raise ValueError(f"Label values out of bounds after encoding: min={train_label_vals.min()}, max={train_label_vals.max()}, n_classes={n_classes}")

    # save train/val CSVs (these now contain encoded labels)
    train_csv = 'train.csv'
    val_csv = 'val.csv'
    train_df.to_csv(train_csv, index=False)
    val_df.to_csv(val_csv, index=False)

    # save label encoder for later decoding
    with open('label_encoder.pkl', 'wb') as f:
        pickle.dump(le, f)
    print("Saved label encoder to label_encoder.pkl")

    # create datasets & loaders
    train_ds = SLmniDataset(train_csv, label_col=label_col, transform=None)
    val_ds = SLmniDataset(val_csv, label_col=label_col, transform=None)
    train_loader = DataLoader(train_ds, batch_size=batch, shuffle=True, num_workers=0)
    val_loader = DataLoader(val_ds, batch_size=batch, shuffle=False, num_workers=0)

    # device and model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = SimpleCNN(nclass=n_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    opt = optim.Adam(model.parameters(), lr=lr)

    for e in range(epochs):
        train_loss, train_acc = _train_one_epoch(
            model, train_loader, criterion, opt, device, desc=f"Train E{e+1}/{epochs}"
        )
        val_acc = _evaluate(model, val_loader, device)
        print(f"Epoch {e+1}/{epochs}: train_loss={train_loss:.4f} train_acc={train_acc:.4f} val_acc={val_acc:.4f}")

    torch.save(model.state_dict(), "sl_mnist_cnn.pt")
    print("Saved model to sl_mnist_cnn.pt")

if __name__ == "__main__":
    # Command-line entry point: every tunable argument of main() has a flag,
    # with defaults matching main()'s own where the flag is new.
    parser = argparse.ArgumentParser(description="Train simple CNN on Sign-Language-MNIST CSV (with label encoding)")
    # NOTE(review): the default path is specific to one machine; pass --csv elsewhere.
    parser.add_argument("--csv", type=str, default=r"C:\Users\Admin\OneDrive\Documents\sign\sign_mnist_train.csv",
                        help="Path to sign-mnist CSV file (train+labels).")
    parser.add_argument("--epochs", type=int, default=12)
    parser.add_argument("--batch", type=int, default=64)
    parser.add_argument("--lr", type=float, default=1e-3)
    parser.add_argument("--test_size", type=float, default=0.15,
                        help="Fraction of rows held out for validation.")
    parser.add_argument("--seed", type=int, default=42,
                        help="Random seed passed to main().")
    parser.add_argument("--label_col", type=str, default=None,
                        help="Name of the label column (defaults to first column when not provided)")
    # parse_known_args ignores unrecognised arguments instead of exiting --
    # presumably so this also runs inside a notebook kernel, which passes its own.
    args, _ = parser.parse_known_args()
    main(args.csv, epochs=args.epochs, batch=args.batch, lr=args.lr,
         test_size=args.test_size, seed=args.seed, label_col=args.label_col)


Detected 24 classes. Encoded labels to 0..23
Saved label encoder to label_encoder.pkl


Train E1/12: 100%|██████████| 365/365 [00:34<00:00, 10.72it/s]


Epoch 1/12: train_loss=1.2087 train_acc=0.6314 val_acc=0.8767


Train E2/12: 100%|██████████| 365/365 [00:28<00:00, 12.94it/s]


Epoch 2/12: train_loss=0.1694 train_acc=0.9543 val_acc=0.9852


Train E3/12: 100%|██████████| 365/365 [00:24<00:00, 15.02it/s]


Epoch 3/12: train_loss=0.0302 train_acc=0.9963 val_acc=0.9985


Train E4/12: 100%|██████████| 365/365 [00:22<00:00, 16.53it/s]


Epoch 4/12: train_loss=0.0122 train_acc=0.9984 val_acc=1.0000


Train E5/12: 100%|██████████| 365/365 [00:25<00:00, 14.10it/s]


Epoch 5/12: train_loss=0.0028 train_acc=1.0000 val_acc=1.0000


Train E6/12: 100%|██████████| 365/365 [00:36<00:00,  9.95it/s]


Epoch 6/12: train_loss=0.0016 train_acc=1.0000 val_acc=0.9998


Train E7/12: 100%|██████████| 365/365 [00:40<00:00,  9.01it/s]


Epoch 7/12: train_loss=0.0010 train_acc=1.0000 val_acc=1.0000


Train E8/12: 100%|██████████| 365/365 [00:32<00:00, 11.19it/s]


Epoch 8/12: train_loss=0.0007 train_acc=1.0000 val_acc=1.0000


Train E9/12: 100%|██████████| 365/365 [00:27<00:00, 13.34it/s]


Epoch 9/12: train_loss=0.0005 train_acc=1.0000 val_acc=1.0000


Train E10/12: 100%|██████████| 365/365 [00:30<00:00, 11.90it/s]


Epoch 10/12: train_loss=0.0488 train_acc=0.9848 val_acc=1.0000


Train E11/12: 100%|██████████| 365/365 [00:35<00:00, 10.14it/s]


Epoch 11/12: train_loss=0.0008 train_acc=1.0000 val_acc=1.0000


Train E12/12: 100%|██████████| 365/365 [00:20<00:00, 17.39it/s]


Epoch 12/12: train_loss=0.0004 train_acc=1.0000 val_acc=1.0000
Saved model to sl_mnist_cnn.pt


In [2]:
%pip install mediapipe


Note: you may need to restart the kernel to use updated packages.


In [None]:
import cv2
import mediapipe as mp

# Initialize MediaPipe Hands.
# mp_hands / mp_drawing are short aliases for the hand-tracking solution and
# its landmark-drawing helpers.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
# Single shared detector: tracks at most one hand and requires a detection
# confidence of at least 0.7.
hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.7)

# Function to detect finger states
def get_finger_states(hand_landmarks):
    """
    Classify each finger of a detected hand as extended (1) or folded (0).

    Args:
        hand_landmarks: Object exposing a ``landmark`` sequence whose items
            have ``x`` and ``y`` attributes (assumed to be MediaPipe's
            21-point hand landmark list -- TODO confirm).

    Returns:
        A list of five ints ordered [thumb, index, middle, ring, pinky].
    """
    points = hand_landmarks.landmark

    # Index, middle, ring, pinky: the finger counts as extended when its tip
    # has a smaller y than the landmark two positions before it.
    fingers = [
        1 if points[tip].y < points[tip - 2].y else 0
        for tip in (8, 12, 16, 20)
    ]

    # Thumb: judged horizontally, comparing landmark 4 against landmark 3.
    # NOTE(review): this x-direction test looks handedness-dependent -- confirm.
    thumb = 1 if points[4].x < points[3].x else 0

    return [thumb] + fingers


# Gesture lookup table (module-level, read by the webcam loop).
# Keys are 5-tuples of finger states in the order produced by
# get_finger_states: (thumb, index, middle, ring, pinky), where 1 means
# extended and 0 means folded. Values are the text overlaid on the video frame.
# Combinations not listed here fall back to the caller's default text.
gesture_dict = {
    (0, 0, 0, 0, 0): "closed Fist,Power",
    (1, 1, 1, 1, 1): "Hi",
    (1, 0, 0, 0, 0): "Thumbs Up",
    (0, 1, 0, 0, 0): "Pointing",
    (0, 1, 1, 0, 0): "Peace",
    (0, 0, 0, 0, 1): "little",
    (1, 1, 0, 1, 1): "Sorry",
    (1, 0, 0, 0, 1): "Call me",
    (1, 1, 0, 0, 1): "I Love You",
    (1, 0, 1, 0, 1): "Come here",
    (0, 1, 1, 1, 0): "Please",
    (1, 1, 0, 1, 0): "Where are you?",
    (0, 0, 1, 0, 0): "I don’t understand",
    (0, 1, 0, 1, 0): "I am fine",
    (1, 0, 1, 1, 1): "Welcome",
    (1, 0, 0, 1, 0): "I am sick",
    (1, 1, 1, 0, 1): "I need medicine",
    (0, 0, 0, 1, 1): "I am hungry",
    (0, 0, 1, 1, 0): "Stop",
    (0, 1, 1, 0, 1): "thank you",
   
}



 

# Open webcam (device index 0) and label the detected hand gesture on each
# frame until ESC is pressed or the camera stops delivering frames.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this the loop below is skipped silently and nothing appears.
    print("Could not open webcam (device 0)")

try:
    while cap.isOpened():
        success, image = cap.read()
        if not success:
            break

        # Mirror the frame so the preview behaves like a mirror, then convert
        # from OpenCV's BGR to the RGB layout passed to hands.process.
        image = cv2.flip(image, 1)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = hands.process(image_rgb)

        # Stays empty when no hand is detected in this frame.
        gesture_text = ""

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    image, hand_landmarks, mp_hands.HAND_CONNECTIONS
                )
                finger_states = tuple(get_finger_states(hand_landmarks))
                gesture_text = gesture_dict.get(
                    finger_states, "Gesture not recognized"
                )

        cv2.putText(
            image, gesture_text, (10, 40),
            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2
        )

        cv2.imshow("Hand Gesture Recognition", image)

        if cv2.waitKey(1) & 0xFF == 27:  # ESC
            break
finally:
    # Always free the camera and close the preview window, including when the
    # loop raises or the kernel is interrupted; otherwise the device stays
    # locked for the next run.
    cap.release()
    cv2.destroyAllWindows()
