# Sign Language Detection Project

This Jupyter Notebook combines three scripts:

1. **capture_dataset.py** - Capture labeled hand landmark data using webcam.
2. **train_model.py** - Train a Multi-Layer Perceptron (MLP) classifier on the captured data.
3. **app.py** - Run a Streamlit app for image and real-time sign language detection.

---

## Requirements

- Python 3.x
- OpenCV
- MediaPipe
- NumPy
- TensorFlow/Keras
- Streamlit
- scikit-learn
- pytz


## 1. Capture Dataset

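Run this cell to define `capture_for_label`, then call it once per sign (for example, `capture_for_label("hello")`) to record hand landmark data from the webcam.
Press **q** to quit early during capture.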


In [None]:
import cv2
import mediapipe as mp
import numpy as np
import os
import time

# Short aliases for the MediaPipe hand-tracking solution and its drawing helpers.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Directory that receives one "<label>.npz" file per captured sign;
# created up front so saving never fails on a missing folder.
DATA_DIR = "data"
os.makedirs(DATA_DIR, exist_ok=True)

def extract_landmarks(hand_landmarks):
    """Flatten a hand's landmarks into a single 1-D coordinate vector.

    Args:
        hand_landmarks: Object exposing a ``landmark`` iterable whose items
            have ``x``, ``y`` and ``z`` attributes.

    Returns:
        A 1-D NumPy array laid out as ``[x0, y0, z0, x1, y1, z1, ...]``.
    """
    points = hand_landmarks.landmark
    coords = np.array([[pt.x, pt.y, pt.z] for pt in points])
    return coords.flatten()

def capture_for_label(label, num_samples=300, delay=0.05):
    """Record hand-landmark vectors from the default webcam for one sign.

    Frames are read from camera 0, mirrored, and passed to MediaPipe Hands.
    Every frame in which a hand is detected contributes one flattened
    landmark vector. Capture ends when ``num_samples`` vectors have been
    collected, a frame cannot be read, or the user presses ``q``.

    The vectors are stored in ``DATA_DIR/<label>.npz`` under the key
    ``"arr"``. If the file already exists, the new vectors are appended to
    the ones already stored.

    Args:
        label: Name of the sign; also used as the output file stem.
        num_samples: Number of landmark vectors to collect.
        delay: Pause between frames, in seconds.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    samples = []
    cap = cv2.VideoCapture(0)
    # cv2.waitKey treats a delay <= 0 as "wait forever", so clamp to 1 ms.
    wait_ms = max(1, int(delay * 1000))
    try:
        with mp_hands.Hands(static_image_mode=False,
                            max_num_hands=1,
                            min_detection_confidence=0.5,
                            min_tracking_confidence=0.5) as hands:
            print(f"Starting capture for label '{label}'. Press 'q' to quit early.")
            while len(samples) < num_samples:
                ret, frame = cap.read()
                if not ret:
                    print("Failed to grab frame")
                    break
                # Mirror the image so the preview behaves like a mirror.
                frame = cv2.flip(frame, 1)
                rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                res = hands.process(rgb)
                if res.multi_hand_landmarks:
                    lm = res.multi_hand_landmarks[0]
                    samples.append(extract_landmarks(lm))
                    mp_drawing.draw_landmarks(frame, lm, mp_hands.HAND_CONNECTIONS)
                    cv2.putText(frame, f"{label}: {len(samples)}/{num_samples}", (10, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                else:
                    cv2.putText(frame, f"No hand detected - {len(samples)}/{num_samples}", (10, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
                cv2.imshow("Capture (press q to stop)", frame)
                if cv2.waitKey(wait_ms) & 0xFF == ord('q'):
                    break
    finally:
        # Always free the camera and close the preview, even on errors.
        cap.release()
        cv2.destroyAllWindows()

    samples = np.array(samples)
    if not samples.size:
        print("No samples captured.")
        return

    save_path = os.path.join(DATA_DIR, f"{label}.npz")
    to_save = samples
    if os.path.exists(save_path):
        with np.load(save_path) as stored:
            # Files written by earlier versions passed the array positionally,
            # which NumPy stores under "arr_0"; accept those as well.
            key = "arr" if "arr" in stored.files else stored.files[0]
            prev = stored[key]
        to_save = np.concatenate([prev, samples], axis=0)
    # Save under an explicit key so readers can rely on data["arr"].
    np.savez_compressed(save_path, arr=to_save)
    print(f"Saved {samples.shape[0]} samples to {save_path}")

## 2. Train Model

After capturing sufficient samples for each label, run this cell to train the MLP classifier.


In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras import layers, models
import json

# Input folder holding the per-label .npz landmark files.
DATA_DIR = "data"
# Output folder for the trained model and its label list; created if missing.
MODEL_DIR = "saved_model"
os.makedirs(MODEL_DIR, exist_ok=True)

def load_data():
    """Load every ``*.npz`` landmark file in ``DATA_DIR`` into one dataset.

    The file stem is used as the class label for all rows in that file.
    Arrays are read from the ``"arr"`` key, falling back to ``"arr_0"``
    (the name NumPy assigns to an array saved positionally). Files are
    visited in sorted order so the row order is reproducible across runs.

    Returns:
        A tuple ``(X, y)`` where ``X`` stacks all sample rows vertically and
        ``y`` is an array of label strings with one entry per row of ``X``.

    Raises:
        RuntimeError: If ``DATA_DIR`` contains no ``.npz`` files.
    """
    X = []
    y = []
    for fn in sorted(os.listdir(DATA_DIR)):
        if not fn.endswith(".npz"):
            continue
        label = os.path.splitext(fn)[0]
        # Context manager closes the underlying zip file after reading.
        with np.load(os.path.join(DATA_DIR, fn)) as stored:
            key = "arr" if "arr" in stored.files else "arr_0"
            arr = stored[key]
        if arr.ndim == 1:
            # A lone sample was stored flat; promote it to a single row.
            arr = arr.reshape(1, -1)
        X.append(arr)
        y.extend([label] * arr.shape[0])
    if not X:
        raise RuntimeError("No data found. Run capture_dataset.py first.")
    X = np.vstack(X)
    y = np.array(y)
    return X, y

def build_model(input_dim, n_classes):
    """Build and compile the MLP used to classify landmark vectors.

    The network is: input -> batch normalization -> Dense(256, relu) ->
    Dropout(0.4) -> Dense(128, relu) -> Dropout(0.3) -> Dense(n_classes,
    softmax). It is compiled with the Adam optimizer, sparse categorical
    cross-entropy loss, and an accuracy metric.

    Args:
        input_dim: Length of each input feature vector.
        n_classes: Number of output classes.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    stack = [
        layers.Input(shape=(input_dim,)),
        layers.BatchNormalization(),
    ]
    # Two hidden stages, each a ReLU dense layer followed by dropout.
    for units, drop_rate in ((256, 0.4), (128, 0.3)):
        stack.append(layers.Dense(units, activation='relu'))
        stack.append(layers.Dropout(drop_rate))
    stack.append(layers.Dense(n_classes, activation='softmax'))

    net = models.Sequential(stack)
    net.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

# --- Training pipeline: load data, encode labels, fit the MLP, save artifacts ---
X, y = load_data()
print("Data shape:", X.shape, "labels:", np.unique(y))
# Map string labels to integer class ids, as required by the sparse loss.
le = LabelEncoder()
y_enc = le.fit_transform(y)
input_dim = X.shape[1]
n_classes = len(le.classes_)
# Hold out 20% for validation; stratify on the encoded labels and fix the seed.
X_train, X_val, y_train, y_val = train_test_split(X, y_enc, test_size=0.2, random_state=42, stratify=y_enc)
model = build_model(input_dim, n_classes)
# Early stopping monitors val_loss with patience 8 and restores the best weights.
callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)
]
history = model.fit(X_train, y_train, validation_data=(X_val, y_val),
                    epochs=100, batch_size=32, callbacks=callbacks)
model_path = os.path.join(MODEL_DIR, "sign_mlp.h5")
model.save(model_path)
# Persist the class names in encoder order so index i of the model output
# can be mapped back to its label at inference time.
with open(os.path.join(MODEL_DIR, "labels.json"), "w") as f:
    json.dump(list(le.classes_), f)
print(f"Saved model to {model_path} and labels.json")

## 3. Run Streamlit App

To run the Streamlit app in your terminal:

```bash
streamlit run app.py
```

Below is the code for reference:


In [None]:
import streamlit as st
import cv2
import mediapipe as mp
import numpy as np
import json
import os
import pytz
from datetime import datetime
import tensorflow as tf

# Locations of the trained model and its label list inside MODEL_DIR.
MODEL_DIR = "saved_model"
MODEL_PATH = os.path.join(MODEL_DIR, "sign_mlp.h5")
LABELS_PATH = os.path.join(MODEL_DIR, "labels.json")

# Use the wide page layout and set the page title.
st.set_page_config(layout="wide", page_title="Sign Language Detector")

def in_allowed_time():
    """Report whether the current Asia/Kolkata time is within 18:00-22:00.

    Returns:
        A tuple ``(allowed, timestamp)``. ``allowed`` is True when the
        current hour in the Asia/Kolkata zone is at least 18 and less than
        22. ``timestamp`` is that moment formatted as
        ``YYYY-MM-DD HH:MM:SS TZ``.
    """
    current = datetime.now(pytz.timezone("Asia/Kolkata"))
    stamp = current.strftime("%Y-%m-%d %H:%M:%S %Z")
    allowed = current.hour >= 18 and current.hour < 22
    return allowed, stamp

@st.cache_resource
def load_model_and_labels():
    """Load the saved Keras model and its label list from disk.

    Wrapped in ``st.cache_resource``.

    Returns:
        ``(model, labels)`` when both ``MODEL_PATH`` and ``LABELS_PATH``
        exist; ``(None, None)`` when either file is missing.
    """
    artifacts_present = os.path.exists(MODEL_PATH) and os.path.exists(LABELS_PATH)
    if not artifacts_present:
        return None, None

    loaded_model = tf.keras.models.load_model(MODEL_PATH)
    with open(LABELS_PATH, "r") as fh:
        class_names = json.load(fh)
    return loaded_model, class_names

# Short aliases for the MediaPipe hand-tracking solution and its drawing helpers.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

def extract_landmarks_from_image(image_bgr):
    """Detect one hand in a BGR image and return its landmark vector.

    Args:
        image_bgr: Image in BGR channel order; it is converted to RGB
            before being handed to MediaPipe.

    Returns:
        ``(vec, landmarks)`` where ``vec`` is the flattened
        ``[x, y, z, ...]`` array for the first detected hand and
        ``landmarks`` is the corresponding MediaPipe landmark object.
        ``(None, None)`` when no hand is detected.
    """
    rgb_frame = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    outcome = (None, None)
    with mp_hands.Hands(static_image_mode=True, max_num_hands=1,
                        min_detection_confidence=0.5) as detector:
        detected = detector.process(rgb_frame).multi_hand_landmarks
        if detected:
            first_hand = detected[0]
            coords = [[pt.x, pt.y, pt.z] for pt in first_hand.landmark]
            outcome = (np.array(coords).flatten(), first_hand)
    return outcome

def predict_from_vector(model, labels, vec):
    """Classify a single landmark vector.

    Args:
        model: Object with a ``predict`` method that accepts a 2-D batch and
            returns one row of class scores per input row.
        labels: Sequence of class names indexed by class id.
        vec: NumPy array holding one sample; it is reshaped to ``(1, -1)``.

    Returns:
        ``(label, confidence, scores)``: the name of the top-scoring class,
        its score as a Python float, and the full score row.
    """
    batch = vec.reshape(1, -1)
    scores = model.predict(batch)[0]
    best = int(np.argmax(scores))
    confidence = float(scores[best])
    return labels[best], confidence, scores

# Load the model and labels at startup; both are None when the saved files are missing.
model, labels = load_model_and_labels()

# The rest of the Streamlit app UI code...
