In [1]:
pip install opencv-python






[notice] A new release of pip is available: 24.0 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
pip install mediapipe





[notice] A new release of pip is available: 24.0 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip





In [3]:
import os
import cv2
import numpy as np
import mediapipe as mp
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv1D, MaxPooling1D, Flatten
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint


In [4]:
# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5)



In [5]:
# Dataset directory
DATASET_DIR = "SignImage48x48"
CLASSES = [chr(i) for i in range(65, 91)]  # A-Z
IMAGE_SIZE = 128


In [6]:
def extract_hand_landmarks(image):
    """Extracts hand landmarks from an image using MediaPipe."""
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = hands.process(image_rgb)
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            landmarks = []
            for lm in hand_landmarks.landmark:
                landmarks.append([lm.x, lm.y, lm.z])
            return np.array(landmarks).flatten()
    return None

In [8]:
def load_data():
    """Loads the dataset, extracts hand landmarks, and prepares training data."""
    X, y = [], []
    for label, sign_class in enumerate(CLASSES):
        class_dir = os.path.join(DATASET_DIR, sign_class)
        if not os.path.exists(class_dir):
            continue

        for file_name in os.listdir(class_dir):
            file_path = os.path.join(class_dir, file_name)
            image = cv2.imread(file_path)
            if image is not None:
                landmarks = extract_hand_landmarks(image)
                if landmarks is not None:
                    X.append(landmarks)
                    y.append(label)

    return np.array(X), np.array(y)



In [9]:
# Load dataset
print("Loading data...")
X, y = load_data()

# Preprocess data
print("Preprocessing data...")
X = np.array(X, dtype=np.float32)
y = to_categorical(y, num_classes=len(CLASSES))


Loading data...
Preprocessing data...


In [10]:
# Reshape data for CNN
X = X.reshape((X.shape[0], X.shape[1], 1))

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [11]:
# Build CNN model
model = Sequential([
    Conv1D(64, kernel_size=3, activation='relu', input_shape=(X.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),

    Conv1D(128, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),

    Conv1D(256, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(CLASSES), activation='softmax')
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [12]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()



In [13]:
# Train the model
checkpoint = ModelCheckpoint("sign_language_model.keras", save_best_only=True, monitor="val_loss", mode="min")


In [14]:
print("Training the model...")
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=200,
    batch_size=32,
    callbacks=[checkpoint]
)


Training the model...
Epoch 1/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 184ms/step - accuracy: 0.0276 - loss: 3.2381 - val_accuracy: 0.2800 - val_loss: 3.0595
Epoch 2/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step - accuracy: 0.2233 - loss: 2.9388 - val_accuracy: 0.1600 - val_loss: 2.7240
Epoch 3/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.1646 - loss: 2.7132 - val_accuracy: 0.1600 - val_loss: 2.5756
Epoch 4/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.2595 - loss: 2.7207 - val_accuracy: 0.1600 - val_loss: 2.5794
Epoch 5/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.2160 - loss: 2.6907 - val_accuracy: 0.1600 - val_loss: 2.6358
Epoch 6/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.2143 - loss: 2.5848 - val_accuracy: 0.1600 - val_loss: 2.6788
Epoch 7/200
[1m4

In [15]:
# Save the final model
model.save("sign_language_model_final.keras", save_format='keras')
print("Model training complete and saved.")




Model training complete and saved.


In [None]:
import os
import cv2
import numpy as np
import mediapipe as mp
from tensorflow.keras.models import load_model

# Load the trained model
MODEL_PATH = "sign_language_model_final.keras"
model = load_model(MODEL_PATH)

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)

# Class labels
CLASSES = [chr(i) for i in range(65, 91)]  # A-Z

# Function to extract hand landmarks
def extract_hand_landmarks(image):
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = hands.process(image_rgb)
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            landmarks = []
            for lm in hand_landmarks.landmark:
                landmarks.append([lm.x, lm.y, lm.z])
            return np.array(landmarks).flatten()
    return None

# Function to predict sign language in real-time and save predictions
def predict_sign_live():
    cap = cv2.VideoCapture(0)
    output_file = open("live_predictions.txt", "w")  # Save predictions
    
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        landmarks = extract_hand_landmarks(frame)
        if landmarks is not None:
            landmarks = landmarks.reshape(1, -1, 1).astype(np.float32)
            prediction = model.predict(landmarks)
            predicted_class = np.argmax(prediction)
            sign = CLASSES[predicted_class]
            
            cv2.putText(frame, f"Predicted Sign: {sign}", (50, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
            
            # Save the prediction to a file
            output_file.write(f"Predicted Sign: {sign}\n")

        cv2.imshow('Sign Language Detection', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
    output_file.close()
    print("Predictions saved to live_predictions.txt")

# Run real-time sign language detection
predict_sign_live()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 452ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3