In [5]:
import os
import cv2
import mediapipe as mp
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

# Extract hand keypoints and create an Array 

In [3]:
# Path to your image dataset
DATASET_DIR = r'E:\7th semester\FYP-1\Sign-language\augmented_dataset'

mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1)
mp_drawing = mp.solutions.drawing_utils

X = []
y = []

# Step 1: Read and process images
for label in os.listdir(DATASET_DIR):
    label_path = os.path.join(DATASET_DIR, label)
    if not os.path.isdir(label_path):
        continue
    for img_file in os.listdir(label_path):
        img_path = os.path.join(label_path, img_file)
        image = cv2.imread(img_path)
        if image is None:
            continue
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = hands.process(image_rgb)

        if results.multi_hand_landmarks:
            hand_landmarks = results.multi_hand_landmarks[0]
            landmark_list = []
            for lm in hand_landmarks.landmark:
                landmark_list.extend([lm.x, lm.y, lm.z])
            X.append(landmark_list)
            y.append(label)

# Step 2: Convert to arrays
X = np.array(X)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_categorical = to_categorical(y_encoded)

# Create Dataset OF Hand Keypoints

In [1]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import mediapipe as mp

# Path to your image dataset
DATASET_DIR = r'E:\7th semester\FYP-1\Sign-language\augmented_dataset'

mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1)
mp_drawing = mp.solutions.drawing_utils

data = []

# Step 1: Read and process images
for label in os.listdir(DATASET_DIR):
    label_path = os.path.join(DATASET_DIR, label)
    if not os.path.isdir(label_path):
        continue
    for img_file in os.listdir(label_path):
        img_path = os.path.join(label_path, img_file)
        image = cv2.imread(img_path)
        if image is None:
            continue
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = hands.process(image_rgb)

        if results.multi_hand_landmarks:
            hand_landmarks = results.multi_hand_landmarks[0]
            landmark_row = []
            for lm in hand_landmarks.landmark:
                landmark_row.extend([lm.x, lm.y, lm.z])
            landmark_row.append(label)  # Append class label
            data.append(landmark_row)

# Step 2: Create column names
columns = []
for i in range(1, 22):  # 21 landmarks
    columns.extend([f'X{i}', f'Y{i}', f'Z{i}'])
columns.append('class_name')

# Step 3: Create DataFrame and save
df = pd.DataFrame(data, columns=columns)
df.to_csv('hand_landmarks_dataset.csv', index=False)
print("CSV file 'hand_landmarks_dataset.csv' has been created.")


CSV file 'hand_landmarks_dataset.csv' has been created.


In [6]:
# Step 3: Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42)

In [5]:
# Step 4: Simple Neural Network
model = Sequential([
    Dense(128, activation='relu', input_shape=(63,)),
    Dense(64, activation='relu'),
    Dense(y_categorical.shape[1], activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=100, validation_data=(X_test, y_test))

print("Model training complete!")


Epoch 1/100
[1m484/484[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.1413 - loss: 3.2523 - val_accuracy: 0.4049 - val_loss: 2.1143
Epoch 2/100
[1m484/484[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.4602 - loss: 1.9163 - val_accuracy: 0.5801 - val_loss: 1.5070
Epoch 3/100
[1m484/484[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.5857 - loss: 1.4081 - val_accuracy: 0.6346 - val_loss: 1.2353
Epoch 4/100
[1m484/484[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.6387 - loss: 1.2078 - val_accuracy: 0.6486 - val_loss: 1.1415
Epoch 5/100
[1m484/484[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.6685 - loss: 1.1061 - val_accuracy: 0.7010 - val_loss: 1.0233
Epoch 6/100
[1m484/484[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7037 - loss: 1.0169 - val_accuracy: 0.7199 - val_loss: 0.9747
Epoch 7/100
[1m484/48

In [14]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.activations import swish  # or use 'swish' as a string
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.optimizers import SGD

sgd = SGD(learning_rate=0.01, momentum=0.9)
RMS = RMSprop(learning_rate=0.01)


model = Sequential()
model.add(Dense(128, input_shape=(63,), activation=swish))  # or 'swish'
model.add(Dropout(0.3))
model.add(Dense(64, activation='swish'))
model.add(Dense(32, activation='swish'))
model.add(Dense(y_categorical.shape[1], activation='softmax'))
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [16]:
model.fit(X_train, y_train, epochs=200, validation_data=(X_test, y_test))

print("Model training complete!")

Epoch 1/200
[1m484/484[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.8262 - loss: 0.5088 - val_accuracy: 0.8080 - val_loss: 0.5684
Epoch 2/200
[1m484/484[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.8366 - loss: 0.4987 - val_accuracy: 0.8398 - val_loss: 0.4996
Epoch 3/200
[1m484/484[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.8383 - loss: 0.4882 - val_accuracy: 0.8685 - val_loss: 0.4261
Epoch 4/200
[1m484/484[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.8367 - loss: 0.4873 - val_accuracy: 0.8532 - val_loss: 0.4610
Epoch 5/200
[1m484/484[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.8314 - loss: 0.4925 - val_accuracy: 0.8349 - val_loss: 0.5112
Epoch 6/200
[1m484/484[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.8348 - loss: 0.4911 - val_accuracy: 0.8581 - val_loss: 0.4543
Epoch 7/200
[1m484/48

In [6]:
# Optional: Save label encoder and model
import pickle
model.save("Accuracy 90.keras")
with open("label_encoder.pkl", "wb") as f:
    pickle.dump(label_encoder, f)

In [7]:
import cv2
import numpy as np
import mediapipe as mp
from tensorflow.keras.models import load_model
import pickle

# Load trained model and label encoder
model = load_model('Accuracy 90.keras')
with open("label_encoder.pkl", "rb") as f:
    label_encoder = pickle.load(f)

# Initialize MediaPipe
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1)
mp_drawing = mp.solutions.drawing_utils

# Start webcam
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    image = cv2.flip(frame, 1)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = hands.process(image_rgb)

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            # Draw landmarks on screen
            mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Extract features
            landmarks = []
            for lm in hand_landmarks.landmark:
                landmarks.extend([lm.x, lm.y, lm.z])
            
            if len(landmarks) == 63:
                input_data = np.array(landmarks).reshape(1, -1)
                prediction = model.predict(input_data)
                predicted_index = np.argmax(prediction)
                predicted_label = label_encoder.inverse_transform([predicted_index])[0]
                confidence = np.max(prediction) * 100

                # Show prediction on screen
                cv2.putText(
                    image,
                    f'{predicted_label} ({confidence:.2f}%)',
                    (10, 40),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1,
                    (0, 255, 0),
                    2
                )

    cv2.imshow("Sign Language Prediction", image)

    if cv2.waitKey(1) & 0xFF == 27:  # Press ESC to quit
        break

cap.release()
cv2.destroyAllWindows()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 115ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 