In [None]:
import os
import cv2

# Collect webcam images for every sign class.
#
# For each class index the cell shows a live preview until the user presses
# "q", then saves `dataset_size` mirrored frames to
# DATA_DIR/<class index>/<frame number>.jpg.
DATA_DIR = './data'
os.makedirs(DATA_DIR, exist_ok=True)

number_of_classes = 12
dataset_size = 100

cap = cv2.VideoCapture(0)  # Try changing the index to 0 or 1
if not cap.isOpened():
    cap.release()
    # Raise instead of calling exit(): exit() would end the whole session
    # rather than just reporting the problem for this cell.
    raise RuntimeError("Error: Could not open video device.")

try:
    # Becomes False as soon as a frame cannot be read; once the camera stops
    # delivering frames there is no point in moving on to the next class.
    camera_ok = True

    for j in range(number_of_classes):
        class_dir = os.path.join(DATA_DIR, str(j))
        os.makedirs(class_dir, exist_ok=True)

        print('Collecting data for class {}'.format(j))

        # Phase 1: live preview until the user signals readiness with "q".
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to capture image")
                camera_ok = False
                break

            # Flip the frame horizontally to correct the mirror effect
            frame = cv2.flip(frame, 1)

            cv2.putText(frame, 'Ready? Press "Q" ! :)', (100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 255, 0), 3, cv2.LINE_AA)
            cv2.imshow('frame', frame)
            # Mask to the low byte so the comparison also works on platforms
            # where waitKey returns extra modifier bits.
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break

        if not camera_ok:
            break

        # Phase 2: record `dataset_size` frames for this class.
        counter = 0
        while counter < dataset_size:
            ret, frame = cap.read()
            if not ret:
                print("Failed to capture image")
                camera_ok = False
                break

            # Flip the frame horizontally to correct the mirror effect
            frame = cv2.flip(frame, 1)

            cv2.imshow('frame', frame)
            cv2.waitKey(25)
            cv2.imwrite(os.path.join(class_dir, '{}.jpg'.format(counter)), frame)
            counter += 1

        if not camera_ok:
            break
finally:
    # Always free the camera and close the preview window, even when the
    # cell is interrupted or an error occurs mid-capture.
    cap.release()
    cv2.destroyAllWindows()


In [None]:
import os
import pickle
import mediapipe as mp
import cv2
import warnings
import tensorflow as tf

# Build the training set: run MediaPipe hand detection on every collected
# image and store the normalised landmark coordinates together with the
# class label (the name of the image's sub-directory) in data.pickle.

# Suppress specific UserWarning related to google.protobuf
warnings.filterwarnings("ignore", category=UserWarning, module='google.protobuf.symbol_database')

# Suppress TensorFlow logs
tf.get_logger().setLevel('ERROR')

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Same relative location the collection cell writes to, so the notebook does
# not depend on one particular user's absolute path.
DATA_DIR = './data'

data = []
labels = []

# The context manager releases the MediaPipe graph when processing is done.
with mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3) as hands:
    # Sorted listings give a stable sample order from run to run.
    for dir_ in sorted(os.listdir(DATA_DIR)):
        dir_path = os.path.join(DATA_DIR, dir_)
        if not os.path.isdir(dir_path):  # Check if it's a directory
            continue

        for img_name in sorted(os.listdir(dir_path)):
            img_file = os.path.join(dir_path, img_name)
            img = cv2.imread(img_file)
            if img is None:
                # imread returns None for files it cannot decode (e.g. stray
                # non-image files); skip them instead of crashing in cvtColor.
                print('Skipping unreadable file: {}'.format(img_file))
                continue

            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            results = hands.process(img_rgb)
            if not results.multi_hand_landmarks:
                continue  # no hand detected in this image

            data_aux = []
            x_ = []
            y_ = []

            for hand_landmarks in results.multi_hand_landmarks:
                for landmark in hand_landmarks.landmark:
                    x_.append(landmark.x)
                    y_.append(landmark.y)

                # NOTE: x_/y_ accumulate across hands, so a second hand is
                # offset by the minimum over both hands. The inference cell
                # computes features the same way; keep the two in sync.
                min_x = min(x_)
                min_y = min(y_)
                for landmark in hand_landmarks.landmark:
                    data_aux.append(landmark.x - min_x)
                    data_aux.append(landmark.y - min_y)

            data.append(data_aux)
            labels.append(dir_)

with open('data.pickle', 'wb') as f:
    pickle.dump({'data': data, 'labels': labels}, f)


In [5]:
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

# Train a random-forest classifier on the extracted hand-landmark features
# and save the fitted model to model.p.

# Fixed seed so the split and the forest are reproducible between runs.
SEED = 42

# Load the data (written by the feature-extraction cell into the working
# directory). NOTE: pickle can execute arbitrary code on load; only open
# files you created yourself.
with open('data.pickle', 'rb') as f:
    data_dict = pickle.load(f)

if len(data_dict['data']) == 0:
    raise ValueError('data.pickle contains no samples; run the feature extraction first.')

# Find the maximum length of the feature vectors
max_length = max(len(item) for item in data_dict['data'])

# Pad the data to ensure uniform length (samples with fewer detected hands
# are shorter and get trailing zeros)
padded_data = [np.pad(item, (0, max_length - len(item)), 'constant') for item in data_dict['data']]

# Convert to numpy array
data = np.asarray(padded_data)
labels = np.asarray(data_dict['labels'])

# Train-test split
x_train, x_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, shuffle=True, stratify=labels, random_state=SEED)

# Train the model
model = RandomForestClassifier(random_state=SEED)
model.fit(x_train, y_train)

# Predict and evaluate (accuracy_score expects y_true first, then y_pred)
y_predict = model.predict(x_test)
score = accuracy_score(y_test, y_predict)

# Output the results
print('{}% of samples were classified correctly !'.format(score * 100))

# Save the model
with open('model.p', 'wb') as f:
    pickle.dump({'model': model}, f)


100.0% of samples were classified correctly !


In [6]:
import pickle
import cv2
import mediapipe as mp
import numpy as np
import pyttsx3  # Import the text-to-speech library

# Live sign recognition: read webcam frames, extract hand landmarks with
# MediaPipe, classify them with the trained model and draw the predicted
# label plus a bounding box on the preview. Press ESC to quit.

# Load the pre-trained model (written by the training cell into the working
# directory). NOTE: pickle can execute arbitrary code on load; only open
# files you created yourself.
with open('model.p', 'rb') as f:
    model_dict = pickle.load(f)
model = model_dict['model']

# Length of the feature vectors the model was fitted on. Taken from the
# model itself so inference still works when training only ever saw one hand
# (42 features); falls back to 84 (two hands) if the attribute is missing.
n_features = getattr(model, 'n_features_in_', 84)

# Pixels of padding added around the detected hand when drawing the box
BOX_MARGIN = 10

# Initialize camera
cap = cv2.VideoCapture(0)

# Initialize MediaPipe hand detection
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

# Label dictionary for prediction
labels_dict = {0: 'A', 1: 'L', 2: 'G', 3: 'X', 4: 'U', 5: '1', 6: '2', 7: '6', 8: '9', 9: 'Namaste', 10: 'Good', 11: 'Morning'}

# Text-to-speech output (pyttsx3) is currently disabled in this cell.

try:
    if not cap.isOpened():
        print("Error: Camera not opened.")
    else:
        print("Camera opened successfully.")

    while cap.isOpened():
        ret, frame = cap.read()

        if not ret:  # Check if frame is captured successfully
            print("Failed to grab frame")
            break

        # Flip the frame horizontally to fix the mirror effect
        frame = cv2.flip(frame, 1)

        data_aux = []
        x_ = []
        y_ = []

        H, W, _ = frame.shape

        # Convert the frame to RGB
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Process the frame to detect hand landmarks
        results = hands.process(frame_rgb)
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    frame,  # image to draw
                    hand_landmarks,  # model output
                    mp_hands.HAND_CONNECTIONS,  # hand connections
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style())

            # Feature layout must match the extraction cell: per hand, each
            # landmark's (x, y) minus the minimum seen so far.
            for hand_landmarks in results.multi_hand_landmarks:
                for landmark in hand_landmarks.landmark:
                    x_.append(landmark.x)
                    y_.append(landmark.y)

                min_x = min(x_)
                min_y = min(y_)
                for landmark in hand_landmarks.landmark:
                    data_aux.append(landmark.x - min_x)
                    data_aux.append(landmark.y - min_y)

            # Make the vector exactly as long as the model expects: pad with
            # zeros when fewer hands are visible, drop the extra hand when
            # the model was trained on fewer features.
            if len(data_aux) < n_features:
                data_aux.extend([0] * (n_features - len(data_aux)))
            elif len(data_aux) > n_features:
                data_aux = data_aux[:n_features]

            # Bounding box around all detected landmarks, grown by the margin
            # on every side (upper-left moves out by -margin, lower-right by
            # +margin).
            x1 = int(min(x_) * W) - BOX_MARGIN
            y1 = int(min(y_) * H) - BOX_MARGIN
            x2 = int(max(x_) * W) + BOX_MARGIN
            y2 = int(max(y_) * H) + BOX_MARGIN

            # Make predictions using the model
            prediction = model.predict([np.asarray(data_aux)])

            # Labels are stored as directory names ("0", "1", ...); show the
            # raw label if it has no entry in labels_dict.
            predicted_character = labels_dict.get(int(prediction[0]), str(prediction[0]))

            # Draw the prediction and bounding box
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
            cv2.putText(frame, predicted_character, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3, cv2.LINE_AA)

        # Show the frame with annotations
        cv2.imshow('frame', frame)

        if cv2.waitKey(1) & 0xFF == 27:  # Exit on 'ESC'
            break
finally:
    # Release the camera, the MediaPipe graph and the preview window even if
    # the loop is interrupted or raises.
    cap.release()
    hands.close()
    cv2.destroyAllWindows()


Camera opened successfully.
















