# Requirements

In [None]:
# %pip install mediapipe==0.9.0.1 scikit-learn==1.2.0 opencv-python==4.7.0.68

# Notebook magic: install the packages listed in requirements.txt (the line above is a disabled alternative with explicit pins).
%pip install -r requirements.txt

# Collect Images

In [None]:

# Capture settings: how many frames are saved per class, and how many classes exist.
dataset_size = 100
number_of_classes = 26  # Assuming you have 26 classes

In [None]:
import os
import cv2

DATA_DIR = './data'
number_of_classes = 26  # Assuming you have 26 classes
dataset_size = 100


def _prompt_for_action(cap):
    """Show the live preview until the user chooses what to do with the class.

    Returns 's' (start capturing), 'n' (skip this class) or 'q' (quit).
    Upper- and lower-case keys are both accepted. A failed camera read is
    reported and treated as 'q' so the caller stops instead of looping on
    empty frames.
    """
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Error: Could not read a frame from the camera.")
            return 'q'

        cv2.putText(frame, 'Ready? Press "S" to start capturing, "N" to skip, or "q" to exit!', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 3, cv2.LINE_AA)
        cv2.imshow('frame', frame)

        # Mask to the low byte so the comparison also works on platforms
        # where waitKey sets higher bits; "no key" (-1) becomes 255.
        choice = chr(cv2.waitKey(25) & 0xFF).lower()
        if choice in ('s', 'n', 'q'):
            return choice


def _capture_frames(cap, class_dir, count):
    """Save `count` consecutive frames into `class_dir` as 0.jpg, 1.jpg, ...

    Returns True when every frame was saved and False when the camera
    stopped delivering frames.
    """
    for counter in range(count):
        ret, frame = cap.read()
        if not ret:
            print("Error: Could not read a frame from the camera.")
            return False
        cv2.imshow('frame', frame)
        cv2.waitKey(25)
        cv2.imwrite(os.path.join(class_dir, '{}.jpg'.format(counter)), frame)
    return True


def _collect_dataset(cap):
    """Walk through all classes, asking the user before each capture run."""
    if not cap.isOpened():
        # Returning (instead of calling exit()) keeps the notebook kernel alive.
        print("Error: Could not open camera.")
        return

    for j in range(number_of_classes):
        class_dir = os.path.join(DATA_DIR, str(j))
        os.makedirs(class_dir, exist_ok=True)

        print('Collecting data for class {}'.format(j))

        choice = _prompt_for_action(cap)
        if choice == 'n':
            print("Skipping class {}".format(j))
            continue
        if choice == 'q':
            # Stop the whole session; cleanup happens in the finally below.
            print("Exiting the program.")
            return

        print("Capturing data for class {}".format(j))
        if not _capture_frames(cap, class_dir, dataset_size):
            return


os.makedirs(DATA_DIR, exist_ok=True)

# `cap` stays a module-level name because the following cell releases it again.
cap = cv2.VideoCapture(0)

try:
    _collect_dataset(cap)
except Exception as e:
    print("An error occurred:", e)
finally:
    cap.release()
    cv2.destroyAllWindows()


In [None]:
# Release the webcam handle and close all OpenCV windows (repeats the cleanup
# performed at the end of the previous cell).
cap.release()
cv2.destroyAllWindows()

# Create Dataset

In [None]:
import os
import pickle

import mediapipe as mp
import cv2
import matplotlib.pyplot as plt

In [None]:
# Short aliases for the MediaPipe solution modules used in this section.
mp_drawing_styles = mp.solutions.drawing_styles
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Hand detector configured with static_image_mode=True and a 0.3 minimum
# detection confidence.
hands = mp_hands.Hands(min_detection_confidence=0.3, static_image_mode=True)


In [None]:
""" # To show the land marks on the image (only last frame "Just for Understanding")

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

DATA_DIR = './data'

for dir_ in os.listdir(DATA_DIR):
    for img_path in os.listdir(os.path.join(DATA_DIR, dir_))[:1]:
        img = cv2.imread(os.path.join(DATA_DIR, dir_, img_path))
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        results = hands.process(img_rgb)
        if results.multi_hand_landmarks:
            for hands_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(img_rgb, # image to draw 
                                          hands_landmarks, # Model output
                                          mp_hands.HAND_CONNECTIONS, # hand connections
                                          mp_drawing_styles.get_default_hand_landmarks_style(),
                                          mp_drawing_styles.get_default_hand_connections_style())

           

        plt.figure()
        plt.imshow(img_rgb)

plt.show()

"""

In [None]:
# Accumulators filled by the next cell: one feature vector per image in `data`
# and the matching class-folder name in `labels`.
labels = []
data = []

# Root folder that holds one sub-folder of images per class.
DATA_DIR = './data'

In [None]:
def _hand_to_features(hand_landmarks):
    """Flatten one hand into [x0 - min_x, y0 - min_y, x1 - min_x, y1 - min_y, ...].

    The minima are taken over this hand only, so the result does not depend
    on where the hand sits in the image.
    """
    xs = [landmark.x for landmark in hand_landmarks.landmark]
    ys = [landmark.y for landmark in hand_landmarks.landmark]
    min_x, min_y = min(xs), min(ys)

    features = []
    for x, y in zip(xs, ys):
        features.append(x - min_x)
        features.append(y - min_y)
    return features


# Sorted listings make the order of samples in the pickle reproducible.
for dir_ in sorted(os.listdir(DATA_DIR)):
    class_dir = os.path.join(DATA_DIR, dir_)
    if not os.path.isdir(class_dir):
        continue  # ignore stray files that are not class folders

    for img_path in sorted(os.listdir(class_dir)):
        img = cv2.imread(os.path.join(class_dir, img_path))
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            print('Skipping unreadable image:', os.path.join(class_dir, img_path))
            continue

        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = hands.process(img_rgb)
        if not results.multi_hand_landmarks:
            continue  # no hand found in this image

        # Use only the first detected hand: appending every hand would make
        # some samples twice as long as others and break np.asarray / training.
        data.append(_hand_to_features(results.multi_hand_landmarks[0]))
        labels.append(dir_)

# The context manager closes the file even if pickling fails.
with open('data.pickle', 'wb') as f:
    pickle.dump({'data': data, 'labels': labels}, f)

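#### MediaPipe, like many other image processing libraries and models, expects input images to be in RGB format, whereas OpenCV's `cv2.imread` returns images in BGR channel order. That is why every image is converted with `cv2.cvtColor(img, cv2.COLOR_BGR2RGB)` before it is passed to `hands.process`. RGB (Red, Green, Blue) is a standard color space used in computer vision and image processing tasks. In RGB format, the color channels are arranged as red, green, and blue, respectively.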

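#### Normalization: The extracted x and y coordinates are shifted by subtracting the minimum x and y values from all coordinates. This makes the features translation-invariant, i.e. independent of where the hand appears in the image. Note that it does not by itself remove the effect of hand size or distance from the camera; that would additionally require dividing by the hand's width and height.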

# Train classifier

- Loads hand gesture data from a pickle file.
- Splits the data into training and testing sets.
- Trains a Support Vector Classifier (SVC) model using the training data.
- Evaluates the model's accuracy on the testing data.
- Saves the trained model to a file.

In [None]:
import pickle
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC  # For SVC
from sklearn.ensemble import RandomForestClassifier # For ensemble
from sklearn.metrics import accuracy_score
import numpy as np

In [None]:
# Read the dataset written by the "Create Dataset" section.
# NOTE: unpickling can execute arbitrary code; only load pickle files you created yourself.
with open('./data.pickle', 'rb') as f:  # closed automatically, even on error
    data_dict = pickle.load(f)

data = np.asarray(data_dict['data'])
labels = np.asarray(data_dict['labels'])

In [None]:
# Hold out 20% of the samples for evaluation. `stratify` keeps the class
# proportions equal in both splits and `random_state` makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, random_state=42, shuffle=True, stratify=labels)

# model = RandomForestClassifier()
model = SVC(kernel='linear')  # You can choose different kernels like 'rbf' or 'poly' as well

model.fit(x_train, y_train)

# Accuracy is symmetric in its two arguments, so it is computed once and
# reused for both messages; `score` is kept as an alias of `accuracy`.
y_predict = model.predict(x_test)
accuracy = accuracy_score(y_test, y_predict)
score = accuracy
print("Accuracy:", accuracy)
print('{}% of samples were classified correctly !'.format(score * 100))

# Write the model to a file; the context manager closes it even if pickling fails.
with open('model.p', 'wb') as f:
    pickle.dump({'model': model}, f)

# test the classifier

In [None]:
import pickle
import cv2
import mediapipe as mp
import numpy as np

In [None]:
# Load the classifier saved by the training section.
# NOTE: unpickling can execute arbitrary code; only load model files you created yourself.
with open('./model.p', 'rb') as f:  # closed automatically, even on error
    model_dict = pickle.load(f)
model = model_dict['model']

In [None]:
# Short aliases for the MediaPipe solution modules used by the live test below.
mp_drawing_styles = mp.solutions.drawing_styles
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

In [None]:
# Hand detector configured with static_image_mode=True and a 0.3 minimum
# detection confidence.
hands = mp_hands.Hands(min_detection_confidence=0.3, static_image_mode=True)

# Class index -> letter, for labels 0 to 25: 0 -> 'A', 1 -> 'B', ..., 25 -> 'Z'.
labels_dict = {index: chr(ord('A') + index) for index in range(26)}

In [None]:
# cap = cv2.VideoCapture(0)
# while True:

#     data_aux = []
#     x_ = []
#     y_ = []

#     ret, frame = cap.read()

#     H, W, _ = frame.shape

#     frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

#     results = hands.process(frame_rgb)
#     if results.multi_hand_landmarks:
#         for hand_landmarks in results.multi_hand_landmarks:
#             mp_drawing.draw_landmarks(
#                 frame,  # image to draw
#                 hand_landmarks,  # model output
#                 mp_hands.HAND_CONNECTIONS,  # hand connections
#                 mp_drawing_styles.get_default_hand_landmarks_style(),
#                 mp_drawing_styles.get_default_hand_connections_style())

#         for hand_landmarks in results.multi_hand_landmarks:
#             for i in range(len(hand_landmarks.landmark)):
#                 x = hand_landmarks.landmark[i].x
#                 y = hand_landmarks.landmark[i].y

#                 x_.append(x)
#                 y_.append(y)

#             for i in range(len(hand_landmarks.landmark)):
#                 x = hand_landmarks.landmark[i].x
#                 y = hand_landmarks.landmark[i].y
#                 data_aux.append(x - min(x_))
#                 data_aux.append(y - min(y_))

#         x1 = int(min(x_) * W) - 10
#         y1 = int(min(y_) * H) - 10

#         x2 = int(max(x_) * W) - 10
#         y2 = int(max(y_) * H) - 10


#         prediction = model.predict([np.asarray(data_aux)])
#         # If we not add this condition, the model will try to predict the label even if there is no hand in the frame (that means program will try to find the key even when it is not in the dictionary, which will cause an error)
#         if prediction.shape[0] > 0:
#             predicted_label_index = int(prediction[0])
#             predicted_character = labels_dict.get(predicted_label_index, "Unknown")
#         else:
#             predicted_character = "Unknown"
#         cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
#         cv2.putText(frame, predicted_character, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3, cv2.LINE_AA)


#     cv2.imshow('frame', frame)
#     c = cv2.waitKey(1)
#     if c == ord('q'):
#         break
# cap.release()
# cv2.destroyAllWindows()

# Error handling using try/except (especially the multi-hand error)

In [None]:
import cv2


def _hand_features(hand_landmarks):
    """Return (features, xs, ys) for a single detected hand.

    `features` is [x0 - min_x, y0 - min_y, x1 - min_x, ...], the same layout
    the dataset-creation step produces; `xs` / `ys` are the raw normalized
    coordinates, used by the caller to place the bounding box.
    """
    xs = [landmark.x for landmark in hand_landmarks.landmark]
    ys = [landmark.y for landmark in hand_landmarks.landmark]
    min_x, min_y = min(xs), min(ys)

    features = []
    for x, y in zip(xs, ys):
        features.append(x - min_x)
        features.append(y - min_y)
    return features, xs, ys


def _annotate_frame(frame, results):
    """Draw all detected hands on `frame` and label the first one with its predicted letter."""
    H, W, _ = frame.shape

    for hand_landmarks in results.multi_hand_landmarks:
        mp_drawing.draw_landmarks(
            frame,  # image to draw
            hand_landmarks,  # model output
            mp_hands.HAND_CONNECTIONS,  # hand connections
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style())

    # Classify only the first hand: concatenating the landmarks of several
    # hands yields a longer feature vector than the model was trained on,
    # which makes model.predict raise.
    features, xs, ys = _hand_features(results.multi_hand_landmarks[0])

    # Pad the box by 10 px on every side (top-left moves out with -10,
    # bottom-right with +10).
    x1 = int(min(xs) * W) - 10
    y1 = int(min(ys) * H) - 10
    x2 = int(max(xs) * W) + 10
    y2 = int(max(ys) * H) + 10

    prediction = model.predict([np.asarray(features)])
    predicted_character = labels_dict.get(int(prediction[0]), "Unknown")

    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
    cv2.putText(frame, predicted_character, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3, cv2.LINE_AA)


# Initialize the webcam (module-level so the following cell can release it too)
cap = cv2.VideoCapture(0)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Without this check a missing camera makes every iteration fail
            # before waitKey is reached, so the loop could never be left.
            print("Error: Could not read a frame from the camera.")
            break

        results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        if results.multi_hand_landmarks:
            try:
                _annotate_frame(frame, results)
            except Exception as e:
                # Best effort: report the problem but keep showing the video
                # so the user can still quit with "q".
                print("An error occurred:", str(e))

        cv2.imshow('frame', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the webcam and close all windows, even after an interrupt
    cap.release()
    cv2.destroyAllWindows()


In [None]:
# Release the webcam handle and close all OpenCV windows (repeats the cleanup
# performed at the end of the previous cell).
cap.release()
cv2.destroyAllWindows()