In [1]:
import numpy as np
import cv2
import os
import mediapipe as mp
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Initialize MediaPipe Hands
mpHands = mp.solutions.hands
hands = mpHands.Hands(max_num_hands=1)  # Detect only one hand
mpDraw = mp.solutions.drawing_utils

# Set the path to your dataset
path_to_data = "./new_alpha_2/" 

# Initialize a dictionary to hold file paths for each class
Sign_file_names_dict = {}

# Populate the dictionary with image file paths
for img_dir in os.scandir(path_to_data):
    if img_dir.is_dir():
        sign_name = img_dir.name
        Sign_file_names_dict[sign_name] = []
        for entry in os.scandir(img_dir.path):
            if entry.is_file() and entry.name.lower().endswith(('.png', '.jpg', '.jpeg')):
                Sign_file_names_dict[sign_name].append(entry.path)

# Define the class labels
class_dict = {name: idx for idx, name in enumerate(Sign_file_names_dict.keys())}

# Function to load and preprocess images
def load_and_preprocess_images(Sign_file_names_dict, class_dict, image_size=(100, 100)):
    X, y = [], []
    for sign_name, image_paths in Sign_file_names_dict.items():
        for image_path in image_paths:
            # Load the image
            img = cv2.imread(image_path)
            if img is None:
                print(f"Error loading image: {image_path}")
                continue
            imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            results = hands.process(imgRGB)

            landmarks = []
            if results.multi_hand_landmarks:
                # Process the first detected hand
                handLms = results.multi_hand_landmarks[0]
                for lm in handLms.landmark:
                    landmarks.extend([lm.x, lm.y, lm.z])
            else:
                # If no hand is detected, append zeros
                landmarks = [0.0] * 63  # 21 landmarks * 3 coordinates

            # Resize the image
            img_resized = cv2.resize(img, image_size)
            # Normalize the image
            img_normalized = img_resized / 255.0

            X.append(landmarks)
            y.append(class_dict[sign_name])

    X = np.array(X)
    y = np.array(y)
    return X, y

# Load and preprocess the data
X, y = load_and_preprocess_images(Sign_file_names_dict, class_dict)

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Verify the shape of the first sample
print(f"Shape of X[0]: {X[0].shape}")
print(f"Number of elements in X[0]: {len(X[0])}")


Shape of X[0]: (63,)
Number of elements in X[0]: 63


In [2]:
def convert_to_float_and_round(input_list):
    return list(map(lambda x: round(float(x), 4), input_list))

In [3]:
print(len(X))
class_dict

2793


{'a': 0,
 'b': 1,
 'c': 2,
 'd': 3,
 'del': 4,
 'e': 5,
 'f': 6,
 'g': 7,
 'h': 8,
 'i': 9,
 'j': 10,
 'k': 11,
 'l': 12,
 'm': 13,
 'n': 14,
 'o': 15,
 'p': 16,
 'q': 17,
 'r': 18,
 's': 19,
 'space': 20,
 't': 21,
 'u': 22,
 'v': 23,
 'w': 24,
 'x': 25,
 'y': 26,
 'z': 27}

In [4]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
from sklearn import svm

from sklearn.ensemble import RandomForestClassifier

# model = svm.SVC(decision_function_shape='ovo')
# model.fit(X_train, y_train)

# model = RandomForestClassifier(max_depth=9, random_state=0)
# model.fit(X_train, y_train)


    # keras.layers.Dropout(0.5),
   #  keras.layers.Dense(30, activation='relu'),
   #  keras.layers.Dropout(0.5),
   #  keras.layers.Dense(15, activation='relu'),
   # #  keras.layers.Dropout(0.5),

model = models.Sequential([
    layers.InputLayer(input_shape=(63,)),  # 21 landmarks * 3 coordinates
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(len(np.unique(y_encoded)), activation='softmax')  # Output layer
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=500, batch_size=63, validation_data=(X_test, y_test))




Epoch 1/500
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 27ms/step - accuracy: 0.0438 - loss: 3.3145 - val_accuracy: 0.1002 - val_loss: 3.2416
Epoch 2/500
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.1444 - loss: 3.1866 - val_accuracy: 0.1932 - val_loss: 3.0656
Epoch 3/500
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.2771 - loss: 2.9567 - val_accuracy: 0.3005 - val_loss: 2.7576
Epoch 4/500
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.3516 - loss: 2.6213 - val_accuracy: 0.4043 - val_loss: 2.4005
Epoch 5/500
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.4315 - loss: 2.2753 - val_accuracy: 0.4633 - val_loss: 2.0808
Epoch 6/500
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.4897 - loss: 1.9816 - val_accuracy: 0.5081 - val_loss: 1.8639
Epoch 7/500
[1m36/36[0m [32m━━

<keras.src.callbacks.history.History at 0x29ab01d6450>

In [6]:
img = cv2.imread("./numbers_1_hand_signs/99_143/13.jpg")

imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  
results = hands.process(imgRGB)

if results.multi_hand_landmarks:
    for handLms in results.multi_hand_landmarks:
        a = []
        for id, lm in enumerate(handLms.landmark):
            temp =[]
            temp.append(lm.x)
            temp.append(lm.y)
            temp.append(lm.z)
            a.append(temp)
        print(len(a))
        Numpy_array = np.array(a)
        flatten_array = Numpy_array.flatten()
        print(flatten_array)
        print(len(flatten_array))

# Assuming flatten_array is your 1D array with 63 elements
flatten_array = np.array(flatten_array).reshape(1, -1)

21
[ 3.41040969e-01  6.64668560e-01 -1.64455912e-07  4.05093998e-01
  6.28210425e-01 -1.36750080e-02  4.48419780e-01  5.68777502e-01
 -1.45512959e-02  4.75034207e-01  5.14351189e-01 -1.78712383e-02
  4.94595438e-01  4.80084360e-01 -2.10941248e-02  4.02668744e-01
  4.82772648e-01  5.94023895e-03  4.12792891e-01  4.01649147e-01
 -2.80971732e-03  4.16507930e-01  3.57204914e-01 -7.72492727e-03
  4.20436680e-01  3.16816449e-01 -1.10468036e-02  3.67001742e-01
  4.83677626e-01  2.39770254e-03  3.72743636e-01  4.25600082e-01
 -3.09506059e-02  3.76547009e-01  4.82813954e-01 -4.60818075e-02
  3.78788412e-01  5.31814277e-01 -4.57099937e-02  3.32709819e-01
  4.90523607e-01 -3.29517317e-03  3.33672047e-01  4.35255229e-01
 -3.64301465e-02  3.43398035e-01  4.92767632e-01 -4.01473530e-02
  3.49583149e-01  5.36802649e-01 -3.15928459e-02  2.97491819e-01
  5.01111329e-01 -9.94537771e-03  2.81915873e-01  4.37529832e-01
 -2.81374734e-02  2.77353704e-01  4.00262743e-01 -3.13893519e-02
  2.74940878e-01  3.57

In [8]:
output = model.predict( X_test)
print(output)# Assuming 'class_index' is the predicted index

class_dict = ['a', 'b', 'c', 'd',"Delete", 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',"Space", 't', 'u', 'v', 'w', 'x', 'y', 'z']


class_index = np.argmax(output)

print(f"Predicted class index: {class_dict[class_index]}")
print(output)


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[[3.3129253e-09 1.5591494e-29 2.0389379e-27 ... 9.8431073e-16
  9.9999833e-01 2.6602991e-21]
 [1.6688349e-32 3.4234312e-31 5.7631812e-18 ... 8.4861834e-10
  2.1805764e-32 2.3633400e-16]
 [3.9832353e-17 3.0159585e-27 2.5879804e-31 ... 2.9394638e-24
  9.9999988e-01 1.4231340e-26]
 ...
 [0.0000000e+00 6.0029330e-23 7.0373291e-17 ... 6.0457620e-14
  1.4133956e-35 3.6205348e-15]
 [1.1544602e-34 9.9998093e-01 6.3917875e-09 ... 8.7455493e-30
  8.4441493e-35 6.8965917e-21]
 [2.0977168e-06 4.8931545e-29 1.2933889e-16 ... 2.0049454e-03
  9.0501608e-12 2.4191129e-12]]


IndexError: list index out of range

In [9]:
class_dict

['a',
 'b',
 'c',
 'd',
 'Delete',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 'Space',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']

In [11]:
import pickle 

with open('model2_alpha_f.pkl', 'wb') as file:
    pickle.dump(model, file)

In [10]:
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_test, output, normalize=True, sample_weight=None)
accuracy

ValueError: Classification metrics can't handle a mix of multiclass and continuous-multioutput targets