In [1]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split  
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import mediapipe as mp
import numpy as np
import pandas as pd
import pickle
import cv2
import csv




In [2]:
# Styling for the handedness label that draw_landmarks_on_image writes above each hand.
MARGIN = 10  # pixels subtracted from the label's y position, lifting it above the hand
FONT_SIZE = 1  # font scale passed to cv2.putText
FONT_THICKNESS = 1  # stroke thickness passed to cv2.putText
HANDEDNESS_TEXT_COLOR = (88, 205, 54) # vibrant green

In [3]:
def draw_landmarks_on_image(rgb_image, detection_result):
  """Return a copy of ``rgb_image`` annotated with every detected hand.

  For each hand in ``detection_result`` the landmark skeleton is drawn with
  MediaPipe's default hand styles, and the name of the first handedness
  category is written just above the hand's top-left corner.

  Args:
    rgb_image: Image array. It is copied, never modified. When at least one
      hand is present its shape must unpack as (height, width, channels).
    detection_result: Object exposing ``hand_landmarks`` and ``handedness``
      sequences that are indexed in parallel, one entry per detected hand.

  Returns:
    The annotated copy of ``rgb_image``.
  """
  all_landmarks = detection_result.hand_landmarks
  all_handedness = detection_result.handedness
  canvas = np.copy(rgb_image)

  for hand_index, hand in enumerate(all_landmarks):
    handedness = all_handedness[hand_index]

    # The drawing utilities take a protobuf landmark list, so repackage the points.
    points = [
      landmark_pb2.NormalizedLandmark(x=point.x, y=point.y, z=point.z)
      for point in hand
    ]
    skeleton = landmark_pb2.NormalizedLandmarkList()
    skeleton.landmark.extend(points)
    solutions.drawing_utils.draw_landmarks(
      canvas,
      skeleton,
      solutions.hands.HAND_CONNECTIONS,
      solutions.drawing_styles.get_default_hand_landmarks_style(),
      solutions.drawing_styles.get_default_hand_connections_style())

    # Landmark coordinates are fractions of the image size; scale the smallest
    # x and y to pixels to get the top-left corner of the hand's bounding box.
    height, width, _ = canvas.shape
    leftmost = min(point.x for point in hand)
    topmost = min(point.y for point in hand)
    label_origin = (int(leftmost * width), int(topmost * height) - MARGIN)

    # Write the handedness label at that corner, lifted by MARGIN pixels.
    label = f"{handedness[0].category_name}"
    cv2.putText(canvas, label, label_origin, cv2.FONT_HERSHEY_DUPLEX,
                FONT_SIZE, HANDEDNESS_TEXT_COLOR, FONT_THICKNESS, cv2.LINE_AA)

  return canvas

In [4]:
# Build the MediaPipe hand landmarker that every webcam loop in this notebook calls.
# The .task model bundle is loaded from a path relative to the working directory.
base_options = python.BaseOptions(model_asset_path='hand_landmarker_model/hand_landmarker.task')
options = vision.HandLandmarkerOptions(base_options=base_options,
                                       num_hands=2)  # the loops below read up to two hands per frame
detector = vision.HandLandmarker.create_from_options(options)

In [5]:
# Live preview: show the webcam feed with hand landmarks drawn on it until 'q' is pressed.
MAX_EMPTY_FRAMES = 30  # consecutive failed reads tolerated before giving up

cap = cv2.VideoCapture(0) # 0 for webcam
empty_frames = 0

try:
    while cap.isOpened():
        ret, frame = cap.read()

        if not ret:
            # Skip transient read failures, but never spin forever on a camera
            # that has stopped delivering frames (the 'q' check below is not
            # reached on this path, so an unbounded retry could not be quit).
            empty_frames += 1
            if empty_frames >= MAX_EMPTY_FRAMES:
                print("No frames received from the camera; stopping.")
                break
            print("Ignoring empty camera frame.")
            continue
        empty_frames = 0

        # OpenCV delivers frames in BGR channel order, while the SRGB image
        # format expects RGB, so convert before handing the frame to MediaPipe.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

        # Make Detections
        results = detector.detect(image)

        # Draw landmarks on the image
        annotated_image = draw_landmarks_on_image(image.numpy_view(), results)

        # cv2.imshow interprets arrays as BGR, so convert back for display.
        cv2.imshow('Raw Webcam Feed', cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()



In [7]:
# Number of landmarks per hand; this fixes how many columns each CSV row has.
# Read it from the last preview result when a hand was visible. Otherwise fall
# back to 21 (the per-hand landmark count) so this cell does not depend on what
# the camera happened to see when the preview loop stopped.
DEFAULT_NUM_COORDS = 21
try:
    num_coords = len(results.hand_landmarks[0])
except (NameError, IndexError):
    # NameError: the preview loop never produced a result.
    # IndexError: the last previewed frame contained no hand.
    num_coords = DEFAULT_NUM_COORDS
num_coords

21

In [8]:
# CSV header: the label column first, then x/y/z/v columns for landmarks 1..num_coords.
landmarks = ['class']
for val in range(1, num_coords + 1):
    landmarks.extend(f'{axis}{val}' for axis in 'xyzv')

In [102]:
# Create the dataset file and write only the header row.
# WARNING: mode='w' truncates the file, so re-running this cell discards every
# row that has already been appended to hand_coords.csv.
with open('hand_coords.csv', mode='w', newline='') as f:
    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerow(landmarks)

In [150]:
# Label written into the first column of every row captured by the collection loop.
# Set it to one class, run the collection cell, then repeat for each remaining
# class (A-I and K-Y).
class_name = "Y"

In [151]:
# Data collection: append one CSV row per detected hand per frame, labelled with
# class_name, until 'q' is pressed.
cap = cv2.VideoCapture(0)

try:
    # Open the CSV once for the whole session instead of once per hand per frame.
    with open('hand_coords.csv', mode='a', newline='') as f:
        csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # A failed read yields no frame to process; stop cleanly instead of crashing.
                print("Camera frame could not be read; stopping capture.")
                break

            # OpenCV delivers frames in BGR channel order, while the SRGB image
            # format expects RGB, so convert before handing the frame to MediaPipe.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            # Make Detections
            results = detector.detect(image)

            annotated_image = draw_landmarks_on_image(image.numpy_view(), results)

            # Export coordinates. Iterating handles zero, one or two hands without
            # relying on exceptions; list order is detection order, not left/right.
            for hand in results.hand_landmarks:
                hand_row = [class_name]
                for landmark in hand:
                    hand_row.extend([landmark.x, landmark.y, landmark.z, landmark.visibility])
                csv_writer.writerow(hand_row)

            # cv2.imshow interprets arrays as BGR, so convert back for display.
            cv2.imshow('Raw Webcam Feed', cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()

In [155]:
# Load the collected dataset: a 'class' label column followed by the landmark columns.
df = pd.read_csv('hand_coords.csv')

In [156]:
# Display the loaded frame as a sanity check.
df

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z19,v19,x20,y20,z20,v20,x21,y21,z21,v21
0,A,0.327416,0.686052,-4.879510e-08,0.0,0.383065,0.678044,-0.017933,0.0,0.436072,...,-0.028225,0.0,0.314897,0.554365,-0.018203,0.0,0.311588,0.569675,-0.006574,0.0
1,A,0.750257,0.638664,-4.532401e-07,0.0,0.686401,0.613268,-0.020584,0.0,0.635253,...,-0.028124,0.0,0.751667,0.517083,-0.016122,0.0,0.758040,0.525703,-0.003833,0.0
2,A,0.749820,0.649885,-4.635528e-07,0.0,0.683082,0.612238,-0.015313,0.0,0.632462,...,-0.024092,0.0,0.746365,0.519977,-0.010559,0.0,0.754480,0.529700,0.003267,0.0
3,A,0.334021,0.670078,-6.679477e-08,0.0,0.393784,0.654377,-0.013607,0.0,0.438822,...,-0.024689,0.0,0.319113,0.538535,-0.014349,0.0,0.315626,0.554056,-0.002527,0.0
4,A,0.344497,0.669886,-7.059939e-08,0.0,0.400088,0.649059,-0.015033,0.0,0.444857,...,-0.034685,0.0,0.328284,0.534055,-0.023771,0.0,0.324535,0.549336,-0.012235,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3664,Y,0.324587,0.814561,-1.752176e-07,0.0,0.367312,0.757715,-0.014664,0.0,0.388649,...,-0.024665,0.0,0.240225,0.622351,-0.027139,0.0,0.230589,0.593470,-0.026095,0.0
3665,Y,0.327134,0.815123,-2.563510e-07,0.0,0.369158,0.756248,-0.018892,0.0,0.388643,...,-0.022274,0.0,0.228723,0.608077,-0.026383,0.0,0.213770,0.571957,-0.027481,0.0
3666,Y,0.326702,0.805681,-1.507500e-07,0.0,0.367199,0.755932,-0.019415,0.0,0.388183,...,-0.023388,0.0,0.237111,0.611668,-0.027647,0.0,0.226035,0.581014,-0.027392,0.0
3667,Y,0.328697,0.808834,-2.199633e-07,0.0,0.370464,0.754879,-0.018349,0.0,0.389898,...,-0.017999,0.0,0.234614,0.603035,-0.020427,0.0,0.221547,0.567119,-0.019932,0.0


In [157]:
# Separate the label column from the landmark columns.
y = df['class']               # target value
X = df.drop(columns='class')  # features

In [158]:
# Hold out 30% of the rows for testing; random_state fixes the shuffle so the split is repeatable.
# NOTE(review): rows are written from consecutive webcam frames, so a purely random
# split likely places near-identical frames in both sets. Treat the test accuracy as
# optimistic and consider splitting by recording session (TODO confirm).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1234) # 70% training data, 30% testing data

In [159]:
# Inspect the held-out labels.
y_test

2437    O
3485    X
201     A
2758    R
444     B
       ..
254     A
434     B
214     A
2537    P
2901    S
Name: class, Length: 1101, dtype: object

In [163]:
# Candidate classifiers keyed by a short name; each one is preceded by feature
# standardisation. Increase max_iter if the logistic regression does not converge.
pipelines = {
    name: make_pipeline(StandardScaler(), estimator)
    for name, estimator in (
        ('lr', LogisticRegression(max_iter=300, class_weight='balanced')),
        ('rc', RidgeClassifier()),
        ('rf', RandomForestClassifier()),
        ('gb', GradientBoostingClassifier()),
    )
}

In [165]:
# Train every candidate pipeline on the training split, keyed by its short name.
# fit() hands back the fitted pipeline, which is what gets stored.
fit_models = {
    algo: pipeline.fit(X_train, y_train)
    for algo, pipeline in pipelines.items()
}

In [166]:
# Show the fitted pipelines and their steps.
fit_models

{'lr': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('logisticregression',
                  LogisticRegression(class_weight='balanced', max_iter=300))]),
 'rc': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('ridgeclassifier', RidgeClassifier())]),
 'rf': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('randomforestclassifier', RandomForestClassifier())]),
 'gb': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('gradientboostingclassifier', GradientBoostingClassifier())])}

In [167]:
# Score each fitted pipeline on the held-out split and print its accuracy.
for algo, model in fit_models.items():
    yhat = model.predict(X_test)  # predicted class labels for the test rows
    print(algo, accuracy_score(y_test, yhat))

lr 0.9745685740236149
rc 0.9291553133514986
rf 0.9600363306085377
gb 0.9500454132606722


In [171]:
# Save the best model: the whole 'lr' pipeline (scaler + logistic regression),
# which scored highest in the evaluation above. The key is hard-coded, so update
# it if a different pipeline wins after retraining.
with open('sign_detection.pkl', 'wb') as f:
    pickle.dump(fit_models['lr'], f)

In [172]:
# Load the model back from disk.
# NOTE(review): pickle.load can execute arbitrary code, so only load a
# sign_detection.pkl that this notebook (or a trusted source) produced.
with open('sign_detection.pkl', 'rb') as f:
    loaded_model = pickle.load(f)

In [173]:
# Display the restored pipeline to confirm it loaded.
loaded_model

In [177]:
# Live inference: classify the hand sign in each webcam frame and overlay the
# predicted class and its probability until 'q' is pressed.
cap = cv2.VideoCapture(0)

# Name the live features like the training columns so scikit-learn does not warn
# about feature names on every frame. Falls back to None (default integer
# columns) when the loaded model does not record feature names.
feature_columns = getattr(loaded_model, 'feature_names_in_', None)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # A failed read yields no frame to process; stop cleanly instead of crashing.
            print("Camera frame could not be read; stopping.")
            break

        # OpenCV delivers frames in BGR channel order, while the SRGB image
        # format expects RGB, so convert before handing the frame to MediaPipe.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

        # Make Detections
        results = detector.detect(image)

        # Landmarks are drawn on the RGB frame; convert to BGR before adding the
        # overlay so the overlay colours below and cv2.imshow agree.
        annotated_image = cv2.cvtColor(
            draw_landmarks_on_image(image.numpy_view(), results), cv2.COLOR_RGB2BGR)

        # One prediction per detected hand (list order is detection order, not
        # left/right). Every hand draws into the same panel, so with two hands
        # the last one is what remains visible.
        for hand in results.hand_landmarks:
            hand_row = [
                value
                for landmark in hand
                for value in (landmark.x, landmark.y, landmark.z, landmark.visibility)
            ]
            # A dedicated name keeps the training feature matrix `X` intact.
            hand_features = pd.DataFrame([hand_row], columns=feature_columns)

            # predict the class
            sign_det_class = loaded_model.predict(hand_features)[0]
            sign_det_prob = loaded_model.predict_proba(hand_features)[0]

            # Filled panel in the top-left corner that holds the overlay text.
            cv2.rectangle(annotated_image, (0,0), (250,60), (245, 117, 16), -1)

            cv2.putText(annotated_image, 'CLASS', (95,12),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
            cv2.putText(annotated_image, sign_det_class.split(' ')[0], (90,60),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            cv2.putText(annotated_image, 'PROB', (15,12),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
            cv2.putText(annotated_image, str(round(sign_det_prob[np.argmax(sign_det_prob)],2)), (10,60),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

        cv2.imshow('Predictions', annotated_image)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()

