In [None]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split  
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import mediapipe as mp
import numpy as np
import pandas as pd
import pickle
import cv2
import csv

In [None]:
# Styling for the handedness label that draw_landmarks_on_image writes next to each hand.
MARGIN = 10  # pixels the label is lifted above the hand's topmost landmark
FONT_SIZE = 1  # font scale handed to cv2.putText
FONT_THICKNESS = 1  # stroke thickness handed to cv2.putText
HANDEDNESS_TEXT_COLOR = (88, 205, 54) # vibrant green

In [None]:
def draw_landmarks_on_image(rgb_image, detection_result):
  """Return a copy of ``rgb_image`` annotated with every detected hand.

  For each hand the landmark skeleton is drawn with MediaPipe's default hand
  styles, and the first handedness category name is written ``MARGIN`` pixels
  above the top-left corner of the hand's landmark extent.

  Args:
    rgb_image: Image array; it is copied, never modified. When at least one
      hand is present its shape is unpacked as (height, width, channels).
    detection_result: Object exposing parallel ``hand_landmarks`` and
      ``handedness`` sequences, one entry per detected hand.

  Returns:
    The annotated copy of ``rgb_image``.
  """
  all_hands = detection_result.hand_landmarks
  all_handedness = detection_result.handedness
  canvas = np.copy(rgb_image)

  for hand_index, hand_points in enumerate(all_hands):
    hand_label = all_handedness[hand_index]

    # The drawing utilities expect a protobuf landmark list, so repackage the points.
    proto_points = [
      landmark_pb2.NormalizedLandmark(x=point.x, y=point.y, z=point.z)
      for point in hand_points
    ]
    hand_proto = landmark_pb2.NormalizedLandmarkList()
    hand_proto.landmark.extend(proto_points)
    solutions.drawing_utils.draw_landmarks(
      canvas,
      hand_proto,
      solutions.hands.HAND_CONNECTIONS,
      solutions.drawing_styles.get_default_hand_landmarks_style(),
      solutions.drawing_styles.get_default_hand_connections_style())

    # Landmark coordinates are normalised, so scale them by the image size
    # to find the top-left corner of the hand in pixels.
    height, width, _ = canvas.shape
    leftmost = min(point.x for point in hand_points)
    topmost = min(point.y for point in hand_points)
    label_origin = (int(leftmost * width), int(topmost * height) - MARGIN)

    # Write the handedness (left or right hand) just above the hand.
    label_text = f"{hand_label[0].category_name}"
    cv2.putText(canvas, label_text, label_origin, cv2.FONT_HERSHEY_DUPLEX,
                FONT_SIZE, HANDEDNESS_TEXT_COLOR, FONT_THICKNESS, cv2.LINE_AA)

  return canvas

In [None]:
# Build the MediaPipe hand landmarker from the local .task model file.
base_options = python.BaseOptions(
    model_asset_path='hand_landmarker_model/hand_landmarker.task',
)
# num_hands=2 is passed so that both hands can be reported for a single image.
options = vision.HandLandmarkerOptions(
    base_options=base_options,
    num_hands=2,
)
detector = vision.HandLandmarker.create_from_options(options)

In [None]:
# Live preview: run the hand landmarker on webcam frames and show the annotated
# feed until 'q' is pressed. `results` is left holding the last detection.
MAX_EMPTY_READS = 30  # consecutive failed reads tolerated before giving up

cap = cv2.VideoCapture(0) # 0 for webcam
empty_reads = 0

try:
    while cap.isOpened():
        ret, frame = cap.read()

        if not ret:
            # Tolerate a few dropped frames (e.g. while the camera warms up),
            # but do not spin forever if the camera stops delivering frames.
            empty_reads += 1
            print("Ignoring empty camera frame.")
            if empty_reads >= MAX_EMPTY_READS:
                print("No frames received from the camera; stopping.")
                break
            continue
        empty_reads = 0

        # OpenCV delivers BGR frames, but the image below is declared as SRGB,
        # so the channels must be swapped before wrapping the array.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

        # Make Detections
        results = detector.detect(image)

        # Draw landmarks on the image
        annotated_image = draw_landmarks_on_image(image.numpy_view(), results)

        # cv2.imshow interprets arrays as BGR, so convert back for display.
        cv2.imshow('Raw Webcam Feed', cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# Number of landmarks per hand. Prefer the count observed in the most recent
# detection, but fall back to 21 when the preview cell has not run or its last
# frame contained no hand -- indexing hand_landmarks[0] would otherwise raise.
DEFAULT_NUM_COORDS = 21  # number of coordinates for each hand
last_results = globals().get('results')
if last_results is not None and last_results.hand_landmarks:
    num_coords = len(last_results.hand_landmarks[0])
else:
    num_coords = DEFAULT_NUM_COORDS
num_coords

In [None]:
# CSV header: the label column, then x/y/z/v columns for each landmark,
# numbered from 1 to num_coords.
landmarks = ['class'] + [
    template.format(val)
    for val in range(1, num_coords + 1)
    for template in ('x{}', 'y{}', 'z{}', 'v{}')
]

In [None]:
# Start the dataset file with just the header row.
# NOTE: mode 'w' truncates any hand_coords.csv that already exists.
with open('hand_coords.csv', mode='w', newline='') as header_file:
    header_writer = csv.writer(
        header_file,
        delimiter=',',
        quotechar='"',
        quoting=csv.QUOTE_MINIMAL,
    )
    header_writer.writerow(landmarks)

In [None]:
# Label stored in the first column of every row captured by the recording cell below.
class_name = "Y" # Add different classes here, and re-run the below code for each class (A-I and K-Y)

In [None]:
# Recording session: append one CSV row per detected hand, labelled with
# `class_name`, for every webcam frame until 'q' is pressed.
MAX_EMPTY_READS = 30  # consecutive failed reads tolerated before giving up

cap = cv2.VideoCapture(0)
empty_reads = 0

try:
    # Open the dataset once for the whole session instead of once per row.
    with open('hand_coords.csv', mode='a', newline='') as f:
        csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        while cap.isOpened():
            ret, frame = cap.read()

            if not ret:
                # A failed read yields no frame; skip it rather than crash,
                # but stop if the camera never recovers.
                empty_reads += 1
                print("Ignoring empty camera frame.")
                if empty_reads >= MAX_EMPTY_READS:
                    print("No frames received from the camera; stopping.")
                    break
                continue
            empty_reads = 0

            # OpenCV delivers BGR frames, but the image is declared as SRGB,
            # so swap the channels before wrapping the array.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            # Make Detections
            results = detector.detect(image)

            annotated_image = draw_landmarks_on_image(image.numpy_view(), results)

            # Export coordinates: iterating the detected hands writes nothing
            # when no hand is visible, so no exception has to be swallowed.
            for hand in results.hand_landmarks:
                row = [class_name]
                for landmark in hand:
                    row.extend((landmark.x, landmark.y, landmark.z, landmark.visibility))
                csv_writer.writerow(row)

            # cv2.imshow interprets arrays as BGR, so convert back for display.
            cv2.imshow('Raw Webcam Feed', cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# Load every recorded sample (header row plus one row per captured hand) into a DataFrame.
df = pd.read_csv('hand_coords.csv')

In [None]:
# Display the loaded dataset as a quick visual check.
df

In [None]:
# Separate the sign label from the landmark feature columns.
y = df['class']  # target value
X = df.drop(columns='class')  # features

In [None]:
# Hold out 30% of the samples for testing and train on the remaining 70%;
# the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1234,
)

In [None]:
# Display the held-out labels as a quick visual check.
y_test

In [None]:
# Candidate classifiers, each preceded by feature standardisation.
# increase the max_iter parameter if the model is not converging
RANDOM_STATE = 1234  # same seed as the train/test split, so a full re-run gives the same models

pipelines = {
    'lr':make_pipeline(StandardScaler(), LogisticRegression(max_iter=300, class_weight='balanced')),
    'rc':make_pipeline(StandardScaler(), RidgeClassifier()),
    # The two ensemble models are randomised; seed them so their accuracies are reproducible.
    'rf':make_pipeline(StandardScaler(), RandomForestClassifier(random_state=RANDOM_STATE)),
    'gb':make_pipeline(StandardScaler(), GradientBoostingClassifier(random_state=RANDOM_STATE))
}

In [None]:
# Train every candidate pipeline on the training split, keyed by its short name.
fit_models = {
    algo: pipeline.fit(X_train, y_train)
    for algo, pipeline in pipelines.items()
}

In [None]:
# Display the fitted pipelines.
fit_models

In [None]:
# Print each fitted pipeline's accuracy on the held-out test split.
for algo, model in fit_models.items():
    test_accuracy = accuracy_score(y_test, model.predict(X_test))
    print(algo, test_accuracy)

In [None]:
# Persist the chosen model to disk. The 'lr' (logistic regression) pipeline is
# the one saved here; change the key to save a different pipeline.
with open('sign_detection.pkl', 'wb') as model_file:
    model_file.write(pickle.dumps(fit_models['lr']))

In [None]:
# Read the pickled pipeline back from disk.
# NOTE: unpickling can execute arbitrary code -- only load files you created yourself.
with open('sign_detection.pkl', 'rb') as model_file:
    loaded_model = pickle.loads(model_file.read())

In [None]:
# Display the restored pipeline to confirm it loaded.
loaded_model

In [None]:
# Live inference: classify every detected hand in each webcam frame with
# `loaded_model` and overlay the predicted class and its probability until
# 'q' is pressed.
MAX_EMPTY_READS = 30  # consecutive failed reads tolerated before giving up
BOX_STRIDE = 70       # vertical distance between the status boxes of successive hands

cap = cv2.VideoCapture(0)
empty_reads = 0
last_error = None

# Column names the model saw during fitting (None if unavailable), so the
# prediction frame matches the training frame's columns.
feature_names = getattr(loaded_model, 'feature_names_in_', None)

try:
    while cap.isOpened():
        ret, frame = cap.read()

        if not ret:
            # A failed read yields no frame; skip it rather than crash,
            # but stop if the camera never recovers.
            empty_reads += 1
            print("Ignoring empty camera frame.")
            if empty_reads >= MAX_EMPTY_READS:
                print("No frames received from the camera; stopping.")
                break
            continue
        empty_reads = 0

        # OpenCV delivers BGR frames, but the image is declared as SRGB,
        # so swap the channels before wrapping the array.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

        # Make Detections
        results = detector.detect(image)

        annotated_image = draw_landmarks_on_image(image.numpy_view(), results)
        # Back to BGR: the OpenCV overlay below and cv2.imshow both work in BGR.
        annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR)

        for hand_idx, hand in enumerate(results.hand_landmarks):
            try:
                row = [
                    value
                    for landmark in hand
                    for value in (landmark.x, landmark.y, landmark.z, landmark.visibility)
                ]
                # Named `features` rather than `X` so the training matrix `X` is not overwritten.
                features = pd.DataFrame([row], columns=feature_names)

                # predict the class
                sign_det_class = loaded_model.predict(features)[0]
                sign_det_prob = loaded_model.predict_proba(features)[0]

                # Give each hand its own status box so a second hand does not
                # paint over the first hand's prediction.
                top = hand_idx * BOX_STRIDE
                cv2.rectangle(annotated_image, (0, top), (250, top + 60), (245, 117, 16), -1)

                cv2.putText(annotated_image, 'CLASS', (95, top + 12),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
                cv2.putText(annotated_image, str(sign_det_class).split(' ')[0], (90, top + 60),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                cv2.putText(annotated_image, 'PROB', (15, top + 12),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
                cv2.putText(annotated_image, str(round(float(np.max(sign_det_prob)), 2)), (10, top + 60),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            except Exception as exc:
                # Keep the feed running, but surface the failure instead of hiding it;
                # identical consecutive errors are reported only once.
                message = repr(exc)
                if message != last_error:
                    print('Prediction failed:', message)
                    last_error = message

        cv2.imshow('Predictions', annotated_image)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()