In [1]:
import cv2 as cv
import os
import mediapipe as mp
import matplotlib.pyplot as plt
import numpy as np
import csv
import pandas as pd

In [4]:
# cap = cv.VideoCapture(0)
# if not cap.isOpened():
#     print("Cannot open camera")
#     exit()
# while True:
#     # Capture frame-by-frame
#     ret, frame = cap.read()
#     # if frame is read correctly ret is True
#     if not ret:
#         print("Can't receive frame (stream end?). Exiting ...")
#         break
#     # Our operations on the frame come here
#     gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
#     # Display the resulting frame
#     cv.imshow('frame', gray)
#     if cv.waitKey(1) == ord('q'):
#         break
# # When everything done, release the capture
# cap.release()
# cv.destroyAllWindows()

In [2]:
holistic_model = mp.solutions.holistic  # Holistic model to determine keypoints.
drawing_util = mp.solutions.drawing_utils  # Drawing utilities to draw keypoints.

In [3]:
def kp_detection(image, model):
    image = cv.cvtColor(image, cv.COLOR_BGR2RGB) # Converting image to RGB from opencv's default BGR.
    image.flags.writeable = False  # Setting image to not writable.
    result = model.process(image) # Detecting keypoints from image.
    image.flags.writeable = True   # Setting image to writable.
    image = cv.cvtColor(image, cv.COLOR_RGB2BGR) # Converting image back to BGR from RGB.
    return image, result

In [4]:
def kp_drawing(image, results):
    # Draw face landmarks 
    drawing_util.draw_landmarks(image,
                                results.face_landmarks,
                                holistic_model.FACEMESH_CONTOURS,
                                drawing_util.DrawingSpec(color=(20,15,10), thickness=1, circle_radius=1), # Keypoint style
                                drawing_util.DrawingSpec(color=(255,255,100), thickness=1, circle_radius=1) # Keypoint connection style
                                )

    # Draw pose landmarks  
    drawing_util.draw_landmarks(image, 
                                results.pose_landmarks, 
                                holistic_model.POSE_CONNECTIONS,
                                drawing_util.DrawingSpec(color=(100,255,255), thickness=2, circle_radius=2),
                                drawing_util.DrawingSpec(color=(100,255,255), thickness=2, circle_radius=1)
                                )

    # Draw left hand landmarks
    drawing_util.draw_landmarks(image, 
                                results.left_hand_landmarks, 
                                holistic_model.HAND_CONNECTIONS,
                                drawing_util.DrawingSpec(color=(255,0,255), thickness=2, circle_radius=2),
                                drawing_util.DrawingSpec(color=(255,0,255), thickness=2, circle_radius=2)
                                )
    
    # Draw right hand landmarks 
    drawing_util.draw_landmarks(image, 
                                results.right_hand_landmarks, 
                                holistic_model.HAND_CONNECTIONS,
                                drawing_util.DrawingSpec(color=(255,0,0), thickness=2, circle_radius=2),
                                drawing_util.DrawingSpec(color=(255,0,0), thickness=2, circle_radius=2)
                                )

In [8]:
capture = cv.VideoCapture(0)
with holistic_model.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while capture.isOpened():
        ret, frame = capture.read()

        image, result = kp_detection(frame, holistic)

        kp_drawing(image, result)

        cv.imshow('My image', image)

        if cv.waitKey(10) & 0xFF == ord(' '):
            break

capture.release()
cv.destroyAllWindows()


KeyboardInterrupt: 

In [5]:
plt.imshow(cv.cvtColor(image, cv.COLOR_BGR2RGB))

NameError: name 'image' is not defined

In [5]:
# Extracting KeyPoints 

def extract_keypoints(results):
    """ A function to extract keypoints. """

    if result.right_hand_landmarks: # If the right hand was captured.
        # Assign to 'rh' the x,y and z keypoints of each landmark and append it to an array which is then flatttened into a 1-dimensional array. 
        rh = np.array([[mark.x, mark.y, mark.z] for mark in result.right_hand_landmarks.landmark]).flatten() 
    else:
        # Assign to 'rh' an array of zeros. 
        # rh has the same size in both instances.
        rh = np.zeros(63)


    # Extracting keypoints from the left hand landmarks
    if result.left_hand_landmarks: # If the left hand was captured.
        # Assign to 'lh' the x,y and z keypoints of each landmark and append it to an array which is then flatttened into a 1-dimensional array. 
        lh = np.array([[mark.x, mark.y, mark.z] for mark in result.left_hand_landmarks.landmark]).flatten() 
    else:
        # Assign to 'lh' an array of zeros. 
        # lh has the same size in both instances.
        lh = np.zeros(63)


    # # Extracting keypoints from the face landmarks
    # if result.face_landmarks: # If the face was captured.
    #     # Assign to 'face' the x,y and z keypoints of each landmark and append it to an array which is then flatttened into a 1-dimensional array. 
    #     face = np.array([[mark.x, mark.y, mark.z] for mark in result.face_landmarks.landmark]).flatten() 
    # else:
    #     # Assign to 'face' an array of zeros. 
    #     # face has the same size in both instances.
    #     face = np.zeros(1404)


    # # Extracting keypoints from the pose landmarks
    # if result.pose_landmarks: # If the pose was captured.
    #     # Assign to 'pose' the x,y,z and visibility keypoints of each landmark and append it to an array which is then flatttened into a 1-dimensional array. 
    #     pose = np.array([[mark.x, mark.y, mark.z, mark.visibility] for mark in result.pose_landmarks.landmark]).flatten() 
    # else:
    #     # Assign to 'pose' an array of zeros. 
    #     # pose has the same size in both instances.
    #     pose = np.zeros(132)

    # return np.concatenate([pose, face, lh, rh])
    return np.concatenate([lh, rh])

In [6]:
import os
os.getcwd()

'/home/daniel/Personal/Projects/gsl/notebooks'

In [9]:
signs = np.array(list('012')) # Signs we would try to detect.
num_imgs = 3 

In [9]:
# Data collection function
def data_collection(signs, img_num):
    capture = cv.VideoCapture(0)

    f = open('dataset.csv', mode='w', newline='')
    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

    with holistic_model.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        for sign in signs:
            for img in range(num_imgs):
            
                ret, frame = capture.read() 

                # Make keypoint detection.
                image, result = kp_detection(frame, holistic)

                # Draw landmarks 
                kp_drawing(image, result)

                # Show frame in a window.
                cv.imshow("OpenCV Feed", image) 

                # Applying Wait Logic
                if img == 0:
                    cv.putText(image, "STARTING COLLECTION", (120,200), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 2, cv.LINE_AA)
                    cv.putText(image, "Collecting frames for sign {}.  Image Number {}".format(sign, img), (12,15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1, cv.LINE_AA)
                    cv.imshow('OpenCV Feed', image) # Show to screen
                    cv.waitKey(5000)
                else:
                    cv.putText(image, "Collecting frames for sign {}.  Image Number {}".format(sign, img), (12,15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1, cv.LINE_AA)
                    cv.imshow('OpenCV Feed', image) # Show to screen
                    cv.waitKey(3000)

                # Saving Keypoints in a csv file.
                keypoints = extract_keypoints(result)
                csv_writer.writerow(np.append(keypoints, sign))
                

                # If there is a keyup and the pressed key is the 'spacebar', break out of the loop.
                if cv.waitKey(10) & 0xFF == ord(' '): 
                    break

        capture.release() # Close the video capture object 
        cv.destroyAllWindows() # Close all OpenCV windows.
    f.close()

In [10]:
data_collection(signs, num_imgs)

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
qt.qpa.plugin: Could not find the Qt platform plugin "wayland" in "/home/daniel/Personal/Projects/gsl/gsl_env/lib/python3.10/site-packages/cv2/qt/plugins"


NameError: name 'result' is not defined

: 

In [2]:
# Collecting Keypoint Values for Training and Testing.

capture = cv.VideoCapture(0)

f = open('../data/dataset.csv', mode='w', newline='')
csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

with holistic_model.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    for sign in signs:
        for img in range(num_imgs):
         
            ret, frame = capture.read() 

            # Make keypoint detection.
            image, result = kp_detection(frame, holistic)

            # Draw landmarks 
            kp_drawing(image, result)

            # Show frame in a window.
            cv.imshow("OpenCV Feed", image) 

            # Applying Wait Logic
            if img == 0:
                cv.putText(image, "STARTING COLLECTION", (120,200), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 2, cv.LINE_AA)
                cv.putText(image, "Collecting frames for sign {}.  Image Number {}".format(sign, img+1), (12,15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1, cv.LINE_AA)
                cv.imshow('OpenCV Feed', image) # Show to screen
                cv.waitKey(5000)
            else:
                cv.putText(image, "Collecting frames for sign {}.  Image Number {}".format(sign, img+1), (12,15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1, cv.LINE_AA)
                cv.imshow('OpenCV Feed', image) # Show to screen
                cv.waitKey(3000)

            # Saving Keypoints in a csv file.
            keypoints = extract_keypoints(result)
            csv_writer.writerow(np.append(keypoints, sign))
            

            # If there is a keyup and the pressed key is the 'spacebar', break out of the loop.
            if cv.waitKey(10) & 0xFF == ord(' '): 
                break

    capture.release() # Close the video capture object 
    cv.destroyAllWindows() # Close all OpenCV windows.
f.close()

NameError: name 'cv' is not defined

In [2]:
cols = [a+str(i) for i in range(1,43) for a in list("XYZ")]
cols.append('sign')
len(cols)

127

In [23]:
data = pd.read_csv('../data/dataset.csv', names=cols)
data

Unnamed: 0,X1,Y1,Z1,X2,Y2,Z2,X3,Y3,Z3,X4,...,X40,Y40,Z40,X41,Y41,Z41,X42,Y42,Z42,sign
0,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,
1,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,
2,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,
3,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,
4,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1955,0.753855,0.780516,-2.783092e-07,0.707944,0.740366,-0.023037,0.665090,0.681268,-0.046624,0.650422,...,0.647332,0.539427,-0.060122,0.621430,0.565719,-0.048807,0.594269,0.562218,-0.038206,positive
1956,0.754527,0.781368,-2.719811e-07,0.706204,0.740489,-0.024340,0.662884,0.682793,-0.048191,0.645441,...,0.640877,0.539529,-0.071622,0.617393,0.565177,-0.059336,0.593074,0.562472,-0.049709,positive
1957,0.756121,0.784600,-2.919269e-07,0.708100,0.740174,-0.024545,0.664918,0.682015,-0.049987,0.646768,...,0.644277,0.539889,-0.062538,0.618925,0.567810,-0.051054,0.591736,0.565675,-0.040139,positive
1958,0.755070,0.779666,-2.660178e-07,0.708111,0.739435,-0.022127,0.665490,0.680687,-0.045329,0.649144,...,0.647579,0.537988,-0.066173,0.623570,0.564143,-0.053956,0.595847,0.562509,-0.044221,positive


In [18]:
data.shape

(1540, 127)

In [38]:
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
data['sign'] = label_encoder.fit_transform(data.iloc[:, -1])
label_encoder.classes_

array([' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'equal',
       'negative', 'positive'], dtype=object)

In [39]:
data

Unnamed: 0,X1,Y1,Z1,X2,Y2,Z2,X3,Y3,Z3,X4,...,X40,Y40,Z40,X41,Y41,Z41,X42,Y42,Z42,sign
0,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0
1,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0
2,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0
3,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0
4,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
975,0.753855,0.780516,-2.783092e-07,0.707944,0.740366,-0.023037,0.665090,0.681268,-0.046624,0.650422,...,0.647332,0.539427,-0.060122,0.621430,0.565719,-0.048807,0.594269,0.562218,-0.038206,13
976,0.754527,0.781368,-2.719811e-07,0.706204,0.740489,-0.024340,0.662884,0.682793,-0.048191,0.645441,...,0.640877,0.539529,-0.071622,0.617393,0.565177,-0.059336,0.593074,0.562472,-0.049709,13
977,0.756121,0.784600,-2.919269e-07,0.708100,0.740174,-0.024545,0.664918,0.682015,-0.049987,0.646768,...,0.644277,0.539889,-0.062538,0.618925,0.567810,-0.051054,0.591736,0.565675,-0.040139,13
978,0.755070,0.779666,-2.660178e-07,0.708111,0.739435,-0.022127,0.665490,0.680687,-0.045329,0.649144,...,0.647579,0.537988,-0.066173,0.623570,0.564143,-0.053956,0.595847,0.562509,-0.044221,13


In [40]:
data.isnull().sum()

X1      0
Y1      0
Z1      0
X2      0
Y2      0
       ..
Z41     0
X42     0
Y42     0
Z42     0
sign    0
Length: 127, dtype: int64

In [24]:
data.sign.value_counts()

            140
0           140
1           140
2           140
3           140
4           140
5           140
6           140
7           140
8           140
9           140
equal       140
negative    140
positive    140
Name: sign, dtype: int64

In [42]:
data.iloc[:, -1]

0       0
1       0
2       0
3       0
4       0
       ..
975    13
976    13
977    13
978    13
979    13
Name: sign, Length: 980, dtype: int64

In [43]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(data.iloc[:, :-1], data.iloc[:, -1], test_size = 0.1, shuffle=True)

In [44]:
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)
y_train.value_counts()

(882, 126) (98, 126) (882,) (98,)


5     66
3     65
9     65
0     64
4     64
12    64
6     64
10    63
11    63
2     63
8     61
1     60
7     60
13    60
Name: sign, dtype: int64

In [45]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

In [46]:
pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("classifier", RandomForestClassifier())
])

# parameters = {
#     "classifier__max_depth" :np.arange(1,10)
# }

# grid = GridSearchCV(pipeline, parameters, cv=2)
# grid.fit(x_train, y_train)
pipeline.fit(x_train, y_train)

In [21]:
# grid.cv_results_
# grid.best_params_, grid.best_score_

In [47]:
pipeline.score(x_train, y_train)
# grid.score(x_train, y_train)

1.0

In [48]:
pipeline.predict(x_test)
# grid.predict(x_test)

array([10, 11, 11,  4,  2, 12, 13,  2,  7,  0,  7,  9,  6,  1,  1, 12,  5,
        7, 13, 11,  8,  7,  3,  8,  3, 13,  6, 11, 12, 10,  7,  4,  8,  7,
        9, 13,  7,  6,  8, 10, 13,  7,  1,  1,  4,  0,  7,  5,  1,  0,  8,
        5, 10, 12, 13,  9,  2,  5,  0,  1, 12,  1,  4,  0,  4,  0,  7,  2,
       12,  2,  5, 13, 13, 10,  4,  6,  1, 13, 11,  9,  3,  8,  9,  1,  4,
        8, 10,  6, 11, 10, 11,  9,  2,  2,  8,  1, 13,  8])

In [49]:
from sklearn.metrics import classification_report
y = classification_report(y_test, pipeline.predict(x_test))
# y = classification_report(y_test, grid.predict(x_test))

In [50]:
import pprint
pprint.pprint(y)

('              precision    recall  f1-score   support\n'
 '\n'
 '           0       1.00      1.00      1.00         6\n'
 '           1       0.90      0.90      0.90        10\n'
 '           2       0.86      0.86      0.86         7\n'
 '           3       1.00      0.60      0.75         5\n'
 '           4       0.86      1.00      0.92         6\n'
 '           5       0.60      0.75      0.67         4\n'
 '           6       1.00      0.83      0.91         6\n'
 '           7       0.90      0.90      0.90        10\n'
 '           8       1.00      1.00      1.00         9\n'
 '           9       0.83      1.00      0.91         5\n'
 '          10       1.00      1.00      1.00         7\n'
 '          11       1.00      1.00      1.00         7\n'
 '          12       1.00      1.00      1.00         6\n'
 '          13       1.00      1.00      1.00        10\n'
 '\n'
 '    accuracy                           0.93        98\n'
 '   macro avg       0.92      0.92      0.9

In [51]:
# Testing 

threshold = 0.7


capture = cv.VideoCapture(0) # Initialise the video capture object.

# While our video caputure is open, do
with holistic_model.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while capture.isOpened(): 
        ret, frame = capture.read() # Initialise the reading of the frame. 

        # Make keypoint detection.
        image, result = kp_detection(frame, holistic)

        # Draw landmarks 
        kp_drawing(image, result)

        # Prediction logic 
        keypoints = extract_keypoints(result)

        cla = pipeline.predict([keypoints])
        
        # cla = grid.predict([keypoints])

        # if pipeline.predict_proba([keypoints]).argmax() > threshold:
        #     cv.rectangle(image, (0,2), (60,40), (117,117,117), -1)
        #     cv.putText(image, str(np.array(signs)[pipeline.predict_proba([keypoints]).argmax()]), (10, 30), cv.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 3, cv.LINE_AA) 
        # else : 
        #     cv.rectangle(image, (0,2), (60,40), (117,117,117), -1)
        #     cv.putText(image, "NA", (10, 30), cv.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 3, cv.LINE_AA)

        cv.rectangle(image, (0,2), (60,40), (117,117,117), -1)
        cv.putText(image, str(label_encoder.classes_[cla[0]]), (10, 30), cv.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 3, cv.LINE_AA)

        # Show frame in a window.
        cv.imshow("OpenCV Feed", image) 

        # If there is a keyup and the pressed key is the 'spacebar', break out of the loop.
        if cv.waitKey(10) & 0xFF == ord(' '): 
            break

capture.release() # Close the video capture object 
cv.destroyAllWindows() # Close all OpenCV windows.



In [27]:
import pickle
with open('models/model.pkl', 'wb') as model:
    pickle.dump(pipeline, model)

In [30]:
# pickle.loads(model)

In [3]:
import datetime
str(datetime.datetime.now())

'2022-09-08 22:54:06.048093'