# 0. Install and Import Dependencies

In [25]:
!pip install mediapipe opencv-python pandas scikit-learn



In [1]:
import mediapipe as mp # Import mediapipe
import cv2 # Import opencv

In [2]:

# First step is to initialize the Hands class an store it in a variable
mp_hands = mp.solutions.hands

# Now second step is to set the hands function which will hold the landmarks points
#hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.3)

# Last step is to set up the drawing function of hands landmarks on the image
mp_drawing = mp.solutions.drawing_utils

# 1. Make Some Detections

# Landmarks map
<img src=https://google.github.io/mediapipe/images/mobile/hand_landmarks.png />

In [4]:
relevant_landmarks = [0,16,18,20,22]

In [3]:
cap = cv2.VideoCapture(0)

with mp_hands.Hands(min_detection_confidence=0.8, min_tracking_confidence=0.5) as hands:
        while cap.isOpened():
            ret, frame = cap.read()

            # BGR 2 RGB
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Set flag
            image.flags.writeable = False

            # Detections
            results = hands.process(image)
            
            # Set flag to true
            image.flags.writeable = True

            # RGB 2 BGR
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)


            # Rendering results
            if results.multi_hand_landmarks:
                for num, hand in enumerate(results.multi_hand_landmarks):
                    mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS, 
                                            mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                            mp_drawing.DrawingSpec(color=(250, 44, 250), thickness=2, circle_radius=2),
                                             )
           


            cv2.imshow('Hand Tracking', image)
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break

cap.release()
cv2.destroyAllWindows()

In [4]:
print(results.multi_hand_landmarks[0].landmark[mp_hands.HandLandmark.WRIST],
results.multi_hand_landmarks[0].landmark[mp_hands.HandLandmark.INDEX_FINGER_MCP],
results.multi_hand_landmarks[0].landmark[mp_hands.HandLandmark.MIDDLE_FINGER_MCP],
results.multi_hand_landmarks[0].landmark[mp_hands.HandLandmark.RING_FINGER_MCP],
results.multi_hand_landmarks[0].landmark[mp_hands.HandLandmark.PINKY_MCP])


x: 0.5122029185295105
y: 0.8202745914459229
z: 7.458144182237447e-07
 x: 0.746350884437561
y: 0.6298924684524536
z: -0.0102226622402668
 x: 0.7046052813529968
y: 0.5664680600166321
z: -0.004212702624499798
 x: 0.654805600643158
y: 0.5247756838798523
z: -0.0059512462466955185
 x: 0.5953828692436218
y: 0.5008100867271423
z: -0.012931078672409058



In [5]:
# find the center of the hand 

n = len(results.multi_hand_landmarks[0].landmark)
sum_x = 0
sum_y = 0
sum_z = 0
center = {}
for i in range(n):
    sum_x += results.multi_hand_landmarks[0].landmark[i].x
    sum_y += results.multi_hand_landmarks[0].landmark[i].y
    sum_z += results.multi_hand_landmarks[0].landmark[i].z
    
x_center = sum_x/n
y_center = sum_y/n
z_center = sum_z/n


center['x'] = x_center 
center['y'] = y_center 
center['z'] = z_center 

center




{'x': 0.7482098454520816, 'y': 0.5222516996519906, 'z': -0.03594293322005558}

# 2. Capture Landmarks & Export to CSV
<!--<img src="https://i.imgur.com/8bForKY.png">-->
<!--<img src="https://i.imgur.com/AzKNp7A.png">-->

In [6]:
import csv
import os
import numpy as np

In [7]:
num_coords = len(results.multi_hand_landmarks[0].landmark)
num_coords

21

In [8]:
landmarks = ['class']
for val in range(1, num_coords+1):
    landmarks += ['x{}'.format(val), 'y{}'.format(val), 'z{}'.format(val), 'v{}'.format(val)]

In [13]:
landmarks

['class',
 'x1',
 'y1',
 'z1',
 'v1',
 'x2',
 'y2',
 'z2',
 'v2',
 'x3',
 'y3',
 'z3',
 'v3',
 'x4',
 'y4',
 'z4',
 'v4',
 'x5',
 'y5',
 'z5',
 'v5',
 'x6',
 'y6',
 'z6',
 'v6',
 'x7',
 'y7',
 'z7',
 'v7',
 'x8',
 'y8',
 'z8',
 'v8',
 'x9',
 'y9',
 'z9',
 'v9',
 'x10',
 'y10',
 'z10',
 'v10',
 'x11',
 'y11',
 'z11',
 'v11',
 'x12',
 'y12',
 'z12',
 'v12',
 'x13',
 'y13',
 'z13',
 'v13',
 'x14',
 'y14',
 'z14',
 'v14',
 'x15',
 'y15',
 'z15',
 'v15',
 'x16',
 'y16',
 'z16',
 'v16',
 'x17',
 'y17',
 'z17',
 'v17',
 'x18',
 'y18',
 'z18',
 'v18',
 'x19',
 'y19',
 'z19',
 'v19',
 'x20',
 'y20',
 'z20',
 'v20',
 'x21',
 'y21',
 'z21',
 'v21']

In [14]:
data_file = 'coords_of_shape.csv'

with open(data_file,mode='w', newline='') as f: 
    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerow(landmarks)

In [15]:
classes = ['word_h0_01']
#classes = ['shape_00','shape_01','shape_02','shape_03','shape_04','shape_05','shape_06','shape_07'] #put the names of the files here

In [18]:
for label in classes:
    # Load Video
    fn = label
    class_name = label[-2:] # only the number of the video
    cap = cv2.VideoCapture(f"C:/Users/hagar/OneDrive - mail.tau.ac.il/Desktop/Stage/LPC_2022/Hand decoder/Position/data/test_videos/{fn}.mp4")

    with mp_hands.Hands(min_detection_confidence=0.8, min_tracking_confidence=0.5) as hands:
        while cap.isOpened():
            ret, frame = cap.read()

            # BGR 2 RGB
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Set flag
            image.flags.writeable = False

            # Detections
            results = hands.process(image)
            
            # Set flag to true
            image.flags.writeable = True

            # RGB 2 BGR
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)


            # Rendering results
            if results.multi_hand_landmarks:
                for num, hand in enumerate(results.multi_hand_landmarks):
                    mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS, 
                                            mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                            mp_drawing.DrawingSpec(color=(250, 44, 250), thickness=2, circle_radius=2),
                                             )

            # Export coordinates
            try:
                # Extract Pose landmarks
                hand = results.multi_hand_landmarks[0].landmark
                hand_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in hand]).flatten())
                row = hand_row

                # Append class name 
                row.insert(0, class_name)

                # Export to CSV
                with open(data_file, mode='a', newline='') as f:
                    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                    csv_writer.writerow(row)

            except:
                pass

            cv2.imshow('hand_estimation', image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break

    print(f'{label} was learned')
    cap.release()
    cv2.destroyAllWindows()

word_h0_01 was learned


# 3. Train Custom Model Using Scikit Learn

## 3.1 Read in Collected Data and Process

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [11]:
df = pd.read_csv(r"C:\Users\hagar\OneDrive - mail.tau.ac.il\Desktop\Stage\LPC_2022\Hand decoder\Position\data\coords_position.csv")

In [12]:
df.head()

Unnamed: 0,class,x1,y1,z1,x2,y2,z2,x3,y3,z3,...,z30,x31,y31,z31,x32,y32,z32,x33,y33,z33
0,0,0.47271,0.52473,-0.475511,0.485259,0.494949,-0.444614,0.492754,0.494873,-0.444745,...,0.319577,0.431765,1.9265,0.376139,0.523671,1.975796,0.020084,0.44116,1.976516,0.054029
1,0,0.473372,0.522611,-0.489723,0.485682,0.492262,-0.458236,0.493825,0.491913,-0.45834,...,0.229015,0.431394,1.951722,0.107576,0.525847,1.993071,-0.049318,0.441042,2.000851,-0.202055
2,0,0.473899,0.520872,-0.522339,0.486252,0.49033,-0.4899,0.494807,0.489925,-0.489971,...,0.237235,0.431503,1.94994,0.207173,0.528592,1.993266,-0.015707,0.441408,2.001486,-0.106195
3,0,0.474279,0.519489,-0.534707,0.486711,0.4888,-0.503285,0.495543,0.488294,-0.503343,...,0.308221,0.428875,1.948126,0.258928,0.528549,1.992265,0.037789,0.43929,2.000137,-0.059013
4,0,0.47455,0.518794,-0.521482,0.487064,0.488003,-0.490144,0.496039,0.487493,-0.490219,...,0.31702,0.421591,1.946249,0.26099,0.522343,1.991866,0.04266,0.431875,1.998997,-0.051457


In [26]:
df.tail()

Unnamed: 0,class,x1,y1,z1,x2,y2,z2,x3,y3,z3,...,z30,x31,y31,z31,x32,y32,z32,x33,y33,z33
7351,4,0.482994,0.510142,-0.629055,0.494181,0.475171,-0.595769,0.501663,0.474687,-0.595895,...,0.294523,0.410745,2.067214,0.224063,0.528857,2.131766,-0.018198,0.426702,2.12616,-0.109316
7352,4,0.483007,0.511154,-0.630527,0.494196,0.476024,-0.597569,0.501677,0.475633,-0.597692,...,0.298766,0.410747,2.066162,0.225754,0.52883,2.130981,-0.01343,0.426674,2.125291,-0.107923
7353,4,0.483015,0.511255,-0.630561,0.494208,0.476138,-0.597693,0.50169,0.475782,-0.597818,...,0.298848,0.410982,2.064013,0.223081,0.528868,2.128735,-0.013095,0.426791,2.123121,-0.108779
7354,4,0.483044,0.511263,-0.6334,0.49422,0.476165,-0.600514,0.501704,0.475832,-0.600634,...,0.299108,0.410991,2.06369,0.225357,0.528802,2.12836,-0.012831,0.426678,2.123002,-0.107934
7355,4,0.483062,0.511295,-0.632105,0.494229,0.476224,-0.599413,0.501712,0.475921,-0.599538,...,0.29294,0.41146,2.063699,0.219419,0.528941,2.128354,-0.019065,0.427013,2.123007,-0.11413


In [119]:
df[df['class']==1]

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z499,v499,x500,y500,z500,v500,x501,y501,z501,v501
959,1,0.475757,0.525373,-0.467482,0.999979,0.487011,0.494481,-0.435314,0.999947,0.494802,...,-0.002405,0,0.496272,0.492282,0.002498,0,0.498018,0.490138,0.002333,0
960,1,0.475805,0.525361,-0.503314,0.999979,0.487028,0.494457,-0.471228,0.999947,0.494817,...,-0.002387,0,0.496244,0.493435,0.002458,0,0.498027,0.490964,0.002335,0
961,1,0.475826,0.525081,-0.501486,0.999980,0.487028,0.494221,-0.466487,0.999949,0.494819,...,-0.001734,0,0.495772,0.495649,0.002947,0,0.497627,0.492401,0.002900,0
962,1,0.475923,0.524735,-0.487827,0.999981,0.487048,0.493992,-0.451113,0.999951,0.494840,...,-0.002166,0,0.496248,0.496055,0.002482,0,0.498072,0.492868,0.002433,0
963,1,0.475981,0.524722,-0.485956,0.999982,0.487068,0.493976,-0.449486,0.999953,0.494861,...,-0.002310,0,0.496319,0.495232,0.002239,0,0.498146,0.492254,0.002146,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3962,1,0.464132,0.523850,-0.564199,0.999987,0.478453,0.488426,-0.537470,0.999966,0.486821,...,-0.002345,0,0.492508,0.486761,0.003499,0,0.494346,0.484520,0.003399,0
3963,1,0.464241,0.524273,-0.575867,0.999987,0.478512,0.488637,-0.548930,0.999966,0.486891,...,-0.002354,0,0.492649,0.486839,0.003607,0,0.494487,0.484609,0.003516,0
3964,1,0.464366,0.524417,-0.581871,0.999987,0.478610,0.488725,-0.554887,0.999967,0.487008,...,-0.002366,0,0.492653,0.486830,0.003513,0,0.494484,0.484646,0.003416,0
3965,1,0.464377,0.524538,-0.595928,0.999987,0.478626,0.488814,-0.569151,0.999967,0.487021,...,-0.002585,0,0.492721,0.486789,0.003257,0,0.494552,0.484638,0.003149,0


In [13]:
X = df.drop('class', axis=1) # features
y = df['class'] # target value

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1234)

In [15]:
y_test

6259    4
38      0
5556    3
7019    4
7342    4
       ..
3327    2
4480    3
1456    1
4813    3
5736    3
Name: class, Length: 2207, dtype: int64

## 3.2 Train Machine Learning Classification Model

In [16]:
from sklearn.pipeline import make_pipeline 
from sklearn.preprocessing import StandardScaler 

from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [17]:
pipelines = {
    'lr':make_pipeline(StandardScaler(), LogisticRegression()),
    'rc':make_pipeline(StandardScaler(), RidgeClassifier()),
    'rf':make_pipeline(StandardScaler(), RandomForestClassifier()),
    'gb':make_pipeline(StandardScaler(), GradientBoostingClassifier()),
}

In [18]:
fit_models = {}
for algo, pipeline in pipelines.items():
    model = pipeline.fit(X_train, y_train)
    fit_models[algo] = model

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [19]:
fit_models

{'lr': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('logisticregression', LogisticRegression())]),
 'rc': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('ridgeclassifier', RidgeClassifier())]),
 'rf': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('randomforestclassifier', RandomForestClassifier())])}

In [20]:
fit_models['rc'].predict(X_test)

array([4, 0, 3, ..., 1, 3, 3], dtype=int64)

## 3.3 Evaluate and Serialize Model 

In [21]:
from sklearn.metrics import accuracy_score # Accuracy metrics 
import pickle 

In [22]:
for algo, model in fit_models.items():
    yhat = model.predict(X_test)
    print(algo, accuracy_score(y_test, yhat))

lr 0.9945627548708654
rc 0.9963751699139103
rf 0.9945627548708654


In [23]:
fit_models['rc'].predict(X_test)

array([4, 0, 3, ..., 1, 3, 3], dtype=int64)

In [14]:
y_test

6259    4
38      0
5556    3
7019    4
7342    4
       ..
3327    2
4480    3
1456    1
4813    3
5736    3
Name: class, Length: 2207, dtype: int64

In [24]:
with open('body_language.pkl', 'wb') as f:
    pickle.dump(fit_models['rc'], f)

# 4. Make Detections with Model

In [36]:
with open('body_language.pkl', 'rb') as f:
    model = pickle.load(f)

In [37]:
model

In [38]:
landmarks = ['predicted_class','probablitiy_of_pred']
for val in range(1, num_coords+1):
    landmarks += ['x{}'.format(val), 'y{}'.format(val), 'z{}'.format(val), 'v{}'.format(val)]

In [39]:
with open('position_estimation_by_frames.csv',mode='w', newline='') as f: 
    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerow(landmarks)

In [41]:
# Load Video
fn = 'word_h0_01'
cap = cv2.VideoCapture(f"videos/{fn}.mp4")
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))

size = (frame_width, frame_height)


marked_video = cv2.VideoWriter(f'{fn}_marked.avi',cv2.VideoWriter_fourcc(*'MJPG'),30, size)
# Initiate holistic model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # Recolor Feed
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = holistic.process(image)
        
        
        # Recolor image back to BGR for rendering
        image.flags.writeable = True   
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        
       
        # Pose Detections
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                 )
                        

        # Export coordinates
        try:
            # Extract Pose landmarks
            pose = results.pose_landmarks.landmark
            pose_row = list(np.array([[landmark.x, landmark.y, landmark.z] for landmark in pose]).flatten())
            row = pose_row
            
            # Name colls
            num_coords = len(results.pose_landmarks.landmark)
            col_name = []
            for val in range(1, num_coords+1):
                col_name += ['x{}'.format(val), 'y{}'.format(val), 'z{}'.format(val)]

            # Make Detections
            X = pd.DataFrame([row], columns = col_name)
            predicted_position = model.predict(X)[0]
            position_prob = model.predict_proba(X)[0]
            
            
            # Append prediction class and probability 
            row.insert(0, predicted_position)
            row.insert(1, position_prob)
            
            
            # Write prediction to a CSV
            with open('position_estimation_by_frames.csv', mode='a', newline='') as f:
                csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                csv_writer.writerow(row)
                
                
            # Write prediction on video
            font = cv2.FONT_HERSHEY_SIMPLEX
            
            # Get status box
            cv2.rectangle(image, (0,0), (250, 60), (245, 117, 16), -1)
            
            # Display Class
            cv2.putText(image, 'Pred Position',
                         (95,12), font, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
            cv2.putText(image, predicted_position,
                         (90,40), font, 1, (255, 255, 255), 2, cv2.LINE_AA)
                    
            
            # Display Probability
            cv2.putText(image, 'PROB'
                        , (15,12), font, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
            cv2.putText(image, str(round(predicted_position[np.argmax(position_prob)],2))
                        , (10,40), font, 1, (255, 255, 255), 2, cv2.LINE_AA)
            
            
            
        except:
            pass


        cv2.imshow('cued_estimated', image)
        marked_video.write(image)


        if cv2.waitKey(10) & 0xFF == ord('q'):
            break


marked_video.release()
cap.release()
cv2.destroyAllWindows()
print("The video was successfully saved")

The video was successfully saved


In [None]:

#Syntax: cv2.rectangle(image, start_point, end_point, color, thickness)
#Syntax: cv2.putText(image, text, org, font, fontScale, color[, thickness[, lineType[, bottomLeftOrigin]]])
# Write prediction on video
            font = cv2.FONT_HERSHEY_SIMPLEX
           
            
            # Get status box
            cv2.rectangle(image, (0,0), (250, 60), (245, 117, 16), -1)
            
            # Display Class
            cv2.putText(image, 'Pred Position'
                        , (95,12), font, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
            cv2.putText(image, predicted_position,
                        , (90,40), font, 1, (255, 255, 255), 2, cv2.LINE_AA)
                    
            
            # Display Probability
            cv2.putText(image, 'PROB'
                        , (15,12), font, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
            cv2.putText(image, str(round(predicted_position[np.argmax(position_prob)],2))
                        , (10,40), font, 1, (255, 255, 255), 2, cv2.LINE_AA)
            
            
            marked_video.release()

            