In [2]:
#!pip install mediapipe opencv-python scikit-learn

In [3]:
import mediapipe as mp  #import mediapipe
import cv2              # import OpenCV (Debian/Ubuntu system package: sudo apt-get install python3-opencv)

In [6]:
# Short aliases for the two MediaPipe modules used by the cells below.
mp_holistic = mp.solutions.holistic       # holistic solution: Holistic model, connection sets, PoseLandmark
mp_drawing = mp.solutions.drawing_utils   # drawing helpers: draw_landmarks and DrawingSpec

### Make some detections

In [33]:
# Live preview: run MediaPipe Holistic on each webcam frame and draw the detected
# face, hand and pose landmarks. Press "q" in the preview window to stop.
cap = cv2.VideoCapture(0) #might be different for you
try:
    # Initiate holistic model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # read() reported failure, so `frame` is not a usable image; stop cleanly
                # instead of letting cvtColor raise on it.
                print('Could not read a frame from the camera; stopping.')
                break

            # Recolor Feed (BGR -> RGB before handing the frame to the model)
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False

            # Make Detections
            # `results` exposes face_landmarks, pose_landmarks, left_hand_landmarks, right_hand_landmarks
            results = holistic.process(image)

            # Recolor image back to BGR for rendering
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # 1. Draw face landmarks
            mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
                                     mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                                     mp_drawing.DrawingSpec(color=(80,255,121), thickness=1, circle_radius=1)  # was 256: above the 8-bit channel maximum
                                     )

            # 2. Right hand
            mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                     mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                                     mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                                     )

            # 3. Left Hand
            mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                     mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                                     mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                                     )

            # 4. Pose Detections
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                                     mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                     mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                     )

            cv2.imshow('Raw Webcam Feed', image)

            # "q" ends the loop
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Runs on normal exit, on a failed read and on any exception or interrupt,
    # so the camera and the preview window are always released.
    cap.release()
    cv2.destroyAllWindows()

In [32]:
# Stand-alone cleanup: the same two calls that end the preceding cell.
# Presumably kept so the camera and window can be released by hand if that cell stops early.
cap.release()
cv2.destroyAllWindows()

In [8]:
#results.face_landmarks

landmark {
  x: 0.37196439504623413
  y: 0.6029144525527954
  z: -0.015364538878202438
}
landmark {
  x: 0.3765926659107208
  y: 0.5665925145149231
  z: -0.04236150532960892
}
landmark {
  x: 0.3739210069179535
  y: 0.5739887356758118
  z: -0.01918364129960537
}
landmark {
  x: 0.3721824586391449
  y: 0.5200304388999939
  z: -0.03734074905514717
}
landmark {
  x: 0.37794339656829834
  y: 0.554385244846344
  z: -0.04644125699996948
}
landmark {
  x: 0.37891870737075806
  y: 0.5358776450157166
  z: -0.0451979897916317
}
landmark {
  x: 0.38058239221572876
  y: 0.48825138807296753
  z: -0.029124794527888298
}
landmark {
  x: 0.32083481550216675
  y: 0.46679916977882385
  z: -0.0035834137815982103
}
landmark {
  x: 0.382322758436203
  y: 0.4591526687145233
  z: -0.027611635625362396
}
landmark {
  x: 0.38382306694984436
  y: 0.44362837076187134
  z: -0.03166532889008522
}
landmark {
  x: 0.38803794980049133
  y: 0.37907665967941284
  z: -0.030963150784373283
}
landmark {
  x: 0.37142878770

# Capture Landmarks and Export to CSV 

In [10]:
#<img src="https://i.imgur.com/AzKNp7A.png">

In [10]:
import csv
import os
import numpy as np

In [11]:
# Total number of landmarks per sample (pose + face): 33 + 468 = 501.
# `results` is whatever the last processed webcam frame produced, so either landmark
# list can be None if nothing was detected in that frame; fall back to the known
# counts instead of raising AttributeError.
last_pose = results.pose_landmarks
last_face = results.face_landmarks
pose_count = len(last_pose.landmark) if last_pose is not None else 33
face_count = len(last_face.landmark) if last_face is not None else 468
nums_coords = pose_count + face_count

In [12]:
# CSV header: the label column followed by x/y/z/v for every landmark, numbered from 1
# (x = x coordinate, y = y coordinate, z = z coordinate, v = visibility).
landmarks = ['class'] + [
    '{}{}'.format(axis, idx)
    for idx in range(1, nums_coords + 1)
    for axis in ('x', 'y', 'z', 'v')
]

In [13]:
#landmarks

['class',
 'x1',
 'y1',
 'z1',
 'v1',
 'x2',
 'y2',
 'z2',
 'v2',
 'x3',
 'y3',
 'z3',
 'v3',
 'x4',
 'y4',
 'z4',
 'v4',
 'x5',
 'y5',
 'z5',
 'v5',
 'x6',
 'y6',
 'z6',
 'v6',
 'x7',
 'y7',
 'z7',
 'v7',
 'x8',
 'y8',
 'z8',
 'v8',
 'x9',
 'y9',
 'z9',
 'v9',
 'x10',
 'y10',
 'z10',
 'v10',
 'x11',
 'y11',
 'z11',
 'v11',
 'x12',
 'y12',
 'z12',
 'v12',
 'x13',
 'y13',
 'z13',
 'v13',
 'x14',
 'y14',
 'z14',
 'v14',
 'x15',
 'y15',
 'z15',
 'v15',
 'x16',
 'y16',
 'z16',
 'v16',
 'x17',
 'y17',
 'z17',
 'v17',
 'x18',
 'y18',
 'z18',
 'v18',
 'x19',
 'y19',
 'z19',
 'v19',
 'x20',
 'y20',
 'z20',
 'v20',
 'x21',
 'y21',
 'z21',
 'v21',
 'x22',
 'y22',
 'z22',
 'v22',
 'x23',
 'y23',
 'z23',
 'v23',
 'x24',
 'y24',
 'z24',
 'v24',
 'x25',
 'y25',
 'z25',
 'v25',
 'x26',
 'y26',
 'z26',
 'v26',
 'x27',
 'y27',
 'z27',
 'v27',
 'x28',
 'y28',
 'z28',
 'v28',
 'x29',
 'y29',
 'z29',
 'v29',
 'x30',
 'y30',
 'z30',
 'v30',
 'x31',
 'y31',
 'z31',
 'v31',
 'x32',
 'y32',
 'z32',
 'v32',
 '

In [14]:
# Start coords.csv from scratch (mode 'w' truncates it) with the header row only.
with open('coords.csv', mode='w', newline='') as header_file:
    header_writer = csv.writer(header_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    header_writer.writerow(landmarks)

### Record landmark samples for one class

In [16]:
# Label stored as the first field of every row recorded by the capture cell below.
class_name = 'Thumbs up' # Change this for each new class

In [17]:
# Recording loop: same live preview as before, and every frame in which both pose and
# face were detected is appended to coords.csv as one row labelled with `class_name`.
# Press "q" in the preview window to stop.
cap = cv2.VideoCapture(0) #might be different for you
try:
    # Initiate holistic model; the CSV is opened once in append mode for the whole
    # session instead of being reopened on every frame.
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic, \
            open('coords.csv', mode='a', newline='') as f:
        csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # read() reported failure, so `frame` is not a usable image; stop cleanly
                # instead of letting cvtColor raise on it.
                print('Could not read a frame from the camera; stopping.')
                break

            # Recolor Feed (BGR -> RGB before handing the frame to the model)
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False

            # Make Detections
            # `results` exposes face_landmarks, pose_landmarks, left_hand_landmarks, right_hand_landmarks
            results = holistic.process(image)

            # Recolor image back to BGR for rendering
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # 1. Draw face landmarks
            mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
                                     mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                                     mp_drawing.DrawingSpec(color=(80,255,121), thickness=1, circle_radius=1)  # was 256: above the 8-bit channel maximum
                                     )

            # 2. Right hand
            mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                     mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                                     mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                                     )

            # 3. Left Hand
            mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                     mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                                     mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                                     )

            # 4. Pose Detections
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                                     mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                     mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                     )

            # Export coordinates.
            # A frame is only recorded when both landmark sets exist; this explicit check
            # replaces the former bare `except: pass`, so genuine errors (e.g. a failed
            # write) are no longer hidden.
            if results.pose_landmarks is not None and results.face_landmarks is not None:
                # Extract Pose landmarks as a flat [x, y, z, visibility, x, y, ...] list
                pose = results.pose_landmarks.landmark
                pose_row = [value
                            for landmark in pose
                            for value in (landmark.x, landmark.y, landmark.z, landmark.visibility)]

                # Extract Face landmarks in the same layout
                face = results.face_landmarks.landmark
                face_row = [value
                            for landmark in face
                            for value in (landmark.x, landmark.y, landmark.z, landmark.visibility)]

                # Class name first, then pose values, then face values (matches the header order)
                row = [class_name] + pose_row + face_row

                # Export to CSV
                csv_writer.writerow(row)

            cv2.imshow('Raw Webcam Feed', image)

            # "q" ends the loop
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Runs on normal exit, on a failed read and on any exception or interrupt,
    # so the camera and the preview window are always released.
    cap.release()
    cv2.destroyAllWindows()

In [33]:
# Stand-alone cleanup: the same two calls that end the preceding cell.
# Presumably kept so the camera and window can be released by hand if that cell stops early.
cap.release()
cv2.destroyAllWindows()

# Train Custom Model Using Scikit Learn

# 1) Read in collected Data and Process

In [15]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [16]:
# Load the recorded samples; the header row written earlier names the columns
# 'class', 'x1', 'y1', 'z1', 'v1', ...
df = pd.read_csv('coords.csv')

In [17]:
# Separate the label column from the landmark columns.
y = df['class']               # target variable: the recorded class name
x = df.drop(columns='class')  # features: every remaining column

In [18]:
# Hold out 30% of the rows for evaluation; random_state makes the split repeatable.
# NOTE(review): the capture cell writes one row per webcam frame, so a random split
# puts near-identical neighbouring frames on both sides. The test accuracy is
# therefore likely optimistic -- consider splitting by recording session instead.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=1234)

In [22]:
x_train

Unnamed: 0,x1,y1,z1,v1,x2,y2,z2,v2,x3,y3,...,z499,v499,x500,y500,z500,v500,x501,y501,z501,v501
4767,0.592480,0.385028,-1.013823,0.999933,0.617316,0.321265,-0.964966,0.999830,0.631286,0.319392,...,-0.009197,0.0,0.646644,0.320932,0.008073,0.0,0.651015,0.314393,0.008563,0.0
1891,0.505961,0.296079,-0.577866,0.999652,0.526463,0.235807,-0.539228,0.998903,0.541519,0.235045,...,-0.012879,0.0,0.546270,0.244134,0.004400,0.0,0.550124,0.241401,0.004294,0.0
1705,0.504032,0.510428,-0.678017,0.999885,0.524235,0.455022,-0.634660,0.999813,0.537052,0.455398,...,-0.009402,0.0,0.544931,0.444323,0.005035,0.0,0.548473,0.442347,0.004930,0.0
434,0.607083,0.540775,-1.426183,0.999930,0.636127,0.461102,-1.358762,0.999829,0.655731,0.461603,...,-0.019538,0.0,0.686722,0.448664,0.007902,0.0,0.691889,0.444303,0.008170,0.0
992,0.566506,0.669465,-1.776249,0.999774,0.602097,0.580990,-1.763440,0.999465,0.625212,0.574265,...,-0.042729,0.0,0.644591,0.537176,-0.031839,0.0,0.650369,0.528324,-0.033068,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
664,0.566248,0.530476,-1.216194,0.999932,0.600238,0.448398,-1.151342,0.999826,0.618640,0.446828,...,-0.024388,0.0,0.638119,0.448399,-0.002380,0.0,0.643518,0.443356,-0.002668,0.0
3276,0.547890,0.609833,-1.404012,0.999616,0.573598,0.527701,-1.382918,0.999276,0.591200,0.525516,...,-0.020108,0.0,0.601855,0.515137,-0.010733,0.0,0.605556,0.507986,-0.011011,0.0
1318,0.605378,0.555061,-1.444949,0.999988,0.640095,0.467245,-1.381429,0.999976,0.660514,0.465019,...,-0.025619,0.0,0.702837,0.466305,-0.002569,0.0,0.707934,0.462180,-0.002919,0.0
723,0.609261,0.545811,-1.547911,0.999924,0.638250,0.461174,-1.484182,0.999828,0.659137,0.459771,...,-0.024955,0.0,0.683942,0.455006,-0.000951,0.0,0.689208,0.449636,-0.001091,0.0


# 2) Train Machine Learning Classification Model

In [19]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler #normalized the data

# ML algorithms
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [20]:
# Candidate classifiers, keyed by a short name; each one is preceded by StandardScaler.
# - LogisticRegression: max_iter is raised because the solver stopped at its iteration
#   limit with the library default (see the convergence warning in the training output).
#   NOTE(review): 1000 is a generous guess -- raise it further if the warning persists.
# - RandomForest / GradientBoosting: random_state is fixed (same seed as the train/test
#   split) so repeated runs train the same models.
pipeline = {
    'lr': make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000)),
    'rf': make_pipeline(StandardScaler(), RandomForestClassifier(random_state=1234)),
    'gb': make_pipeline(StandardScaler(), GradientBoostingClassifier(random_state=1234)),
    'rc': make_pipeline(StandardScaler(), RidgeClassifier())
}

In [21]:
# Fit every candidate pipeline on the training split and keep the fitted models by key.
fit_models = {} #dictionary to store the models
# The loop variable must not be named `pipeline`: that would rebind the `pipeline`
# dict to a single Pipeline object, and re-running this cell would then fail on
# `.items()`.
for algo, candidate in pipeline.items():
    fit_models[algo] = candidate.fit(x_train, y_train)  # fit() returns the fitted pipeline itself
    print(algo, 'model trained.')

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


lr model trained.
rf model trained.
gb model trained.
rc model trained.


# 3) Evaluate and Serialize Model

In [22]:
from sklearn.metrics import accuracy_score #to check the accuracy of the model
import pickle  # a library to save the model

In [23]:
# Print the held-out accuracy of each fitted model, one "<key> <accuracy>" line per model.
for name, fitted in fit_models.items():
    predictions = fitted.predict(x_test)
    print(name, accuracy_score(y_test, predictions))

lr 0.9993957703927493
rf 1.0
gb 1.0
rc 0.9861027190332327


In [24]:
# Persist the fitted random-forest pipeline so it can be reloaded without retraining.
chosen_model = fit_models['rf']
with open('Pose_Detection_model.pkl', 'wb') as model_file:
    pickle.dump(chosen_model, model_file)

# 4) Make Detections with Model

In [25]:
# Reload the pipeline saved above into `model`, which the detection cell uses.
# SECURITY: pickle.load can execute arbitrary code -- only load .pkl files you created
# yourself or otherwise trust.
with open('Pose_Detection_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [28]:
# Live classification: run MediaPipe Holistic on each webcam frame, feed the pose + face
# landmarks to the loaded `model`, and draw the predicted class next to the left ear.
# Press "q" in the preview window to stop.
cap = cv2.VideoCapture(0) #might be different for you

# Column names the model was fitted with, so the one-row frame passed to predict()
# carries the same feature names as the training data. None if the estimator does
# not expose them, in which case pandas falls back to default integer columns.
feature_names = getattr(model, 'feature_names_in_', None)

try:
    # Initiate holistic model
    with mp_holistic.Holistic(min_detection_confidence=0.8, min_tracking_confidence=0.8) as holistic:

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # read() reported failure, so `frame` is not a usable image; stop cleanly
                # instead of letting cvtColor raise on it.
                print('Could not read a frame from the camera; stopping.')
                break

            # Recolor Feed (BGR -> RGB before handing the frame to the model)
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False

            # Make Detections
            # `results` exposes face_landmarks, pose_landmarks, left_hand_landmarks, right_hand_landmarks
            results = holistic.process(image)

            # Recolor image back to BGR for rendering
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # 1. Draw face landmarks
            mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
                                     mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                                     mp_drawing.DrawingSpec(color=(80,255,121), thickness=1, circle_radius=1)  # was 256: above the 8-bit channel maximum
                                     )

            # 2. Right hand
            mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                     mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                                     mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                                     )

            # 3. Left Hand
            mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                     mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                                     mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                                     )

            # 4. Pose Detections
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                                     mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                     mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                     )

            # Classify the frame.
            # Only attempted when both landmark sets exist; this explicit check replaces the
            # former bare `except: pass`, so real problems (e.g. a feature-count mismatch
            # with the model) now surface instead of silently showing no label.
            if results.pose_landmarks is not None and results.face_landmarks is not None:
                # Extract Pose landmarks as a flat [x, y, z, visibility, x, y, ...] list
                pose = results.pose_landmarks.landmark
                pose_row = [value
                            for landmark in pose
                            for value in (landmark.x, landmark.y, landmark.z, landmark.visibility)]

                # Extract Face landmarks in the same layout
                face = results.face_landmarks.landmark
                face_row = [value
                            for landmark in face
                            for value in (landmark.x, landmark.y, landmark.z, landmark.visibility)]

                # Same feature order as the training rows: pose values, then face values
                row = pose_row + face_row

                # Make Detections (the previously computed but unused predict_proba call was dropped)
                X = pd.DataFrame([row], columns=feature_names)
                body_language_class = model.predict(X)[0]

                # Left-ear position in pixels: landmark x/y are scaled by the actual frame
                # size rather than an assumed 640x480.
                frame_height, frame_width = image.shape[:2]
                left_ear = results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR]
                coords = (int(left_ear.x * frame_width), int(left_ear.y * frame_height))

                # Filled box behind the label; width grows with the label length
                cv2.rectangle(image,
                              (coords[0], coords[1]+5),
                              (coords[0]+len(body_language_class)*20, coords[1]-30),
                              (245, 117, 16), -1)

                cv2.putText(image, body_language_class, coords,
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            cv2.imshow('Raw Webcam Feed', image)

            # "q" ends the loop
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Runs on normal exit, on a failed read and on any exception or interrupt,
    # so the camera and the preview window are always released.
    cap.release()
    cv2.destroyAllWindows()



In [47]:
# Stand-alone cleanup: the same two calls that end the preceding cell.
# Presumably kept so the camera and window can be released by hand if that cell stops early.
cap.release()
cv2.destroyAllWindows()