Verions
Python: 3.12.3
Mediapipe: 0.10.14
OpenCV: 4.10.0

In [13]:
import mediapipe as mp
import cv2
from collections import Counter
import time  # To preprare myself before capturing the data 
import csv
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score
import pickle

In [2]:
"""
This will help us draw our different detection from the holistic model to the screen
"""

mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

In [11]:
CSV_FILE = "body_landmark.csv"
SLEEP_TIME = 1

In [12]:
def vote_counter(voting_dictionary: dict, return_type: str = "common"):
    """
    Measures the most common occurrence in a dictionary of votes.

    Parameters:
    ----------
    voting_dictionary : dict
        A dictionary where keys represent vote identifiers and values represent votes.
        Example: {"A1": 1, "A2": 1, "A3": 1, "A4": 4, "A5": 5}

    return_type : str, optional
        Specifies the format of the output. Accepted values are:
        - "common" (default): Returns the most common value.
          Example Output: 1
        - "list": Returns a list of tuples with each value and its count.
          Example Output: [(1, 3), (4, 1), (5, 1)]
        - "tuple": Returns the most common value and its count as a tuple.
          Example Output: (1, 3)

    Returns:
    -------
    int, list, or tuple
        The output depends on the value of the `return_type` parameter.

    Example:
    -------
    >> voting_dict = {"A1": 1, "A2": 1, "A3": 1, "A4": 4, "A5": 5}
    >> vote_counter(voting_dict, return_type="common")
    1
    >> vote_counter(voting_dict, return_type="list")
    [(1, 3), (4, 1), (5, 1)]
    >> vote_counter(voting_dict, return_type="tuple")
    (1, 3)
    """

    vote_counts = Counter(voting_dictionary.values())
    if return_type.lower() == 'common':
        majority_vote = vote_counts.most_common(1)[0][0]
        return majority_vote
    elif return_type.lower() == 'list':
        majority_vote = vote_counts.most_common()
        return majority_vote
    elif return_type.lower() == 'tuple':
        majority_vote = vote_counts.most_common(1)[0]
        return majority_vote
    else:
        raise TypeError("Parameter 'return_type' was assigned a value which is not defined.\n"
                        "Defined values are: 'common', 'list', 'tuple'")



# Detection

In [80]:
# Press "Q" to end

cap = cv2.VideoCapture(0)

# Initialize Holistic model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

    while cap.isOpened():
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)

        # Recolor Feed From BGR to RGB
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False

        # Making Detection
        results = holistic.process(image)
        # print(results.pose_landmarks)
        # pose_landmarks, face_landmarks, left_hand_landmarks, right_hand_landmarks

        # Recolor Feed From RGB to BGR
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        image.flags.writeable = True

        # Drawing Face Landmarks
        # Use FACEMESH_CONTOURS or FACEMESH_TESSELATION depending on your needs.
        mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
                                mp_drawing.DrawingSpec(color=(80, 110, 10), thickness=1, circle_radius=1),
                                mp_drawing.DrawingSpec(color=(80, 256, 121), thickness=1, circle_radius=1)
                                )

        #Right Hand
        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                mp_drawing.DrawingSpec(color=(80, 22, 10), thickness=2, circle_radius=4),
                                mp_drawing.DrawingSpec(color=(80, 44, 121), thickness=2, circle_radius=2)
                                )

        #Left Hand
        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2)
                                )

        #Pose detection (Full Body)
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                                mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                                mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2)
                                )


        cv2.imshow('Face Detection Feed', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

# Custom CSV data collection

In [82]:
#Here we are just detecting for pose and face landmarks so we need the total Length of the both face and pose

try:
    coord_len = len(results.pose_landmarks.landmark) + len(results.face_landmarks.landmark)
    print(coord_len)  # 501

except Exception:
    coord_len = 501


501


In [83]:
landmarks = ["class"]
for i in range(1, coord_len + 1):
    landmarks += ["x{}".format(i), "y{}".format(i), "z{}".format(i), "v{}".format(i)]

In [85]:
with open(CSV_FILE, "w", newline="") as csvfile:
    csv_writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerow(landmarks)

In [93]:
CLASS_NAME = 'Happy'


print("Prepare Yourself")
time.sleep(SLEEP_TIME)
print("Camera is starting")


# Press "Q" to end

cap = cv2.VideoCapture(0)

# Initialize Holistic model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

    while cap.isOpened():
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)

        # Recolor Feed From BGR to RGB
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False

        # Making Detection
        results = holistic.process(image)
        # print(results.pose_landmarks)
        # pose_landmarks, face_landmarks, left_hand_landmarks, right_hand_landmarks

        # Recolor Feed From RGB to BGR
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        image.flags.writeable = True

        # Drawing Face Landmarks
        # Use FACEMESH_CONTOURS or FACEMESH_TESSELATION depending on your needs.
        mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
                                mp_drawing.DrawingSpec(color=(80, 110, 10), thickness=1, circle_radius=1),
                                mp_drawing.DrawingSpec(color=(80, 256, 121), thickness=1, circle_radius=1)
                                )

        #Right Hand
        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                mp_drawing.DrawingSpec(color=(80, 22, 10), thickness=2, circle_radius=4),
                                mp_drawing.DrawingSpec(color=(80, 44, 121), thickness=2, circle_radius=2)
                                )

        #Left Hand
        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2)
                                )

        #Pose detection (Full Body)
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                                mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                                mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2)
                                )

        # Export data to CSV
        try:
            pose_landmarks = results.pose_landmarks.landmark
            pose_row = np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose_landmarks]).flatten()
            face_landmarks = results.face_landmarks.landmark
            face_row = np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in face_landmarks]).flatten()

            # Reshaping the arrays to have the same shape
            pose_row = pose_row.reshape(-1, 1)
            face_row = face_row.reshape(-1, 1)
            combined_row = np.concatenate((pose_row, face_row), axis=0)
            # combined_row.insert(0, CLASS_NAME)
            combined_row = [CLASS_NAME] + [str(i[0]) for i in combined_row]


            with open(CSV_FILE, "a", newline="") as csvfile:
                csv_writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                csv_writer.writerow(combined_row)
           

        except Exception as e:
            print("Exception while capturing Data")
            print(e)
            pass


        cv2.imshow('Face Detection Feed', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

Prepare Yourself
Camera is starting
Exception while capturing Data
'NoneType' object has no attribute 'landmark'
Exception while capturing Data
'NoneType' object has no attribute 'landmark'
Exception while capturing Data
'NoneType' object has no attribute 'landmark'
Exception while capturing Data
'NoneType' object has no attribute 'landmark'
Exception while capturing Data
'NoneType' object has no attribute 'landmark'
Exception while capturing Data
'NoneType' object has no attribute 'landmark'
Exception while capturing Data
'NoneType' object has no attribute 'landmark'
Exception while capturing Data
'NoneType' object has no attribute 'landmark'
Exception while capturing Data
'NoneType' object has no attribute 'landmark'
Exception while capturing Data
'NoneType' object has no attribute 'landmark'
Exception while capturing Data
'NoneType' object has no attribute 'landmark'
Exception while capturing Data
'NoneType' object has no attribute 'landmark'
Exception while capturing Data
'NoneType

# Read the CSV file and train the model

In [99]:
df = pd.read_csv(CSV_FILE)
df.head()

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z499,v499,x500,y500,z500,v500,x501,y501,z501,v501
0,walking,0.550003,0.464902,-0.354316,0.999366,0.552843,0.439685,-0.320169,0.998791,0.556233,...,0.002468,0.0,0.567254,0.428869,0.017634,0.0,0.568685,0.427077,0.018543,0.0
1,walking,0.550094,0.46473,-0.345516,0.999252,0.552839,0.439489,-0.309739,0.998522,0.556167,...,0.00383,0.0,0.5645,0.432628,0.019536,0.0,0.565819,0.430909,0.020569,0.0
2,walking,0.550363,0.4647,-0.292458,0.999269,0.552935,0.439287,-0.25128,0.99857,0.556216,...,0.002656,0.0,0.566561,0.431336,0.019355,0.0,0.567771,0.429431,0.020433,0.0
3,walking,0.55078,0.464667,-0.239807,0.99932,0.553607,0.439325,-0.196496,0.998682,0.557036,...,0.003022,0.0,0.568424,0.434893,0.017913,0.0,0.569931,0.43295,0.018858,0.0
4,walking,0.553523,0.464407,-0.269046,0.999324,0.55685,0.439312,-0.226215,0.998736,0.560649,...,0.001748,0.0,0.572347,0.431665,0.014463,0.0,0.5739,0.429748,0.015153,0.0


In [100]:
x_features = df.drop("class", axis=1)  # Contains all the coodinates
y_target = df["class"]  # contains all the classes

In [102]:
x_features.head(), y_target.head()

(         x1        y1        z1        v1        x2        y2        z2  \
 0  0.550003  0.464902 -0.354316  0.999366  0.552843  0.439685 -0.320169   
 1  0.550094  0.464730 -0.345516  0.999252  0.552839  0.439489 -0.309739   
 2  0.550363  0.464700 -0.292458  0.999269  0.552935  0.439287 -0.251280   
 3  0.550780  0.464667 -0.239807  0.999320  0.553607  0.439325 -0.196496   
 4  0.553523  0.464407 -0.269046  0.999324  0.556850  0.439312 -0.226215   
 
          v2        x3        y3  ...      z499  v499      x500      y500  \
 0  0.998791  0.556233  0.440187  ...  0.002468   0.0  0.567254  0.428869   
 1  0.998522  0.556167  0.440004  ...  0.003830   0.0  0.564500  0.432628   
 2  0.998570  0.556216  0.439829  ...  0.002656   0.0  0.566561  0.431336   
 3  0.998682  0.557036  0.439952  ...  0.003022   0.0  0.568424  0.434893   
 4  0.998736  0.560649  0.439953  ...  0.001748   0.0  0.572347  0.431665   
 
        z500  v500      x501      y501      z501  v501  
 0  0.017634   0.0  0

In [109]:
X_train, X_test, y_train, y_test = train_test_split(x_features, y_target, test_size=0.2, random_state=1220)

In [111]:
X_train, X_test, y_train, y_test

(            x1        y1        z1        v1        x2        y2        z2  \
 1252  0.553236  0.516239 -0.828167  0.999952  0.570037  0.467597 -0.770213   
 1628  0.514140  0.684100 -1.510088  0.998853  0.536647  0.616104 -1.508975   
 1399  0.398897  0.575800 -1.316567  0.999034  0.434816  0.520471 -1.367914   
 1621  0.518635  0.679187 -1.549651  0.999097  0.542673  0.611999 -1.546626   
 34    0.580621  0.438943 -0.539216  0.996237  0.589807  0.413359 -0.499597   
 ...        ...       ...       ...       ...       ...       ...       ...   
 396   0.511377  0.535470 -0.488599  0.999925  0.522173  0.493046 -0.425096   
 755   0.588257  0.539233 -0.541660  0.999955  0.608007  0.488781 -0.496814   
 337   0.472208  0.527787 -0.770961  0.999994  0.492040  0.475877 -0.704140   
 471   0.471470  0.555994 -0.615422  0.999992  0.484681  0.506840 -0.566728   
 867   0.552145  0.512027 -0.995426  0.999894  0.570626  0.465409 -0.941138   
 
             v2        x3        y3  ...      z499

## Model Selection and Model Building

In [112]:
model_pipeline = {
    "logestic_regression": make_pipeline(StandardScaler(), LogisticRegression()),
    "ridge_classifier": make_pipeline(StandardScaler(), RidgeClassifier()),
    "random_forest": make_pipeline(StandardScaler(), RandomForestClassifier()),
    "gradient_boosting": make_pipeline(StandardScaler(), GradientBoostingClassifier()),
}

In [113]:
fit_models = {}

for name, pipeline in model_pipeline.items():
    model = pipeline.fit(X_train, y_train)
    fit_models[name] = model

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [114]:
fit_models

{'logestic_regression': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('logisticregression', LogisticRegression())]),
 'ridge_classifier': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('ridgeclassifier', RidgeClassifier())]),
 'random_forest': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('randomforestclassifier', RandomForestClassifier())]),
 'gradient_boosting': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('gradientboostingclassifier', GradientBoostingClassifier())])}

In [116]:
for name, model in fit_models.items():
    yhat = model.predict(X_test)
    print(name, accuracy_score(y_test, yhat))

logestic_regression 0.9972375690607734
ridge_classifier 0.9917127071823204
random_forest 1.0
gradient_boosting 0.9972375690607734


In [119]:
for mode_name in fit_models.keys():
    with open(mode_name + ".pkl", "wb") as f:
        pickle.dump(fit_models[mode_name], f)


# Predicting using all model with a voting determination function

In [14]:
model_list = ["gradient_boosting", "logestic_regression", "random_forest", "ridge_classifier"]  # Name of all models
all_models = {}

for name in model_list:
    model_path = name + ".pkl"
    with open(model_path, 'rb') as file:
        all_models[name] = pickle.load(file)


In [15]:
all_models

{'gradient_boosting': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('gradientboostingclassifier', GradientBoostingClassifier())]),
 'logestic_regression': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('logisticregression', LogisticRegression())]),
 'random_forest': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('randomforestclassifier', RandomForestClassifier())]),
 'ridge_classifier': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('ridgeclassifier', RidgeClassifier())])}

In [34]:
# Press "Q" to end

cap = cv2.VideoCapture(0)

# Initialize Holistic model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

    while cap.isOpened():
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)

        # Recolor Feed From BGR to RGB
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False

        # Making Detection
        results = holistic.process(image)
        # print(results.pose_landmarks)
        # pose_landmarks, face_landmarks, left_hand_landmarks, right_hand_landmarks

        # Recolor Feed From RGB to BGR
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        image.flags.writeable = True

        # Drawing Face Landmarks
        # Use FACEMESH_CONTOURS or FACEMESH_TESSELATION depending on your needs.
        mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
                                mp_drawing.DrawingSpec(color=(80, 110, 10), thickness=1, circle_radius=1),
                                mp_drawing.DrawingSpec(color=(80, 256, 121), thickness=1, circle_radius=1)
                                )

        #Right Hand
        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                mp_drawing.DrawingSpec(color=(80, 22, 10), thickness=2, circle_radius=4),
                                mp_drawing.DrawingSpec(color=(80, 44, 121), thickness=2, circle_radius=2)
                                )

        #Left Hand
        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2)
                                )

        #Pose detection (Full Body)
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                                mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                                mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2)
                                )

        try:
            pose_landmarks = results.pose_landmarks.landmark
            pose_row = np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose_landmarks]).flatten()
            face_landmarks = results.face_landmarks.landmark
            face_row = np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in face_landmarks]).flatten()

            # Reshaping the arrays to have the same shape
            pose_row = pose_row.reshape(-1, 1)
            face_row = face_row.reshape(-1, 1)
            combined_row = np.concatenate((pose_row, face_row), axis=0)
            # combined_row.insert(0, CLASS_NAME)
            combined_row = [i[0] for i in combined_row]

            votes = {}

            X = pd.DataFrame([combined_row])

            for name, model in all_models.items():
                votes[name] = str(model.predict(X)[0])

            body_language_class = vote_counter(voting_dictionary=votes)

            # body_language_class = all_models['gradient_boosting'].predict(X)[0]
            # body_language_prob = all_models['gradient_boosting'].predict_proba(X)[0]
            # print(body_language_class, body_language_prob)

        
            coord = tuple(np.multiply(
                        np.array([results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR].x, 
                        results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR].y]), 
                        [640, 480]).astype(int))
            cv2.rectangle(image,
                            (coord[0], coord[1] + 5),
                            (coord[0] + len(body_language_class) * 20, coord[1] - 30),
                            (245, 117, 16), -1)
            cv2.putText(image, body_language_class, coord, 
                        cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA
                        )

           

        except Exception as e:
            pass


        cv2.imshow('Face Detection Feed', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()



In [24]:
cap.release()
cv2.destroyAllWindows()

'Sad'