In [1]:
import os 
import mediapipe as mp 
from yolov5 import YOLOv5
import cv2
import numpy as np 
import pandas as pd 
import warnings 
warnings.filterwarnings(action='ignore')

In [2]:

# Load YOLO model
yolo_model = YOLOv5("yolov5s.pt")  # Pre-trained YOLOv5 model

# Initialize MediaPipe Pose
mp_pose = mp.solutions.pose
pose = mp_pose.Pose()

# Dictionary to store landmarks from the previous frame
prev_landmarks = {}

YOLOv5  2024-11-25 Python-3.11.5 torch-2.5.1+cpu CPU

Fusing layers... 
YOLOv5s summary: 270 layers, 7235389 parameters, 0 gradients, 16.6 GFLOPs
Adding AutoShape... 


In [3]:
# Function to compute Euclidean distance
def euclidean_distance(point1, point2):
    return math.sqrt((point1[0] - point2[0])**2 + (point1[1] - point2[1])**2)

In [4]:
def classify_activity (landmarks, prev_landmarks, bbox_id):
    text = ''
    left_hip = landmarks[mp_pose.PoseLandmark.LEFT_HIP]
    right_hip = landmarks[mp_pose.PoseLandmark.RIGHT_HIP]
    left_knee = landmarks[mp_pose.PoseLandmark.LEFT_KNEE]
    right_knee = landmarks[mp_pose.PoseLandmark.RIGHT_KNEE]
    left_shoulder = landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER]
    right_shoulder = landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER]

    # Compute average positions for hips, knees, and shoulders
    avg_hip_y = (left_hip.y + right_hip.y) / 2
    avg_knee_y = (left_knee.y + right_knee.y) / 2
    avg_shoulder_y = (left_shoulder.y + right_shoulder.y) / 2

    movement_detection = False
    if bbox_id in prev_landmarks:
        prev_hib = prev_landmarks[bbox_id]
        current_hib = ((left_hip.x + right_hip.x) / 2, avg_hip_y)
        displacement = euclidean_distance(current_hib, prev_hib)
        if displacement >0.5:
            movement_detection = True
        prev_landmarks[bbox_id] = current_hib
        
    if movement_detection:
        text=  'moving'
    elif avg_hip_y < avg_knee_y  and avg_hip_y < avg_shoulder_y:
        text= "sitting"
    elif avg_hip_y > avg_knee_y and avg_shoulder_y > avg_hip_y:
        text= 'standing'
    else:
        text= 'unknown'

    return text, prev_landmarks
    

In [5]:
landmark_data = []
cap = cv2.VideoCapture('./factory_video.mp4')
while True:
    ret, frame = cap.read()
    if not ret:
        break
    # step one detect the human by YOLO 
    detections = yolo_model.predict(frame)
    detections = detections.xyxy[0]
    for idx, (*box, conf, cls) in enumerate(detections.tolist()):
        x1, y1, x2, y2 = map(int, box)   # Bounding box coordinates
        if int(cls) == 0: # check if the object is human 
            cv2.rectangle(frame, (x1,y1),(x2,y2), (0,255,0),2)

            # Step 2: Crop region for each person
            person_crop = frame[y1:y2,x1:x2]
            if person_crop.size>0:
                rgb_crop = cv2.cvtColor(person_crop, cv2.COLOR_BGR2RGB)
                results = pose.process(rgb_crop)
                
                # Draw pose landmarks on the cropped person, and save the landmarks, create activities and label it 
                if results.pose_landmarks:  
                    # mp.solutions.drawing_utils.draw_landmarks(
                    #     frame[y1:y2, x1:x2], results.pose_landmarks, mp_pose.POSE_CONNECTIONS
                    # )

                    landmarks = results.pose_landmarks.landmark
                    landmark_list = [
                        {"x": lm.x, "y": lm.y, "z": lm.z, "visibility": lm.visibility} for lm in landmarks
                    ]
                    landmark_data.append({"landmarks": landmark_list, "label": "unknown activity"})

                    activity, prev_landmarks = classify_activity(landmarks, prev_landmarks, idx)
                    label = f"{activity.upper()}"
                    cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
                    
                    


    


    # Display the frame with bounding boxes and poses
    cv2.imshow("Multi-Worker Activity Recognition", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
            

In [14]:
landmark_data = []
output_dir = './cropped_landmarks'
cap = cv2.VideoCapture('./picnic.MP4')
while True:
    ret, frame = cap.read()
    if not ret:
        break
    # step one detect the human by YOLO 
    detections = yolo_model.predict(frame)
    detections = detections.xyxy[0]
    for idx, (*box, conf, cls) in enumerate(detections.tolist()):
        x1, y1, x2, y2 = map(int, box)   # Bounding box coordinates
        if int(cls) == 0: # check if the object is human 
            cv2.rectangle(frame, (x1,y1),(x2,y2), (0,255,0),2)

            # Step 2: Crop region for each person
            person_crop = frame[y1:y2,x1:x2]
            if person_crop.size>0:
                rgb_crop = cv2.cvtColor(person_crop, cv2.COLOR_BGR2RGB)
                results = pose.process(rgb_crop)
                
                # Draw pose landmarks on the cropped person, and save the landmarks, create activities and label it 
                if results.pose_landmarks:  
                    mp.solutions.drawing_utils.draw_landmarks(
                        frame[y1:y2, x1:x2], results.pose_landmarks, mp_pose.POSE_CONNECTIONS
                    )

                    landmarks = results.pose_landmarks.landmark

                    activity, prev_landmarks = classify_activity(landmarks, prev_landmarks, idx)
                    label = f"{activity.upper()}"
                    cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)


                    output_path = os.path.join(output_dir, f"person_{idx}.jpg")
                    cv2.imwrite(output_path, cv2.cvtColor(rgb_crop, cv2.COLOR_RGB2BGR))
                    
                    left_hip = landmarks[mp_pose.PoseLandmark.LEFT_HIP]
                    right_hip = landmarks[mp_pose.PoseLandmark.RIGHT_HIP]
                    left_knee = landmarks[mp_pose.PoseLandmark.LEFT_KNEE]
                    right_knee = landmarks[mp_pose.PoseLandmark.RIGHT_KNEE]
                    left_shoulder = landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER]
                    right_shoulder = landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER]
                    landmark_data.append({"left_hip": left_hip,
                                          'right_hip':right_hip,
                                          'left_knee':left_knee,
                                          'right_knee':right_knee,
                                          'left_shoulder':left_shoulder,
                                          'right_shoulder':right_shoulder,
                                          
                                          'avg_hip_y' : (left_hip.y + right_hip.y) / 2,
                                          "avg_knee_y": (left_knee.y + right_knee.y) / 2,
                                          'avg_shoulder_y':(left_shoulder.y + right_shoulder.y) / 2,
                    
                                           "label": activity})
                    


    


    # Display the frame with bounding boxes and poses
    cv2.imshow("Multi-Worker Activity Recognition", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
            

In [7]:
# pip uninstall opencv-python-headless


In [15]:
land_test=pd.DataFrame(landmark_data)

In [16]:
land_test[land_test.label =='unknown']

Unnamed: 0,left_hip,right_hip,left_knee,right_knee,left_shoulder,right_shoulder,avg_hip_y,avg_knee_y,avg_shoulder_y,label
0,x: 0.566478074\ny: 0.685459852\nz: -0.01127536...,x: 0.491862208\ny: 0.671730399\nz: 0.010376897...,x: 0.784370601\ny: 0.808060884\nz: -0.24978332...,x: 0.530128062\ny: 0.791544378\nz: 0.043508876...,x: 0.460121185\ny: 0.264222026\nz: -0.861997\n...,x: 0.479630202\ny: 0.252651066\nz: -1.164505\n...,0.678595,0.799803,0.258437,unknown
1,x: 0.567424476\ny: 0.501817584\nz: 0.040717780...,x: 0.398570985\ny: 0.486595094\nz: -0.0398743\...,x: 0.688692093\ny: 0.714880705\nz: -1.57472134...,x: 0.415501624\ny: 0.643930912\nz: -1.69549632...,x: 0.598439574\ny: 0.188856527\nz: 1.07208145\...,x: 0.348746568\ny: 0.200640708\nz: 1.01972103\...,0.494206,0.679406,0.194749,unknown
2,x: 0.469644934\ny: 0.587157249\nz: -0.09099782...,x: 0.561707795\ny: 0.548385084\nz: 0.091872394...,x: 0.47394979\ny: 0.57938832\nz: -0.315691739\...,x: 0.503100097\ny: 0.542989254\nz: 0.192711473...,x: 0.389316529\ny: 0.32203719\nz: -0.514768898...,x: 0.600299776\ny: 0.307200193\nz: -0.44076404...,0.567771,0.561189,0.314619,unknown
3,x: 0.56988734\ny: 0.675194204\nz: -0.048816788...,x: 0.438565373\ny: 0.664972782\nz: 0.046134922...,x: 0.641793847\ny: 0.448604614\nz: -0.729331\n...,x: 0.349974155\ny: 0.677567482\nz: -0.89585763...,x: 0.660899\ny: 0.283674031\nz: -1.19679022\nv...,x: 0.495477736\ny: 0.268685699\nz: -1.58863866...,0.670083,0.563086,0.27618,unknown
4,x: 0.58934\ny: 0.566697478\nz: -0.0926985443\n...,x: 0.343947798\ny: 0.575685322\nz: 0.091970153...,x: 0.629593432\ny: 0.674929\nz: -2.10937667\nv...,x: 0.32386151\ny: 0.701304913\nz: -2.15788555\...,x: 0.69547075\ny: 0.235184088\nz: 1.31877172\n...,x: 0.329201788\ny: 0.219434395\nz: 1.77386796\...,0.571191,0.688117,0.227309,unknown
5,x: 0.59617269\ny: 0.446520686\nz: 0.031333\nvi...,x: 0.329407424\ny: 0.451145858\nz: -0.03209230...,x: 0.58401686\ny: 0.608903468\nz: -0.178026542...,x: 0.34453395\ny: 0.623773277\nz: -0.025083644...,x: 0.671319842\ny: 0.208420739\nz: -0.24025136...,x: 0.213538736\ny: 0.209933132\nz: -0.38067418...,0.448833,0.616338,0.209177,unknown
6,x: 0.336207718\ny: 0.523672104\nz: 0.331762284...,x: 0.537399113\ny: 0.535024941\nz: -0.33250868...,x: 0.516902864\ny: 0.717216492\nz: 0.970066726...,x: 0.756860316\ny: 0.691903889\nz: 0.131936014...,x: 0.241901696\ny: 0.233575866\nz: -0.00261421...,x: 0.65929389\ny: 0.222677439\nz: -0.957467258...,0.529349,0.70456,0.228127,unknown
7,x: 0.346882075\ny: 0.512840927\nz: 0.114970438...,x: 0.465579867\ny: 0.514115632\nz: -0.1158152\...,x: 0.484816045\ny: 0.562307656\nz: 0.537505925...,x: 0.684436262\ny: 0.553392291\nz: 0.148234352...,x: 0.327681929\ny: 0.210174888\nz: 0.0588838\n...,x: 0.522927821\ny: 0.221800283\nz: -0.15037444...,0.513478,0.55785,0.215988,unknown
8,x: 0.497370154\ny: 0.439449191\nz: -0.01937797...,x: 0.311643064\ny: 0.448496848\nz: 0.019250664...,x: 0.41396302\ny: 0.61413151\nz: -0.0390855744...,x: 0.32268095\ny: 0.604488611\nz: 0.0643611401...,x: 0.63612622\ny: 0.190569267\nz: -0.91659683\...,x: 0.2259828\ny: 0.185289711\nz: -0.527446449\...,0.443973,0.60931,0.187929,unknown
9,x: 0.440903097\ny: 0.496502608\nz: 0.339106888...,x: 0.449144959\ny: 0.502915502\nz: -0.33934623...,x: 0.632425249\ny: 0.685575\nz: 0.0742937773\n...,x: 0.744548559\ny: 0.687131524\nz: -0.36851811...,x: 0.443397969\ny: 0.211700946\nz: -0.20428574...,x: 0.649984717\ny: 0.209384486\nz: -1.11413395...,0.499709,0.686353,0.210543,unknown


In [10]:
land_test.iloc[3,:4]

left_hip      x: 0.463583499\ny: 0.897460878\nz: 0.115582123...
right_hip     x: 0.492045969\ny: 0.917562306\nz: -0.11631032...
left_knee     x: 0.563165486\ny: 0.613656282\nz: 0.811140895...
right_knee    x: 0.53023994\ny: 0.673246205\nz: 0.422447115\...
Name: 3, dtype: object