In [2]:
import cv2
import mediapipe as mp

# Initialize mediapipe holistic model and drawing utilities
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Start capturing video from the webcam
cap = cv2.VideoCapture(0)

# Check if the webcam is opened successfully
if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

# Initiate holistic model
with mp_holistic.Holistic(min_detection_confidence=0.1, min_tracking_confidence=0.1) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()

        # If frame is not read correctly, handle the error
        if not ret or frame is None:
            print("Error: Could not read frame.")
            break

        # Recolor Feed to RGB for Mediapipe processing
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False        

        # Make Detections
        results = holistic.process(image)

        # Recolor image back to BGR for rendering
        image.flags.writeable = True   
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        # Draw landmarks
        # (drawing logic remains the same)

        # Show the processed image
        cv2.imshow('Raw Webcam Feed', image)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

# Release the video capture object and close windows
cap.release()
cv2.destroyAllWindows()


### Inference on video mediapipe

In [1]:
import cv2
import mediapipe as mp
import os

# Initialize mediapipe pose model and drawing utilities
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

# Path to input video
video_path = r'C:\Users\Admin\Downloads\Body Language Detection with mediapipe\vid\9710109-uhd_3840_2160_25fps.mp4'

# Specify output folder and file
output_folder = 'output_videos'
if not os.path.exists(output_folder):
    os.makedirs(output_folder)
output_video_path = os.path.join(output_folder, r'C:\Users\Admin\Downloads\Body Language Detection with mediapipe\vid\MainProcessed.mp4')

# Start capturing video from the file
cap = cv2.VideoCapture(video_path)

# Get frame width, height, and FPS for saving the output video
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))

# Define the codec and create VideoWriter object to save the video
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

# Initiate pose model
with mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5, model_complexity=1) as pose:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        # Recolor Feed to RGB for Mediapipe processing
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False        
        
        # Make Detections
        results = pose.process(image)
        
        # Recolor image back to BGR for rendering
        image.flags.writeable = True   
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        
        # Draw Pose landmarks for the detected person
        if results.pose_landmarks:
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS, 
                                      mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                      mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2))
        
        # Write the processed frame to the output video
        out.write(image)
        
        # Show the processed image (optional)
        cv2.imshow('Processed Video Feed', image)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

# Release the video capture and writer objects, and close windows
cap.release()
out.release()
cv2.destroyAllWindows()


### process multiple images in a video

In [8]:
import cv2
import mediapipe as mp
import torch
import os
from ultralytics import YOLO

# Initialize Mediapipe pose model and drawing utilities
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

# Load YOLOv8 model for person detection
yolo_model = YOLO('yolov8n.pt')  # Use 'yolov8n.pt' for lightweight model

# Path to input video
video_path = r'C:\Users\Admin\Downloads\Body Language Detection with mediapipe\vid\9710109-uhd_3840_2160_25fps.mp4'

# Specify output folder and file
output_folder = 'output_videos'
if not os.path.exists(output_folder):
    os.makedirs(output_folder)
output_video_path = os.path.join(output_folder, r'C:\Users\Admin\Downloads\Body Language Detection with mediapipe\vid\25fps.mp4')

# Start capturing video from the file
cap = cv2.VideoCapture(video_path)

# Get frame width, height, and FPS for saving the output video
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))

# Define the codec and create VideoWriter object to save the video
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

# Initiate pose model
with mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5, model_complexity=1) as pose:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        # Use YOLO to detect people in the frame
        results = yolo_model(frame)
        detections = results[0]  # Get detections from YOLO
        
        # Process each person detected
        for det in detections.boxes:
            if det.cls == 0:  # Class 0 is for 'person' in YOLO
                # Get bounding box coordinates for each person
                x1, y1, x2, y2 = map(int, det.xyxy[0].cpu().numpy())

                # Extract the person from the frame using the bounding box
                person = frame[y1:y2, x1:x2]

                # Convert to RGB for Mediapipe processing
                person_rgb = cv2.cvtColor(person, cv2.COLOR_BGR2RGB)
                person_rgb.flags.writeable = False

                # Apply Mediapipe Pose estimation
                pose_results = pose.process(person_rgb)
                
                # Recolor the image back to BGR for display
                person_rgb.flags.writeable = True
                person_bgr = cv2.cvtColor(person_rgb, cv2.COLOR_RGB2BGR)

                # Draw Pose landmarks on the person
                if pose_results.pose_landmarks:
                    mp_drawing.draw_landmarks(person_bgr, pose_results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                                              mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                              mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2))

                # Replace the processed person back into the original frame
                frame[y1:y2, x1:x2] = person_bgr
        
        # Write the processed frame to the output video
        out.write(frame)

        # Show the processed frame (optional)
        cv2.imshow('Processed Video Feed', frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

# Release the video capture and writer objects, and close windows
cap.release()
out.release()
cv2.destroyAllWindows()


0: 384x640 4 persons, 323.3ms
Speed: 13.4ms preprocess, 323.3ms inference, 5.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 160.0ms
Speed: 3.5ms preprocess, 160.0ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 169.3ms
Speed: 5.1ms preprocess, 169.3ms inference, 2.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 222.4ms
Speed: 6.3ms preprocess, 222.4ms inference, 3.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 141.4ms
Speed: 4.0ms preprocess, 141.4ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 118.2ms
Speed: 3.3ms preprocess, 118.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 147.5ms
Speed: 3.0ms preprocess, 147.5ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 152.0ms
Speed: 4.0ms preprocess, 152.0ms inference, 2.0ms postprocess per

### ON IMAGES IN A FOLDER

In [9]:
import cv2
import mediapipe as mp
import torch
import os
from ultralytics import YOLO

# Initialize Mediapipe pose model and drawing utilities
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

# Load YOLOv8 model for person detection
yolo_model = YOLO('yolov8n.pt')  # Use 'yolov8n.pt' for lightweight model

# Specify input and output folder paths
input_folder = r'C:\Users\Admin\Downloads\data\im'
output_folder = r'C:\Users\Admin\Downloads\data\label'

# Ensure the output folder exists
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Initiate pose model
with mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.5, min_tracking_confidence=0.5, model_complexity=1) as pose:
    
    # Loop through each image in the input folder
    for file_name in os.listdir(input_folder):
        if file_name.endswith(('.jpg', '.jpeg', '.png')):  # Process only image files
            image_path = os.path.join(input_folder, file_name)

            # Read the image
            image = cv2.imread(image_path)

            # Use YOLO to detect people in the image
            results = yolo_model(image)
            detections = results[0]  # Get detections from YOLO
            
            # Process each person detected
            for det in detections.boxes:
                if det.cls == 0:  # Class 0 is for 'person' in YOLO
                    # Get bounding box coordinates for each person
                    x1, y1, x2, y2 = map(int, det.xyxy[0].cpu().numpy())

                    # Extract the person from the image using the bounding box
                    person = image[y1:y2, x1:x2]

                    # Convert to RGB for Mediapipe processing
                    person_rgb = cv2.cvtColor(person, cv2.COLOR_BGR2RGB)
                    person_rgb.flags.writeable = False

                    # Apply Mediapipe Pose estimation
                    pose_results = pose.process(person_rgb)
                    
                    # Recolor the image back to BGR for display
                    person_rgb.flags.writeable = True
                    person_bgr = cv2.cvtColor(person_rgb, cv2.COLOR_RGB2BGR)

                    # Draw Pose landmarks on the person
                    if pose_results.pose_landmarks:
                        mp_drawing.draw_landmarks(person_bgr, pose_results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                                                  mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                                  mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2))

                    # Replace the processed person back into the original image
                    image[y1:y2, x1:x2] = person_bgr
            
            # Save the labeled image to the output folder
            output_image_path = os.path.join(output_folder, file_name)
            cv2.imwrite(output_image_path, image)

            print(f"Processed and saved: {output_image_path}")

# Release any resources if needed
cv2.destroyAllWindows()



0: 384x640 4 persons, 160.0ms
Speed: 19.0ms preprocess, 160.0ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)
Processed and saved: C:\Users\Admin\Downloads\data\label\img_0.jpg

0: 384x640 4 persons, 106.2ms
Speed: 2.4ms preprocess, 106.2ms inference, 6.1ms postprocess per image at shape (1, 3, 384, 640)
Processed and saved: C:\Users\Admin\Downloads\data\label\img_1.jpg

0: 384x640 4 persons, 132.2ms
Speed: 3.6ms preprocess, 132.2ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
Processed and saved: C:\Users\Admin\Downloads\data\label\img_2.jpg

0: 384x640 4 persons, 106.6ms
Speed: 9.8ms preprocess, 106.6ms inference, 2.7ms postprocess per image at shape (1, 3, 384, 640)
Processed and saved: C:\Users\Admin\Downloads\data\label\img_3.jpg

0: 384x640 4 persons, 135.7ms
Speed: 6.0ms preprocess, 135.7ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)
Processed and saved: C:\Users\Admin\Downloads\data\label\img_4.jpg

0: 384x640 4 perso

### labelling images in a folder saving labels in same folder as image

In [13]:
import cv2
import mediapipe as mp
import torch
import os
from ultralytics import YOLO

# Initialize Mediapipe pose model and drawing utilities
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

# Load YOLOv8 model for person detection
yolo_model = YOLO('yolov8n.pt')  # Use 'yolov8n.pt' for lightweight model

# Specify input and output folder paths
input_folder = r'C:\Users\Admin\Downloads\data\im'
output_folder = r'C:\Users\Admin\Downloads\data\label'

# Ensure the output folder exists
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Function to normalize coordinates
def normalize(value, max_value):
    return value / max_value

# Initiate pose model
with mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.5, min_tracking_confidence=0.5, model_complexity=1) as pose:
    
    # Loop through each image in the input folder
    for file_name in os.listdir(input_folder):
        if file_name.endswith(('.jpg', '.jpeg', '.png')):  # Process only image files
            image_path = os.path.join(input_folder, file_name)

            # Read the image
            image = cv2.imread(image_path)
            height, width, _ = image.shape

            # Use YOLO to detect people in the image
            results = yolo_model(image)
            detections = results[0]  # Get detections from YOLO
            
            # Initialize label data for the image
            label_data = []

            # Process each person detected
            for det in detections.boxes:
                if det.cls == 0:  # Class 0 is for 'person' in YOLO
                    # Get bounding box coordinates for each person
                    x1, y1, x2, y2 = map(int, det.xyxy[0].cpu().numpy())

                    # Normalize bounding box coordinates
                    norm_x_center = normalize((x1 + x2) / 2, width)
                    norm_y_center = normalize((y1 + y2) / 2, height)
                    norm_width = normalize(x2 - x1, width)
                    norm_height = normalize(y2 - y1, height)

                    # Start label with the object class and bounding box data
                    person_label = [0, norm_x_center, norm_y_center, norm_width, norm_height]

                    # Extract the person from the image using the bounding box
                    person = image[y1:y2, x1:x2]

                    # Convert to RGB for Mediapipe processing
                    person_rgb = cv2.cvtColor(person, cv2.COLOR_BGR2RGB)
                    person_rgb.flags.writeable = False

                    # Apply Mediapipe Pose estimation
                    pose_results = pose.process(person_rgb)

                    # If pose landmarks detected, append them to the label
                    if pose_results.pose_landmarks:
                        for landmark in pose_results.pose_landmarks.landmark:
                            norm_x = normalize(landmark.x * (x2 - x1) + x1, width)
                            norm_y = normalize(landmark.y * (y2 - y1) + y1, height)
                            visibility = 2 if landmark.visibility > 0.5 else 0
                            person_label.extend([norm_x, norm_y, visibility])
                    else:
                        # If no landmarks are detected, fill with default values for missing landmarks
                        for _ in range(33):
                            person_label.extend([0, 0, 0])

                    # Append this person's label to the label_data
                    label_data.append(' '.join(map(str, person_label)))

            # Save the label data in a .txt file with the same name as the image
            if label_data:  # Only create label file if there's a person detected
                label_file_path = os.path.join(output_folder, f"{os.path.splitext(file_name)[0]}.txt")
                with open(label_file_path, 'w') as f:
                    f.write('\n'.join(label_data))

            # Save the labeled image to the output folder
            output_image_path = os.path.join(output_folder, file_name)
            cv2.imwrite(output_image_path, image)

            print(f"Processed and saved: {output_image_path}")

# Release any resources if needed
cv2.destroyAllWindows()



0: 384x640 4 persons, 104.9ms
Speed: 9.3ms preprocess, 104.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
Processed and saved: C:\Users\Admin\Downloads\data\label\img_0.jpg

0: 384x640 4 persons, 116.7ms
Speed: 1.6ms preprocess, 116.7ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
Processed and saved: C:\Users\Admin\Downloads\data\label\img_1.jpg

0: 384x640 4 persons, 139.0ms
Speed: 5.1ms preprocess, 139.0ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)
Processed and saved: C:\Users\Admin\Downloads\data\label\img_2.jpg

0: 384x640 4 persons, 96.5ms
Speed: 0.0ms preprocess, 96.5ms inference, 2.5ms postprocess per image at shape (1, 3, 384, 640)
Processed and saved: C:\Users\Admin\Downloads\data\label\img_3.jpg

0: 384x640 4 persons, 120.0ms
Speed: 1.1ms preprocess, 120.0ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)
Processed and saved: C:\Users\Admin\Downloads\data\label\img_4.jpg

0: 384x640 4 persons,

### labelling images in a folder saving labels in different folder as image

In [15]:
import cv2
import mediapipe as mp
import torch
import os
from ultralytics import YOLO

# Initialize Mediapipe pose model and drawing utilities
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

# Load YOLOv8 model for person detection
yolo_model = YOLO('yolov8n.pt')  # Use 'yolov8n.pt' for lightweight model

# Specify input and output folder paths
input_folder = r'C:\Users\Admin\Downloads\data\images'
output_folder = r'C:\Users\Admin\Downloads\data\label'

# Ensure the output folder exists
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Function to normalize coordinates
def normalize(value, max_value):
    return value / max_value

# Initiate pose model
with mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.5, min_tracking_confidence=0.5, model_complexity=1) as pose:
    
    # Loop through each image in the input folder
    for file_name in os.listdir(input_folder):
        if file_name.endswith(('.jpg', '.jpeg', '.png')):  # Process only image files
            image_path = os.path.join(input_folder, file_name)

            # Read the image
            image = cv2.imread(image_path)
            height, width, _ = image.shape

            # Use YOLO to detect people in the image
            results = yolo_model(image)
            detections = results[0]  # Get detections from YOLO
            
            # Initialize label data for the image
            label_data = []

            # Process each person detected
            for det in detections.boxes:
                if det.cls == 0:  # Class 0 is for 'person' in YOLO
                    # Get bounding box coordinates for each person
                    x1, y1, x2, y2 = map(int, det.xyxy[0].cpu().numpy())

                    # Normalize bounding box coordinates
                    norm_x_center = normalize((x1 + x2) / 2, width)
                    norm_y_center = normalize((y1 + y2) / 2, height)
                    norm_width = normalize(x2 - x1, width)
                    norm_height = normalize(y2 - y1, height)

                    # Start label with the object class and bounding box data
                    person_label = [0, norm_x_center, norm_y_center, norm_width, norm_height]

                    # Extract the person from the image using the bounding box
                    person = image[y1:y2, x1:x2]

                    # Convert to RGB for Mediapipe processing
                    person_rgb = cv2.cvtColor(person, cv2.COLOR_BGR2RGB)
                    person_rgb.flags.writeable = False

                    # Apply Mediapipe Pose estimation
                    pose_results = pose.process(person_rgb)

                    # If pose landmarks detected, append them to the label
                    if pose_results.pose_landmarks:
                        for landmark in pose_results.pose_landmarks.landmark:
                            norm_x = normalize(landmark.x * (x2 - x1) + x1, width)
                            norm_y = normalize(landmark.y * (y2 - y1) + y1, height)
                            visibility = 2 if landmark.visibility > 0.5 else 0
                            person_label.extend([norm_x, norm_y, visibility])
                    else:
                        # If no landmarks are detected, fill with default values for missing landmarks
                        for _ in range(33):
                            person_label.extend([0, 0, 0])

                    # Append this person's label to the label_data
                    label_data.append(' '.join(map(str, person_label)))

            # Save the label data in a .txt file with the same name as the image
            if label_data:  # Only create label file if there's a person detected
                label_file_path = os.path.join(output_folder, f"{os.path.splitext(file_name)[0]}.txt")
                with open(label_file_path, 'w') as f:
                    f.write('\n'.join(label_data))

            print(f"Processed and saved labels for: {file_name}")

# Release any resources if needed
cv2.destroyAllWindows()



0: 384x640 4 persons, 130.9ms
Speed: 9.1ms preprocess, 130.9ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)
Processed and saved labels for: img_0.jpg

0: 384x640 4 persons, 106.0ms
Speed: 2.5ms preprocess, 106.0ms inference, 4.5ms postprocess per image at shape (1, 3, 384, 640)
Processed and saved labels for: img_1.jpg

0: 384x640 4 persons, 83.8ms
Speed: 1.1ms preprocess, 83.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
Processed and saved labels for: img_10.jpg

0: 384x640 4 persons, 101.7ms
Speed: 1.6ms preprocess, 101.7ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
Processed and saved labels for: img_100.jpg

0: 384x640 4 persons, 143.8ms
Speed: 4.5ms preprocess, 143.8ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
Processed and saved labels for: img_101.jpg

0: 384x640 4 persons, 120.9ms
Speed: 6.1ms preprocess, 120.9ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)
Processed and