### 0. Import libraries

In [1]:
import cv2
import mediapipe as mp
from ultralytics import YOLO
import time

View and update settings with 'yolo settings' or at 'C:\Users\jurek\AppData\Roaming\Ultralytics\settings.yaml'


In [None]:
POSEVIDEO = "POSE2.mp4"
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

pose_video = cv2.VideoCapture(POSEVIDEO)

video_analysis(pose_video, 'pose')

### 1. Resize input

Code that converts the resolution of a video to 800x400:

In [4]:
def resize_video(input_path, output_path, width, height):
    video = cv2.VideoCapture(input_path)
    success, frame = video.read()
    if not success:
        raise ValueError("Kan de video niet lezen")

    # Krijg de oorspronkelijke breedte en hoogte van de video
    original_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    original_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Bereken de schaalverhouding
    scale_ratio = min(width / original_width, height / original_height)

    # Bereken het nieuwe formaat
    new_width = int(original_width * scale_ratio)
    new_height = int(original_height * scale_ratio)

    # Maak een VideoWriter-object om het uitvoerbestand te maken
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    output_video = cv2.VideoWriter(output_path, fourcc, 30.0, (new_width, new_height))

    while success:
        # Verklein het frame naar het nieuwe formaat
        resized_frame = cv2.resize(frame, (new_width, new_height))
        # Schrijf het verkleinde frame naar het uitvoerbestand
        output_video.write(resized_frame)

        # Lees het volgende frame
        success, frame = video.read()

    # Sluit de video-objecten
    video.release()
    output_video.release()

### 2. Apply the model on the converted video

In [6]:
POSEVIDEO = "videos/posevideo.mp4"  # video
PADDLEVIDEO = "videos/paddlevideo.mp4"  # video
threshold = 0.50 # detection threshold
model = YOLO('models/paddletracker v2.1.pt')

# Mediapipe utils
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose


Pose analysis done by Google Mediapipe

In [7]:
def calculate_angle(a, b, c):
    a = np.array(a) # First
    b = np.array(b) # Mid 
    c = np.array(c) # End
    
    radians = np.arctan2(c[1]-b[1], c[0]-b[0]) - np.arctan2(a[1]-b[1], a[0]-b[0])
    angle = np.abs(radians*180.0/np.pi)
    
    if angle > 180.0:
        angle = 360-angle
        
    return angle

In [8]:
def pose_analysis(frame):
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        # Make detection
        results = pose.process(frame)

        stance = "Stance not OK"

        # Recolor back to BGR
        frame.flags.writeable = True
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        # Extract landmarks
        try:
            landmarks = results.pose_landmarks.landmark

            # Get coordinates
            elbow_left = [landmarks[mp_pose.PoseLandmark.LEFT_ELBOW.value].x, landmarks[mp_pose.PoseLandmark.LEFT_ELBOW.value].y]
            shoulder_left = [landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value].x, landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value].y]
            hip_left = [landmarks[mp_pose.PoseLandmark.LEFT_HIP.value].x, landmarks[mp_pose.PoseLandmark.LEFT_HIP.value].y]
            wrist_left = [landmarks[mp_pose.PoseLandmark.LEFT_WRIST.value].x, landmarks[mp_pose.PoseLandmark.LEFT_WRIST.value].y]

            elbow_right = [landmarks[mp_pose.PoseLandmark.RIGHT_ELBOW.value].x, landmarks[mp_pose.PoseLandmark.RIGHT_ELBOW.value].y]
            shoulder_right = [landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER.value].x, landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER.value].y]
            hip_right = [landmarks[mp_pose.PoseLandmark.RIGHT_HIP.value].x, landmarks[mp_pose.PoseLandmark.RIGHT_HIP.value].y]

            # Calculate angles
            angle_hipshoulderelbow_left = calculate_angle(hip_left, shoulder_left, elbow_left)
            angle_hipshoulderelbow_right = calculate_angle(hip_right, shoulder_right, elbow_right)


            # Visualize angles
            cv2.putText(frame, str(angle_hipshoulderelbow_left),
                        tuple(np.multiply(shoulder_left, [640, 480]).astype(int)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2, cv2.LINE_AA)

            cv2.putText(frame, str(angle_hipshoulderelbow_right),
                        tuple(np.multiply(shoulder_right, [640, 480]).astype(int)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2, cv2.LINE_AA)

            # Curl counter logic
            if angle_hipshoulderelbow_left > 60:
                if angle_hipshoulderelbow_right > 60:
                    stance = "Stance OK"
                else:
                    stance = "Right hit"
            elif angle_hipshoulderelbow_right > 60:
                if angle_hipshoulderelbow_left > 60:
                    stance = "Stance OK"
                else:
                    stance = "Left hit"
                

        except:
            pass

    return [time.time(), stance]


Paddle detection (incl. lights) done by Yolov8

In [14]:
def paddle_analysis(frame):
    result = model.predict(frame, threshold)
    result = list(result)  # Convert to a list
    boxes = result[0].boxes.xyxy.cuda()
    scores = result[0].boxes.conf.cuda()
    class_ids = result[0].names

    class_names = ["Paddle", "Light_Green"]  # Voeg hier de namen van de klassen toe in de juiste volgorde

    current_timestamp = time.time()
    detected = False

    image = frame.copy()  # Create a copy of the frame

    for box, score, class_id in zip(boxes, scores, class_ids):
        if score >= threshold:
            if class_id == 1:
                detected = True

    if detected:
        return [current_timestamp, 'legal_hit']
    else:
        return [current_timestamp, 'illegal_hit']


Method that analyses the given video

In [2]:
def video_analysis(cap, model):
    prev_timestamp = time.time()
    paddleState = []
    poseState = []

    with mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        frame_count = 0

        while cap.isOpened():
            ret, frame = cap.read()

            if not ret or frame is None or frame.shape[0] == 0 or frame.shape[1] == 0:
                break

            # Process every 5 frames
            if frame_count % 5 == 0:
                if model == 'pose':
                    poseState.append(pose_analysis(frame))
                elif model == 'paddle':
                    paddleState.append(paddle_analysis(frame))
                else:
                    raise ValueError(f"Invalid model: {model}")

            frame_count += 1

            current_timestamp = time.time()
            time_delta = current_timestamp - prev_timestamp
            prev_timestamp = current_timestamp

    cap.release()
    return [paddleState, poseState]


Method that takes two video path's

In [12]:
def analyse_videos(INPUT1, INPUT2):
    
    video1 = cv2.VideoCapture(INPUT1)
    video2 = cv2.VideoCapture(INPUT2)

    result = video_analysis(video1, 'pose')
    result2 = video_analysis(video2, 'paddle')

    for i in result:
        for j in i:
            print(j)
    
    for i in result2:
        for j in i:
            print(j)

In [13]:
analyse_videos(POSEVIDEO, PADDLEVIDEO)


0: 480x800 1 paddle, 116.2ms
Speed: 3.0ms preprocess, 116.2ms inference, 2.0ms postprocess per image at shape (1, 3, 800, 800)

0: 480x800 1 paddle, 9.0ms
Speed: 3.0ms preprocess, 9.0ms inference, 2.0ms postprocess per image at shape (1, 3, 800, 800)

0: 480x800 1 paddle, 8.0ms
Speed: 4.0ms preprocess, 8.0ms inference, 2.0ms postprocess per image at shape (1, 3, 800, 800)

0: 480x800 1 paddle, 9.0ms
Speed: 3.0ms preprocess, 9.0ms inference, 2.0ms postprocess per image at shape (1, 3, 800, 800)

0: 480x800 1 light_green, 1 paddle, 8.5ms
Speed: 3.0ms preprocess, 8.5ms inference, 2.0ms postprocess per image at shape (1, 3, 800, 800)

0: 480x800 1 light_green, 1 paddle, 9.0ms
Speed: 3.0ms preprocess, 9.0ms inference, 2.0ms postprocess per image at shape (1, 3, 800, 800)

0: 480x800 1 light_green, 1 paddle, 8.0ms
Speed: 2.0ms preprocess, 8.0ms inference, 2.0ms postprocess per image at shape (1, 3, 800, 800)

0: 480x800 1 light_green, 1 paddle, 11.0ms
Speed: 4.0ms preprocess, 11.0ms inferen

[1686006269.13693, 'Stance not OK']
[1686006269.310481, 'Stance not OK']
[1686006269.485525, 'Stance not OK']
[1686006269.7373793, 'Stance not OK']
[1686006269.9119334, 'Stance not OK']
[1686006270.0869765, 'Stance not OK']
[1686006270.2605302, 'Stance not OK']
[1686006270.4460506, 'Stance not OK']
[1686006270.6315804, 'Stance not OK']
[1686006270.8031409, 'Stance not OK']
[1686006270.976188, 'Stance not OK']
[1686006271.1527338, 'Stance not OK']
[1686006271.3392553, 'Stance not OK']
[1686006271.5527077, 'Stance not OK']
[1686006271.7282543, 'Right hit']
[1686006271.9137757, 'Right hit']
[1686006272.09181, 'Stance OK']
[1686006272.2793248, 'Stance OK']
[1686006272.4887786, 'Stance OK']
[1686006272.6782866, 'Stance OK']
[1686006272.8677967, 'Stance OK']
[1686006273.0712688, 'Stance OK']
[1686006273.2657683, 'Stance OK']
[1686006273.5375664, 'Stance OK']
[1686006273.7330642, 'Stance OK']
[1686006273.9495053, 'Stance OK']
[1686006274.13802, 'Stance OK']
[1686006274.3883708, 'Stance OK']
[