### 0. Import libraries

In [38]:
!pip install --upgrade ultralytics





In [39]:
try:
    import cv2
    from ultralytics import YOLO
    from ultralytics.yolo.v8.detect.predict import DetectionPredictor
    import torchvision.ops as ops
    import colorsys
    import datetime
    import time
    import mediapipe as mp
    from moviepy.editor import VideoFileClip
    
    print('All packages imported')
except:
    !pip install cv2 ultralytics torchvision moviepy mediapipe
    import cv2
    from ultralytics import YOLO
    from ultralytics.yolo.v8.detect.predict import DetectionPredictor
    import torchvision.ops as ops
    import colorsys
    import datetime
    import time
    import mediapipe as mp
    from moviepy.editor import VideoFileClip
    print('Some packages were not installed, installed and imported')

All packages imported


### 1. Resize input

Code that resizes a video so it fits within a target width and height while preserving its aspect ratio (the example call below targets 640x360):

In [40]:
def resize_video(input_path, output_path, width, height):
    """Resize a video so it fits inside ``width`` x ``height``, keeping its aspect ratio.

    Every frame is scaled by the same factor (the smaller of the horizontal and
    vertical ratios) and written to ``output_path`` with the 'mp4v' fourcc. The
    output uses the frame rate reported by the input, falling back to 30 fps
    when the input does not report a usable value.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the resized video to write.
        width: Maximum width of the output, in pixels.
        height: Maximum height of the output, in pixels.

    Raises:
        ValueError: If the first frame of the input video cannot be read.
    """
    video = cv2.VideoCapture(input_path)
    output_video = None
    try:
        success, frame = video.read()
        if not success:
            raise ValueError("Kan de video niet lezen")

        # Original width and height of the video
        original_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        original_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Scale factor that makes the frame fit inside the requested box
        scale_ratio = min(width / original_width, height / original_height)

        # Output frame size
        new_width = int(original_width * scale_ratio)
        new_height = int(original_height * scale_ratio)

        # Keep the source frame rate so playback speed is unchanged.
        # `not fps > 0` also catches NaN, which some backends may report.
        fps = video.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0

        # Writer for the output file
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        output_video = cv2.VideoWriter(output_path, fourcc, fps, (new_width, new_height))

        while success:
            # Shrink the frame to the new size and append it to the output
            resized_frame = cv2.resize(frame, (new_width, new_height))
            output_video.write(resized_frame)

            # Read the next frame
            success, frame = video.read()
    finally:
        # Release the video objects even when reading or writing fails
        video.release()
        if output_video is not None:
            output_video.release()

In [41]:
# # Pad naar het invoerbestand
# input_file = 'vid.mp4'

# # Pad naar het uitvoerbestand
# output_file = 'vid_res.mp4'

# # Breedte en hoogte voor het formaat wijzigen
# target_width = 640
# target_height = 360

# resize_video(input_file, output_file, target_width, target_height)

### 2. Apply the model on the converted video

In [42]:
# Path of the video file that analyse_video() opens.
INPUT = "videos/vid2.mp4"  # video
# Minimum confidence score a detection needs before paddle_analysis() draws it.
threshold = 0.70  # detection threshold
# YOLO model loaded from a weights file in the local models/ directory.
model = YOLO('models/paddletracker v1.7.pt')
# Shorthand for the MediaPipe pose solution module used by pose_analysis().
mp_pose = mp.solutions.pose



Pose analysis done by Google MediaPipe

In [43]:
def pose_analysis(image, pose=None):
    """Draw the MediaPipe pose skeleton on a copy of ``image``.

    Args:
        image: Frame in BGR channel order; it is converted with
            ``cv2.COLOR_BGR2RGB`` before being handed to MediaPipe.
        pose: Optional, already constructed ``mp_pose.Pose`` instance to reuse
            across calls (for example one created for a whole video). When
            omitted, a temporary static-image ``Pose`` is created and closed
            for this call only, which is the original behaviour.

    Returns:
        An annotated copy of ``image`` when pose landmarks were found,
        otherwise ``image`` itself, unmodified.
    """
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    if pose is None:
        # No shared instance supplied: build a one-off detector for this frame.
        with mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.5, min_tracking_confidence=0.5) as own_pose:
            results = own_pose.process(rgb_image)
    else:
        results = pose.process(rgb_image)

    if not results.pose_landmarks:
        return image

    # Connect landmarks on a copy so the caller's frame is left untouched
    mp_drawing = mp.solutions.drawing_utils
    annotated_image = image.copy()
    mp_drawing.draw_landmarks(
        annotated_image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
        landmark_drawing_spec=mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2),
        connection_drawing_spec=mp_drawing.DrawingSpec(color=(255, 0, 0), thickness=2)
    )
    return annotated_image



Paddle detection (incl. lights) done by YOLOv8

In [44]:
def paddle_analysis(frame):
    """Run the YOLO model on ``frame`` and draw every confident detection on it.

    For each detection whose confidence is at least ``threshold`` a white
    bounding box is drawn, with the detected class name written below the box.

    Args:
        frame: Image to analyse; it is annotated in place. ``None`` is passed
            through untouched.

    Returns:
        The same ``frame`` object, with the detections drawn on it.
    """
    if frame is None:
        return frame

    # `conf` must be passed by keyword: the second positional parameter of
    # predict() is `stream`, not the confidence threshold.
    results = model.predict(frame, conf=threshold)
    detections = results[0]
    names = detections.names  # class index -> class name
    boxes = detections.boxes

    # tolist() yields plain Python numbers whether the tensors live on the CPU
    # or the GPU, so no explicit .cuda() transfer is needed.
    for xyxy, score, cls in zip(boxes.xyxy.tolist(), boxes.conf.tolist(), boxes.cls.tolist()):
        if score < threshold:
            continue
        x1, y1, x2, y2 = (int(v) for v in xyxy)
        label = names[int(cls)]
        cv2.rectangle(frame, (x1, y1), (x2, y2), (255,255,255), 2)
        cv2.putText(frame, f"{label}", (x1, y2 + 36), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

    return frame


In [45]:
def analyse_video():
    """Play ``INPUT`` in a window with paddle detections and the pose skeleton drawn on it.

    Each frame is passed through ``paddle_analysis`` and then ``pose_analysis``
    before being shown. Playback stops when the video ends, when a frame cannot
    be read, or when the user presses 'q' in the preview window. The capture
    and the window are released even if processing a frame raises.
    """
    cap = cv2.VideoCapture(INPUT)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            # read() reports failure at end of stream; stop before handing an
            # empty frame to the detectors.
            if not ret or frame is None:
                break

            img = paddle_analysis(frame)
            # pose_analysis builds its own Pose instance when none is passed,
            # so no Pose object needs to be held open here.
            img = pose_analysis(img)

            if img is None or img.shape[0] == 0 or img.shape[1] == 0:
                break

            cv2.imshow('image', img)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()


In [46]:
# Run the detection + pose overlay on INPUT; press 'q' in the preview window to stop.
analyse_video()


0: 480x800 2 light_neutrals, 1 paddle, 40.5ms
Speed: 6.5ms preprocess, 40.5ms inference, 2.0ms postprocess per image at shape (1, 3, 800, 800)

0: 480x800 2 light_neutrals, 1 paddle, 12.5ms
Speed: 4.5ms preprocess, 12.5ms inference, 2.5ms postprocess per image at shape (1, 3, 800, 800)

0: 480x800 2 light_neutrals, 1 paddle, 9.5ms
Speed: 6.5ms preprocess, 9.5ms inference, 2.0ms postprocess per image at shape (1, 3, 800, 800)

0: 480x800 2 light_neutrals, 1 paddle, 9.5ms
Speed: 4.0ms preprocess, 9.5ms inference, 2.6ms postprocess per image at shape (1, 3, 800, 800)

0: 480x800 2 light_neutrals, 1 paddle, 9.5ms
Speed: 4.5ms preprocess, 9.5ms inference, 2.0ms postprocess per image at shape (1, 3, 800, 800)

0: 480x800 2 light_neutrals, 1 paddle, 10.5ms
Speed: 4.0ms preprocess, 10.5ms inference, 2.0ms postprocess per image at shape (1, 3, 800, 800)

0: 480x800 2 light_neutrals, 1 paddle, 9.5ms
Speed: 3.5ms preprocess, 9.5ms inference, 3.0ms postprocess per image at shape (1, 3, 800, 800)
