In [None]:
!pip install ultralytics==8.0.196
!pip install opencv-python


In [5]:
import cv2
import os
from ultralytics import YOLO



In [8]:
# Load the YOLO model from the weights file at the relative path 'model/best.pt'.
# `model` is read as a global by process_frame_for_actions() in a later cell,
# so this cell must be executed before that function is called.
model = YOLO('model/best.pt')


In [14]:

def process_frame_for_actions(frame, conf_threshold=0.7, iou_threshold=0.5):
    """Run the global YOLO ``model`` on one frame and draw its detections.

    Every detection whose confidence is at least ``conf_threshold`` gets a
    rectangle and a ``"<label> <confidence>"`` caption drawn onto ``frame``.
    The frame is modified in place.

    Args:
        frame: Image handed to ``model.predict`` and to the OpenCV drawing
            calls (presumably a BGR array from ``cv2.VideoCapture.read`` —
            confirm against the caller).
        conf_threshold: Minimum confidence, forwarded to ``model.predict``
            and re-checked before drawing.
        iou_threshold: IoU threshold forwarded to ``model.predict``.

    Returns:
        tuple: ``(results, frame)`` — the object returned by
        ``model.predict`` and the same ``frame`` object, now annotated.
    """
    results = model.predict(frame, conf=conf_threshold, iou=iou_threshold)

    # Drawing parameters are loop-invariant, so define them once.
    border_color = (255, 0, 0)
    border_thickness = 3
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.9
    text_thickness = 2
    label_gap = 10  # pixels between the box edge and the caption baseline

    for result in results:
        boxes = result.boxes
        for xyxy, conf, cls in zip(boxes.xyxy, boxes.conf, boxes.cls):
            confidence = conf.item()

            # Defensive re-check; predict() was already given the same threshold.
            if confidence < conf_threshold:
                continue

            x1, y1, x2, y2 = xyxy[:4].int().tolist()
            label = model.names[cls.int().item()]

            # Draw the bounding box around the detected action.
            cv2.rectangle(frame, (x1, y1), (x2, y2), border_color, border_thickness)

            caption = f'{label} {confidence:.2f}'
            (_, text_height), _ = cv2.getTextSize(caption, font, font_scale, text_thickness)

            # Place the caption above the box by default. If the box touches the
            # top of the frame the caption would be clipped, so move it just
            # inside the box instead.
            text_y = y1 - label_gap
            if text_y - text_height < 0:
                text_y = y1 + text_height + label_gap

            cv2.putText(frame, caption, (x1, text_y), font, font_scale,
                        border_color, text_thickness)

    return results, frame


# --- Configuration -----------------------------------------------------------
input_video_path = 'data/test_data/mma_test.mp4'
output_video_path = 'data/output_data/mma_output.mp4'

CONF_THRESHOLD = 0.7    # confidence threshold passed to process_frame_for_actions
IOU_THRESHOLD = 0.5     # IoU threshold passed to process_frame_for_actions
COOLDOWN_FRAMES = 15    # frames that must elapse before the same action is counted again
FALLBACK_FPS = 30.0     # used only when the container does not report a frame rate
SHOW_PREVIEW = True     # set to False on headless machines where cv2.imshow is unavailable

# --- Counters ----------------------------------------------------------------
punch_count = 0
kick_count = 0
# Start far enough in the past that a detection on frame 0 passes the cooldown
# check (frame_number > last_*_frame + COOLDOWN_FRAMES).
last_punch_frame = -(COOLDOWN_FRAMES + 1)
last_kick_frame = -(COOLDOWN_FRAMES + 1)
frame_number = 0

cap = cv2.VideoCapture(input_video_path)
out = None

try:
    if not cap.isOpened():
        raise IOError(f'Could not open input video: {input_video_path}')

    # Keep the frame rate as a float: truncating e.g. 29.97 to 29 makes the
    # output video play at the wrong speed.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if fps <= 0:
        fps = FALLBACK_FPS
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # VideoWriter does not create missing directories; make sure the target exists.
    output_dir = os.path.dirname(output_video_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
    if not out.isOpened():
        raise IOError(f'Could not open output video for writing: {output_video_path}')

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Detect actions in this frame and draw them onto it.
        results, processed_frame = process_frame_for_actions(
            frame, conf_threshold=CONF_THRESHOLD, iou_threshold=IOU_THRESHOLD)

        # Count punches and kicks, at most once per cooldown window each.
        for result in results:
            for cls in result.boxes.cls:
                label = result.names[int(cls)]

                if label == 'punch' and frame_number > last_punch_frame + COOLDOWN_FRAMES:
                    punch_count += 1
                    last_punch_frame = frame_number
                elif label == 'kick' and frame_number > last_kick_frame + COOLDOWN_FRAMES:
                    kick_count += 1
                    last_kick_frame = frame_number

        # Overlay the running totals on the frame.
        cv2.putText(processed_frame, f'Punches: {punch_count}  Kicks: {kick_count}',
                    (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        # Save the processed frame to the output video.
        out.write(processed_frame)

        if SHOW_PREVIEW:
            cv2.imshow('Boxing Action Detection', processed_frame)

            # Exit if 'q' key is pressed.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        frame_number += 1
finally:
    # Always release the capture/writer so the output file is finalized even
    # when processing is interrupted by an exception.
    cap.release()
    if out is not None:
        out.release()
    if SHOW_PREVIEW:
        cv2.destroyAllWindows()

# Print the final punch and kick counts
print(f'Total Punches: {punch_count}')
print(f'Total Kicks: {kick_count}')



0: 512x800 1 punch, 571.6ms
Speed: 11.3ms preprocess, 571.6ms inference, 0.0ms postprocess per image at shape (1, 3, 512, 800)

0: 512x800 1 punch, 411.4ms
Speed: 0.0ms preprocess, 411.4ms inference, 8.6ms postprocess per image at shape (1, 3, 512, 800)

0: 512x800 1 punch, 466.4ms
Speed: 7.6ms preprocess, 466.4ms inference, 2.0ms postprocess per image at shape (1, 3, 512, 800)

0: 512x800 1 punch, 462.0ms
Speed: 5.6ms preprocess, 462.0ms inference, 1.9ms postprocess per image at shape (1, 3, 512, 800)

0: 512x800 1 punch, 412.2ms
Speed: 8.5ms preprocess, 412.2ms inference, 2.1ms postprocess per image at shape (1, 3, 512, 800)

0: 512x800 1 punch, 392.9ms
Speed: 6.7ms preprocess, 392.9ms inference, 3.0ms postprocess per image at shape (1, 3, 512, 800)

0: 512x800 1 punch, 419.6ms
Speed: 5.5ms preprocess, 419.6ms inference, 8.3ms postprocess per image at shape (1, 3, 512, 800)

0: 512x800 1 punch, 392.5ms
Speed: 8.6ms preprocess, 392.5ms inference, 1.6ms postprocess per image at shape 

Total Punches: 14
Total Kicks: 6
