In [1]:
import cv2
import torch
import os
import numpy as np

In [2]:
# Load YOLOv5 model (small version for faster processing)
yolo_model = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # You can also use 'yolov5m' or 'yolov5l' for larger models

# Create a directory for saving frames if it doesn't exist
if not os.path.exists('results/frames'):
    os.makedirs('results/frames')

Using cache found in /home/prasanna-nage/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-11-27 Python-3.12.3 torch-2.5.1+cu124 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


In [3]:
def detect_entities(video_path):
    # Initialize the video capture
    cap = cv2.VideoCapture(video_path)
    frame_count = 0
    event_log = []

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Detect objects in the current frame using YOLO
        results = yolo_model(frame)  # Perform detection
        
        # Check the type of result (depending on the YOLO version, the results could be a list or Results object)
        if isinstance(results, list):  # In case of older YOLO versions, where result is a list
            detections = results[0]  # Get the first detection result
        else:  # For newer versions, the result is a Results object (like YOLOv5)
            detections = results.xyxy[0].cpu().numpy()  # Get detections as a NumPy array
        
        # Process each detected object
        for detection in detections:
            x1, y1, x2, y2, conf, cls = detection
            label = yolo_model.names[int(cls)]  # Get class label of detected object

            # Check if the label is similar to "baseball bat" or "sports equipment" and classify it as a "hockey stick"
            if label == "baseball bat" or label == "sports ball":  # Adjust this based on actual YOLO class
                if is_hockey_stick(frame[int(y1):int(y2), int(x1):int(x2)]):  # Check if it looks like a hockey stick
                    label = "hockey stick"  # Classify it as hockey stick
            
            # Log detected events with frame count, label, confidence, and bounding box
            event_log.append({
                'frame': frame_count,
                'label': label,
                'confidence': conf,
                'bbox': (x1, y1, x2, y2)
            })

            # Detect teams and referees
            team = classify_team(frame[int(y1):int(y2), int(x1):int(x2)])
            is_referee = classify_referee(frame[int(y1):int(y2), int(x1):int(x2)])

            # Add team and referee information to the event log
            if team is not None:
                event_log[-1]['team'] = team
            if is_referee:
                event_log[-1]['referee'] = True

            # Draw bounding box around the detected object on the frame
            color = (0, 255, 0)  # Default color (green) for objects
            if team == 0:
                color = (0, 0, 255)  # Team 1 (Blue)
            elif team == 1:
                color = (255, 0, 0)  # Team 2 (Red)
            elif is_referee:
                color = (255, 255, 255)  # Referee (White)

            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)  # Draw colored box
            cv2.putText(frame, f'{label} {conf:.2f}', (int(x1), int(y1)-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

        # Save the frame with bounding boxes
        save_frame(frame, frame_count)

        # Display the frame with bounding boxes
        cv2.imshow("Detected Entities", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):  # Press 'q' to quit early
            break
        
        frame_count += 1

    cap.release()
    cv2.destroyAllWindows()

    return event_log

In [4]:
# Function to save the frame with bounding boxes
def save_frame(frame, frame_count):
    output_frame_path = f"results/frames/frame_{frame_count:04d}.jpg"
    cv2.imwrite(output_frame_path, frame)
    print(f"Saved frame {frame_count}.")

In [5]:
def classify_team(player_image):
    # Convert the player image to HSV for easier color detection
    hsv = cv2.cvtColor(player_image, cv2.COLOR_BGR2HSV)
    
    # Define color ranges for team detection
    # Example: Team 1 (Blue) and Team 2 (Red)
    team_1_lower = np.array([100, 150, 50])  # HSV range for blue color
    team_1_upper = np.array([140, 255, 255])
    team_2_lower = np.array([0, 120, 70])    # HSV range for red color
    team_2_upper = np.array([10, 255, 255])
    
    # Mask to detect blue color (Team 1)
    mask_team_1 = cv2.inRange(hsv, team_1_lower, team_1_upper)
    # Mask to detect red color (Team 2)
    mask_team_2 = cv2.inRange(hsv, team_2_lower, team_2_upper)

    # Count the number of pixels that match each team color
    if cv2.countNonZero(mask_team_1) > cv2.countNonZero(mask_team_2):
        return 0  # Team 1 (Blue)
    elif cv2.countNonZero(mask_team_2) > cv2.countNonZero(mask_team_1):
        return 1  # Team 2 (Red)
    else:
        return None  # Undetermined (could be referees or other cases)

In [6]:
# Function to classify if the detected player is a referee
def classify_referee(player_image):
    # Convert the player image to grayscale and apply edge detection
    gray = cv2.cvtColor(player_image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, threshold1=100, threshold2=200)

    # A simple heuristic: if there are a lot of edges (referee shirts often have more distinct lines)
    if np.count_nonzero(edges) > 500:
        return True  # Likely a referee
    return False


In [7]:
# Function to check if an object looks like a hockey stick (simple heuristic based on shape)
def is_hockey_stick(obj_image):
    # Check aspect ratio (hockey sticks are typically long and narrow)
    height, width = obj_image.shape[:2]
    aspect_ratio = width / height
    
    # A hockey stick should have an elongated shape (aspect ratio higher than a normal bat or other equipment)
    if aspect_ratio > 4:  # You can adjust this threshold based on observations
        return True
    return False

In [8]:
# Path to your video (update with your actual video file path)
video_path = 'clip.mp4'

print("Starting Entity Detection...")
event_log = detect_entities(video_path)
print("Entity Detection Completed!")
print("Detected Events:", event_log)

Starting Entity Detection...
Saved frame 0.


  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 1.
Saved frame 2.
Saved frame 3.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 4.
Saved frame 5.
Saved frame 6.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 7.
Saved frame 8.
Saved frame 9.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 10.
Saved frame 11.
Saved frame 12.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 13.
Saved frame 14.
Saved frame 15.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 16.
Saved frame 17.
Saved frame 18.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 19.
Saved frame 20.
Saved frame 21.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 22.
Saved frame 23.
Saved frame 24.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 25.
Saved frame 26.
Saved frame 27.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 28.
Saved frame 29.
Saved frame 30.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 31.
Saved frame 32.
Saved frame 33.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 34.
Saved frame 35.
Saved frame 36.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 37.
Saved frame 38.
Saved frame 39.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 40.
Saved frame 41.
Saved frame 42.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 43.
Saved frame 44.
Saved frame 45.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 46.
Saved frame 47.
Saved frame 48.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 49.
Saved frame 50.
Saved frame 51.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 52.
Saved frame 53.
Saved frame 54.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 55.
Saved frame 56.
Saved frame 57.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 58.
Saved frame 59.
Saved frame 60.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 61.
Saved frame 62.
Saved frame 63.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 64.


  with amp.autocast(autocast):


Saved frame 65.
Saved frame 66.
Saved frame 67.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 68.
Saved frame 69.
Saved frame 70.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 71.
Saved frame 72.
Saved frame 73.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 74.
Saved frame 75.
Saved frame 76.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 77.
Saved frame 78.
Saved frame 79.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 80.
Saved frame 81.
Saved frame 82.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 83.
Saved frame 84.
Saved frame 85.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 86.
Saved frame 87.
Saved frame 88.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 89.
Saved frame 90.
Saved frame 91.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 92.
Saved frame 93.
Saved frame 94.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 95.
Saved frame 96.


  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 97.
Saved frame 98.
Saved frame 99.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 100.
Saved frame 101.
Saved frame 102.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 103.
Saved frame 104.
Saved frame 105.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 106.
Saved frame 107.
Saved frame 108.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 109.
Saved frame 110.
Saved frame 111.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 112.
Saved frame 113.
Saved frame 114.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 115.
Saved frame 116.
Saved frame 117.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 118.


  with amp.autocast(autocast):


Saved frame 119.
Saved frame 120.
Saved frame 121.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 122.
Saved frame 123.
Saved frame 124.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 125.
Saved frame 126.
Saved frame 127.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 128.
Saved frame 129.
Saved frame 130.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 131.
Saved frame 132.
Saved frame 133.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 134.
Saved frame 135.
Saved frame 136.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 137.
Saved frame 138.
Saved frame 139.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 140.
Saved frame 141.
Saved frame 142.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 143.
Saved frame 144.
Saved frame 145.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 146.
Saved frame 147.
Saved frame 148.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 149.
Saved frame 150.
Saved frame 151.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 152.
Saved frame 153.
Saved frame 154.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 155.
Saved frame 156.
Saved frame 157.


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):


Saved frame 158.
Entity Detection Completed!
Detected Events: [{'frame': 0, 'label': 'person', 'confidence': np.float32(0.84099084), 'bbox': (np.float32(560.69086), np.float32(789.789), np.float32(765.5529), np.float32(1016.9264)), 'team': 0, 'referee': True}, {'frame': 0, 'label': 'person', 'confidence': np.float32(0.83934027), 'bbox': (np.float32(561.8473), np.float32(553.87634), np.float32(653.9218), np.float32(718.03766)), 'team': 1, 'referee': True}, {'frame': 0, 'label': 'person', 'confidence': np.float32(0.8168632), 'bbox': (np.float32(19.091795), np.float32(509.1971), np.float32(116.874985), np.float32(661.25446)), 'team': 1, 'referee': True}, {'frame': 0, 'label': 'person', 'confidence': np.float32(0.78326), 'bbox': (np.float32(1566.964), np.float32(391.0603), np.float32(1640.4006), np.float32(540.13513)), 'team': 1}, {'frame': 0, 'label': 'person', 'confidence': np.float32(0.7428544), 'bbox': (np.float32(1343.3119), np.float32(450.56287), np.float32(1444.3934), np.float32(593