In [10]:
import cv2
import numpy as np
from ultralytics import YOLO
import os
import pickle
import supervision as sv
import easyocr

In [4]:
# Open the source video with OpenCV.
cap = cv2.VideoCapture('CV_Task.mkv')

# Load the YOLO model from the weights file at models/best.pt.
model = YOLO('models/best.pt')


In [5]:
# Run the model over the whole video file and ask Ultralytics to save its output.
# NOTE(review): the warning captured below this cell says results accumulate in
# RAM unless stream=True is passed; consider streaming for long videos.
results = model.predict(
    source='CV_Task.mkv',
    save=True,
)



errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

video 1/1 (frame 1/744) C:\Users\LORD TRADE\NN\YOLO\projects\football\CV_Task.mkv: 384x640 18 players, 3 referees, 145.5ms
video 1/1 (frame 2/744) C:\Users\LORD TRADE\NN\YOLO\projects\football\CV_Task.mkv: 384x640 18 players, 3 referees, 120.4ms
video 1/1 (frame 3/744) C:\Users\LORD TRADE\NN\YOLO\projects\football\CV_Task.mkv: 384x640 18 players, 3 referees, 110.2ms
video 1/1 (frame 4/744) C:\Users\LORD TRADE\NN\YOLO\projects\football\CV_Task.mkv: 384x640 18 players, 3 referees, 106.1ms
video 1/1 (frame 5/744) C:\Users\LORD TRADE\NN\YOLO\projects\football\CV_Task.mk

KeyboardInterrupt: 

# Create Read & Save Video Functions

In [3]:
def read_video(video_path):
    """Read every frame of a video into memory.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.

    Returns:
        A list with one entry per decoded frame, in order. The list is empty
        when no frame could be read (e.g. the capture failed to open).
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            # ``ret`` turns falsy once no further frame can be read.
            if not ret:
                break
            frames.append(frame)
    finally:
        # Always free the capture handle, even if reading raised.
        cap.release()
    return frames

def save_video(output_video_frames, output_video_path, fps=24):
    """Write a sequence of frames to a video file using the XVID codec.

    Args:
        output_video_frames: Non-empty sequence of frames; the first frame's
            ``shape`` (height, width, ...) determines the output size.
        output_video_path: Destination file path. Its parent directory is
            created when it does not exist yet.
        fps: Frame rate passed to ``cv2.VideoWriter`` (defaults to 24, the
            value that used to be hard-coded).

    Raises:
        ValueError: If ``output_video_frames`` is empty.
        IOError: If the video writer could not be opened.
    """
    # len() instead of truthiness so a numpy array of frames is accepted too.
    if len(output_video_frames) == 0:
        raise ValueError("save_video: no frames to write")

    # VideoWriter does not create missing folders; make sure the target exists.
    output_dir = os.path.dirname(output_video_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    height, width = output_video_frames[0].shape[:2]
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
    try:
        if not out.isOpened():
            raise IOError(f"save_video: could not open {output_video_path!r} for writing")
        for frame in output_video_frames:
            out.write(frame)
    finally:
        # Release the writer even when a write fails so the file is closed.
        out.release()


def get_center_of_bbox(bbox):
    """Return the (x, y) midpoint of a box given as (x1, y1, x2, y2).

    Each coordinate is the average of the two edges, converted with ``int``.
    """
    left, top, right, bottom = bbox
    center_x = int((left + right) / 2)
    center_y = int((top + bottom) / 2)
    return center_x, center_y

def get_bbox_width(bbox):
    """Return the horizontal extent of a box: its third element minus its first."""
    left = bbox[0]
    right = bbox[2]
    return right - left

# Tracker

In [17]:
class Tracker:
    """Detect, track and annotate players, referees and the ball in video frames.

    Wraps a YOLO model for per-frame detection and a supervision ``ByteTrack``
    instance for assigning track ids, and provides the drawing helpers used to
    render the results back onto the frames.
    """

    def __init__(self, model_path):
        """Load the YOLO model from ``model_path`` and create a ByteTrack tracker."""
        self.model = YOLO(model_path)
        self.tracker = sv.ByteTrack()

    def detect_frames(self, frames):
        """Run the model over ``frames`` in batches of 20 (confidence threshold 0.1).

        Returns:
            A flat list holding the prediction results of every batch, in order.
        """
        batch_size = 20
        detections = []
        for i in range(0, len(frames), batch_size):
            detections_batch = self.model.predict(frames[i: i + batch_size], conf=0.1)
            detections += detections_batch
        return detections

    def get_object_tracks(self, frames, read_from_stub=False, stub_path=None):
        """Build per-frame track dictionaries for players, referees and the ball.

        Args:
            frames: Sequence of frames passed to ``detect_frames``.
            read_from_stub: When true and ``stub_path`` exists, the pickled
                tracks are loaded and returned without running detection.
            stub_path: Optional pickle file used as a cache. When given, freshly
                computed tracks are written to it (its directory is created if
                needed).

        Returns:
            ``{"players": [...], "referees": [...], "ball": [...]}`` where each
            list has one dict per frame. Player/referee dicts map track id to
            ``{"bbox": [...]}``; the ball dict uses the fixed key ``1``.
        """
        if read_from_stub and stub_path is not None and os.path.exists(stub_path):
            # SECURITY: pickle.load can execute arbitrary code. Only point
            # stub_path at files this notebook produced itself.
            with open(stub_path, 'rb') as f:
                tracks = pickle.load(f)
            return tracks

        detections = self.detect_frames(frames)
        tracks = {
            "players": [],
            "referees": [],
            "ball": []
        }

        for frame_num, detection in enumerate(detections):
            cls_names = detection.names
            cls_names_inv = {v: k for k, v in cls_names.items()}

            # Look the ids up once; .get keeps a model that lacks one of these
            # classes from raising KeyError (that class is simply never matched).
            player_cls = cls_names_inv.get("player")
            referee_cls = cls_names_inv.get("referee")
            ball_cls = cls_names_inv.get("ball")

            # Convert to Supervision Detection Format
            detection_supervision = sv.Detections.from_ultralytics(detection)

            # Convert GoalKeeper to Player Object
            if player_cls is not None:
                for object_ind, class_id in enumerate(detection_supervision.class_id):
                    if cls_names[class_id] == "goalkeeper":
                        detection_supervision.class_id[object_ind] = player_cls

            # Track Objects
            detection_with_tracks = self.tracker.update_with_detections(detection_supervision)

            tracks["players"].append({})
            tracks["referees"].append({})
            tracks["ball"].append({})

            # Each iterated detection is indexed positionally: element 0 is read
            # as the bbox, element 3 as the class id, element 4 as the track id.
            for frame_detection in detection_with_tracks:
                bbox = frame_detection[0].tolist()
                cls_id = frame_detection[3]
                track_id = frame_detection[4]

                if player_cls is not None and cls_id == player_cls:
                    tracks["players"][frame_num][track_id] = {"bbox": bbox}
                if referee_cls is not None and cls_id == referee_cls:
                    tracks["referees"][frame_num][track_id] = {"bbox": bbox}

            # The ball is taken from the raw (untracked) detections and always
            # stored under key 1; a later ball detection overwrites an earlier one.
            for frame_detection in detection_supervision:
                bbox = frame_detection[0].tolist()
                cls_id = frame_detection[3]

                if ball_cls is not None and cls_id == ball_cls:
                    tracks["ball"][frame_num][1] = {"bbox": bbox}

        if stub_path is not None:
            # open(..., 'wb') fails if the folder is missing, so create it first.
            stub_dir = os.path.dirname(stub_path)
            if stub_dir:
                os.makedirs(stub_dir, exist_ok=True)
            with open(stub_path, 'wb') as f:
                pickle.dump(tracks, f)

        return tracks

    def draw_ellipse(self, frame, bbox, color, track_id=None):
        """Draw an open ellipse at the bottom of ``bbox`` and, optionally, an id label.

        Args:
            frame: Image drawn on in place.
            bbox: Box as (x1, y1, x2, y2).
            color: Colour used for the ellipse and the label background.
            track_id: When not None, a filled 40x20 rectangle containing the id
                is drawn just below the ellipse centre.

        Returns:
            The same ``frame`` object.
        """
        y2 = int(bbox[3])
        x_center, _ = get_center_of_bbox(bbox)
        width = get_bbox_width(bbox)

        # Arc from -45 to 235 degrees, centred on the bottom-centre of the box.
        cv2.ellipse(
            frame,
            center=(x_center, y2),
            axes=(int(width), int(0.35 * width)),
            angle=0.0,
            startAngle=-45,
            endAngle=235,
            color=color,
            thickness=2,
            lineType=cv2.LINE_4
        )

        rectangle_width = 40
        rectangle_height = 20
        x1_rect = x_center - rectangle_width // 2
        x2_rect = x_center + rectangle_width // 2
        y1_rect = (y2 - rectangle_height // 2) + 15
        y2_rect = (y2 + rectangle_height // 2) + 15

        if track_id is not None:
            cv2.rectangle(frame,
                          (int(x1_rect), int(y1_rect)),
                          (int(x2_rect), int(y2_rect)),
                          color,
                          cv2.FILLED)

            x1_text = x1_rect + 12
            # Shift three-digit ids left so the text stays inside the rectangle.
            if track_id > 99:
                x1_text -= 10

            cv2.putText(
                frame,
                f"{track_id}",
                (int(x1_text), int(y1_rect + 15)),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.6,
                (0, 0, 0),
                2
            )

        return frame

    def draw_traingle(self, frame, bbox, color):
        """Draw a filled, black-outlined triangle pointing at the top-centre of ``bbox``.

        The method name keeps its original spelling for existing callers;
        ``draw_triangle`` is provided as an alias.

        Returns:
            The same ``frame`` object.
        """
        y = int(bbox[1])
        x, _ = get_center_of_bbox(bbox)

        # OpenCV contour functions need 32-bit integer points; numpy's default
        # integer dtype is platform dependent, so fix it explicitly.
        triangle_points = np.array([
            [x, y],
            [x - 10, y - 20],
            [x + 10, y - 20],
        ], dtype=np.int32)
        cv2.drawContours(frame, [triangle_points], 0, color, cv2.FILLED)
        cv2.drawContours(frame, [triangle_points], 0, (0, 0, 0), 2)

        return frame

    # Correctly spelled alias; both names refer to the same method.
    draw_triangle = draw_traingle

    def draw_annotations(self, video_frames, tracks):
        """Render player, referee and ball markers onto copies of the frames.

        Args:
            video_frames: Sequence of frames; each one is copied before drawing.
            tracks: Dictionary in the format returned by ``get_object_tracks``,
                indexed by the same frame numbers as ``video_frames``.

        Returns:
            A new list of annotated frames, one per input frame.
        """
        output_video_frames = []
        for frame_num, frame in enumerate(video_frames):
            frame = frame.copy()

            player_dict = tracks["players"][frame_num]
            ball_dict = tracks["ball"][frame_num]
            referee_dict = tracks["referees"][frame_num]

            # Draw Players (with their track id label)
            for track_id, player in player_dict.items():
                frame = self.draw_ellipse(frame, player["bbox"], (0, 0, 255), track_id)

            # Draw Referees (no label)
            for _, referee in referee_dict.items():
                frame = self.draw_ellipse(frame, referee["bbox"], (0, 255, 255))

            # Draw ball
            for track_id, ball in ball_dict.items():
                frame = self.draw_traingle(frame, ball["bbox"], (0, 255, 0))

            output_video_frames.append(frame)

        return output_video_frames


In [18]:
def main(
    video_path="CV_Task.mkv",
    model_path="models/best.pt",
    stub_path='tracker_stubs/player_detection.pkl',
    output_path='output_videos/output.avi',
    read_from_stub=False,
):
    """Run the full pipeline: read video, track objects, annotate, save.

    All parameters default to the values that were previously hard-coded, so
    calling ``main()`` with no arguments behaves as before.

    Args:
        video_path: Input video handed to ``read_video``.
        model_path: Weights file handed to ``Tracker``.
        stub_path: Pickle cache path handed to ``Tracker.get_object_tracks``
            (may be None to disable caching).
        output_path: Destination of the annotated video.
        read_from_stub: Forwarded to ``Tracker.get_object_tracks``.

    Raises:
        ValueError: If no frame could be read from ``video_path``.
    """
    video_frames = read_video(video_path)
    # Fail early with a clear message instead of an IndexError at save time.
    if not video_frames:
        raise ValueError(f"No frames could be read from {video_path!r}")

    # Neither the stub writer nor the video writer creates missing folders.
    for target in (stub_path, output_path):
        folder = os.path.dirname(target) if target else ""
        if folder:
            os.makedirs(folder, exist_ok=True)

    # Initialize Tracker
    tracker = Tracker(model_path)
    tracks = tracker.get_object_tracks(
        video_frames,
        read_from_stub=read_from_stub,
        stub_path=stub_path,
    )

    # Draw Object Tracks
    output_video_frames = tracker.draw_annotations(video_frames, tracks)

    # Save Video
    save_video(output_video_frames, output_path)


if __name__ == "__main__":
    main()


0: 384x640 18 players, 3 referees, 85.8ms
1: 384x640 19 players, 3 referees, 85.8ms
2: 384x640 20 players, 3 referees, 85.8ms
3: 384x640 19 players, 3 referees, 85.8ms
4: 384x640 20 players, 3 referees, 85.8ms
5: 384x640 20 players, 3 referees, 85.8ms
6: 384x640 20 players, 3 referees, 85.8ms
7: 384x640 20 players, 3 referees, 85.8ms
8: 384x640 20 players, 3 referees, 85.8ms
9: 384x640 20 players, 3 referees, 85.8ms
10: 384x640 19 players, 3 referees, 85.8ms
11: 384x640 20 players, 3 referees, 85.8ms
12: 384x640 20 players, 3 referees, 85.8ms
13: 384x640 20 players, 3 referees, 85.8ms
14: 384x640 19 players, 3 referees, 85.8ms
15: 384x640 19 players, 3 referees, 85.8ms
16: 384x640 20 players, 3 referees, 85.8ms
17: 384x640 20 players, 3 referees, 85.8ms
18: 384x640 20 players, 3 referees, 85.8ms
19: 384x640 19 players, 3 referees, 85.8ms
Speed: 2.4ms preprocess, 85.8ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 19 players, 3 referees, 85.0ms
1: 384x64