In [None]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.2.103-py3-none-any.whl.metadata (39 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.8-py3-none-any.whl.metadata (9.3 kB)
Downloading ultralytics-8.2.103-py3-none-any.whl (875 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m875.1/875.1 kB[0m [31m18.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.8-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.2.103 ultralytics-thop-2.0.8


### Player Tracker (player_tracker.py)

In [None]:
from ultralytics import YOLO
import cv2
import pickle

class PlayerTracker:
    """Detect and track people across video frames with an Ultralytics YOLO model.

    Per-frame results are dictionaries mapping an integer track ID to an
    ``[x1, y1, x2, y2]`` bounding box.
    """

    def __init__(self, model_path):
        """Load the YOLO model identified by ``model_path``."""
        self.model = YOLO(model_path)

    def detect_frames(self, frames, read_from_stub=False, stub_path=None):
        """Return one ``{track_id: bbox}`` dictionary per frame.

        Args:
            frames: Iterable of frames handed to ``detect_frame`` one by one.
            read_from_stub: When true and ``stub_path`` is given, skip detection
                and return the previously pickled results instead.
            stub_path: Optional pickle file. It is read when ``read_from_stub``
                is true, otherwise freshly computed results are written to it.

        Returns:
            List of per-frame detection dictionaries.
        """
        if read_from_stub and stub_path is not None:
            # NOTE(review): pickle.load can execute arbitrary code; only load
            # stub files that this pipeline wrote itself.
            with open(stub_path, 'rb') as f:
                return pickle.load(f)

        player_detections = [self.detect_frame(frame) for frame in frames]

        if stub_path is not None:
            with open(stub_path, 'wb') as f:
                pickle.dump(player_detections, f)

        return player_detections

    def detect_frame(self, frame):
        """Track objects in a single frame and keep the boxes classified as "person".

        Returns:
            Dict mapping ``int`` track ID to ``[x1, y1, x2, y2]``.
        """
        results = self.model.track(frame, persist=True)[0]
        id_name_dict = results.names
        player_dict = {}
        for box in results.boxes:
            # `box.id` is None when the tracker attached no ID to this box;
            # such a box cannot be keyed by track ID, so skip it instead of
            # crashing on `None.tolist()`.
            if box.id is None:
                continue
            object_cls_id = int(box.cls.tolist()[0])
            if id_name_dict[object_cls_id] != "person":
                continue
            player_dict[int(box.id.tolist()[0])] = box.xyxy.tolist()[0]
        return player_dict

    def draw_bboxes(self, video_frames, player_detections):
        """Draw each player's box and ID label onto the corresponding frame.

        The frames are annotated in place; the returned list holds the same
        frame objects, in order.
        """
        output_video_frames = []
        for frame, player_dict in zip(video_frames, player_detections):
            for track_id, bbox in player_dict.items():
                x1, y1, x2, y2 = bbox
                # Label sits 10 px above the top-left corner of the box.
                cv2.putText(frame, f"Player ID: {track_id}", (int(x1), int(y1 - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 0), 2)
                cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
            output_video_frames.append(frame)
        return output_video_frames

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


### Ball Tracker (ball_tracker.py)

In [8]:
import pandas as pd

class BallTracker:
    """Detect the ball in video frames with a YOLO model and fill detection gaps.

    Per-frame results are dictionaries: ``{1: [x1, y1, x2, y2]}`` when a ball
    box is available and ``{}`` otherwise. The key is always the constant ``1``.
    """

    def __init__(self, model_path):
        """Load the YOLO model identified by ``model_path``."""
        self.model = YOLO(model_path)

    def interpolate_ball_positions(self, ball_positions):
        """Fill frames without a detection by interpolating between neighbours.

        Args:
            ball_positions: List of per-frame dictionaries as produced by
                ``detect_frames``.

        Returns:
            List of the same length. Each entry is ``{1: [x1, y1, x2, y2]}``;
            if no frame at all contains a detection, every entry is ``{}``.
        """
        if not ball_positions:
            return []

        columns = ['x1', 'y1', 'x2', 'y2']
        missing = [float('nan')] * len(columns)
        # Spell out missing frames as NaN rows so the frame can be built (and
        # typed as float) even when not a single frame has a detection.
        rows = [frame_dict.get(1) or missing for frame_dict in ball_positions]
        df_ball_positions = pd.DataFrame(rows, columns=columns, dtype=float)

        # interpolate() fills gaps after the first detection; bfill() copies
        # the first detection back onto any leading frames without one.
        df_ball_positions = df_ball_positions.interpolate().bfill()

        return [
            # value == value is False only for NaN, i.e. rows that are still unfilled.
            {1: row} if all(value == value for value in row) else {}
            for row in df_ball_positions.to_numpy().tolist()
        ]

    def detect_frames(self, frames, read_from_stub=False, stub_path=None):
        """Return one ball-detection dictionary per frame.

        Args:
            frames: Iterable of frames handed to ``detect_frame`` one by one.
            read_from_stub: When true and ``stub_path`` is given, skip detection
                and return the previously pickled results instead.
            stub_path: Optional pickle file. It is read when ``read_from_stub``
                is true, otherwise freshly computed results are written to it.
        """
        if read_from_stub and stub_path is not None:
            # NOTE(review): pickle.load can execute arbitrary code; only load
            # stub files that this pipeline wrote itself.
            with open(stub_path, 'rb') as f:
                return pickle.load(f)

        ball_detections = [self.detect_frame(frame) for frame in frames]

        if stub_path is not None:
            with open(stub_path, 'wb') as f:
                pickle.dump(ball_detections, f)

        return ball_detections

    def detect_frame(self, frame):
        """Run the detector on one frame and keep the most confident box.

        Returns:
            ``{1: [x1, y1, x2, y2]}`` for the highest-confidence box, or ``{}``
            when the model reports no boxes.
        """
        results = self.model.predict(frame, conf=0.15)[0]
        # Only one ball is tracked. Previously each box overwrote key 1, so the
        # box that happened to be iterated last won regardless of confidence.
        best_box = max(results.boxes, key=lambda box: box.conf.tolist()[0], default=None)
        if best_box is None:
            return {}
        return {1: best_box.xyxy.tolist()[0]}

    def draw_bboxes(self, video_frames, ball_detections):
        """Draw the ball box and label onto the corresponding frame.

        The frames are annotated in place; the returned list holds the same
        frame objects, in order. Frames whose dictionary is empty are left
        untouched.
        """
        output_video_frames = []
        for frame, ball_dict in zip(video_frames, ball_detections):
            for track_id, bbox in ball_dict.items():
                x1, y1, x2, y2 = bbox
                # Label sits 10 px above the top-left corner of the box.
                cv2.putText(frame, f"Ball ID: {track_id}", (int(x1), int(y1 - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 255), 2)
                cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 255), 2)
            output_video_frames.append(frame)
        return output_video_frames

### video_utils.py

In [6]:
import cv2

def read_video(video_path):
  """Read every frame of the video at ``video_path`` into a list.

  Reading stops at the first frame that ``cap.read()`` reports as
  unsuccessful. If the capture never opens, an empty list is returned.

  Returns:
    List of frames in playback order, as returned by ``cv2.VideoCapture.read``.
  """
  cap = cv2.VideoCapture(video_path)
  frames = []
  try:
    while cap.isOpened():
      ret, frame = cap.read()
      if not ret:
        break
      frames.append(frame)
  finally:
    # Release the capture handle even if reading raises part-way through.
    cap.release()
  return frames

def save_video(output_video_frames, output_video_path, fps=24):
  """Write frames to ``output_video_path`` as an MJPG-encoded video.

  Args:
    output_video_frames: Non-empty sequence of frames; the first frame's
      ``shape[:2]`` determines the output height and width.
    output_video_path: Destination file path.
    fps: Frame rate passed to the writer. Defaults to 24, the value that was
      previously hard-coded.

  Raises:
    IndexError: If ``output_video_frames`` is empty.
  """
  if len(output_video_frames) == 0:
    raise IndexError("save_video needs at least one frame to determine the output size")

  height, width = output_video_frames[0].shape[:2]
  fourcc = cv2.VideoWriter_fourcc(*'MJPG')
  # VideoWriter takes the frame size as (width, height).
  out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
  try:
    for frame in output_video_frames:
      out.write(frame)
  finally:
    # Release the writer even if a write fails part-way through.
    out.release()

### Tennis Court Keypoints Training

In [None]:
!unzip tennis_court_det_dataset.zip

In [None]:
import torch
from torch.utils.data import  Dataset, DataLoader
from torchvision  import transforms, utils, models

import json
import cv2
import numpy as np

# Select the compute device: CUDA when torch reports it as available, otherwise the CPU.
dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Show which device was selected.
print(dev)

In [None]:
class KeypointsDataset(Dataset):
  """Dataset of court images paired with flattened keypoint coordinates.

  Each sample is ``(image_tensor, kps)``: the image is resized to 224x224 and
  normalised, and ``kps`` is a float32 vector of alternating x, y values
  rescaled from the original image size into that 224x224 frame.
  """

  def __init__(self, img_dir, data_file):
    """Load the annotation list from the JSON file ``data_file``.

    Args:
      img_dir: Directory containing ``<id>.png`` images.
      data_file: JSON file holding a list of items with ``'id'`` and ``'kps'`` keys.
    """
    self.img_dir = img_dir
    with open(data_file, 'r') as f:
      self.data = json.load(f)

    self.transforms = transforms.Compose([
      transforms.ToPILImage(),
      transforms.Resize((224, 224)),
      transforms.ToTensor(),
      transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

  def __len__(self):
    """Return the number of annotated items."""
    return len(self.data)

  def __getitem__(self, idx):
    """Return ``(transformed_image, scaled_keypoints)`` for item ``idx``.

    Raises:
      FileNotFoundError: If the image for the item cannot be read.
    """
    item = self.data[idx]
    img_path = f"{self.img_dir}/{item['id']}.png"
    img = cv2.imread(img_path)
    if img is None:
      # cv2.imread signals an unreadable/missing file by returning None.
      raise FileNotFoundError(f"Could not read image: {img_path}")
    h, w = img.shape[:2]

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = self.transforms(img)

    kps = np.array(item['kps']).flatten().astype(np.float32)

    # Rescale (not overwrite) the coordinates into the 224x224 resized frame.
    # Plain assignment here would replace every label with the scale factor.
    kps[::2] *= 224.0 / w   # x coordinates
    kps[1::2] *= 224.0 / h  # y coordinates

    return img, kps

In [None]:
# Build the train/validation datasets from the shared image directory and
# their respective annotation files.
train_dataset = KeypointsDataset("data/images",'data/data_train.json')
val_dataset = KeypointsDataset("data/images",'data/data_val.json')

# Shuffle only the training batches; validation order is kept fixed so that
# repeated evaluations see the samples in the same order.
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)

In [None]:
# Build the model: a torchvision ResNet-50 created with pretrained=True,
# moved to the device selected in `dev`.
model = models.resnet50(pretrained=True)
model.fc = torch.nn.Linear(model.fc.in_features, 14*2) # Swap the final layer for a 28-value output (presumably 14 keypoints x (x, y) - confirm against the dataset)
model = model.to(dev)

# Training setup: mean-squared-error loss, optimised with Adam at lr=1e-4.
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Run 20 passes over train_loader.
epochs = 20
for epoch in range(epochs):
  for i, (imgs, kps) in enumerate(train_loader):
    # Move the batch to the same device as the model.
    imgs = imgs.to(dev)
    kps = kps.to(dev)

    # One optimisation step: clear old gradients, forward pass, loss,
    # backward pass, parameter update.
    optimizer.zero_grad()
    outputs = model(imgs)
    loss = criterion(outputs, kps)
    loss.backward()
    optimizer.step()

    # Report the current batch loss every 10 iterations.
    if i % 10 == 0:
        print(f"Epoch {epoch}, iter {i}, loss: {loss.item()}")

# Persist only the learned parameters (state dict) to keypoints_model.pth.
torch.save(model.state_dict(), 'keypoints_model.pth')

### Court Line Detector (court_line_detector.py)

In [4]:
import torch
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image
import numpy as np
import cv2
import matplotlib.pyplot as plt

class CourtLineDetector:
    """Predict court keypoints on an image with a ResNet-50 regressor.

    The network outputs 28 values, read as alternating x, y coordinates in a
    224x224 frame, which ``predict`` rescales to the input image's size.
    """

    def __init__(self, model_path):
        """Build the network, load weights from ``model_path`` and prepare preprocessing.

        Args:
            model_path: Path to a state dict saved with ``torch.save``.
        """
        self.model = models.resnet50(pretrained=True)
        self.model.fc = torch.nn.Linear(self.model.fc.in_features, 14*2)
        # NOTE(review): strict=False silently ignores missing/unexpected keys,
        # so a mismatched checkpoint will not raise here.
        self.model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')), strict=False)
        # Inference only: switch BatchNorm (and any dropout) layers to
        # evaluation behaviour. Without this the freshly built module stays in
        # training mode and normalises each single-image batch with its own
        # statistics.
        self.model.eval()

        self.transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    def predict(self, image):
        """Return the predicted keypoints for one BGR image.

        Args:
            image: Image array in OpenCV BGR channel order.

        Returns:
            1-D numpy array of alternating x, y values scaled to the size of
            ``image``.
        """
        img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Add the batch dimension expected by the model.
        image_tensor = self.transforms(img_rgb).unsqueeze(0)

        with torch.no_grad():
            outputs = self.model(image_tensor)

        keypoints = outputs.squeeze().numpy()
        org_h, org_w = img_rgb.shape[:2]

        # Map coordinates from the 224x224 network frame back to the original size.
        keypoints[::2] *= org_w/224.0
        keypoints[1::2] *= org_h/224.0

        return keypoints

    def draw_keypoints(self, image, keypoints):
        """Draw every keypoint as a red dot with its index label; modifies ``image`` in place."""
        for i in range(0, len(keypoints), 2):
            x, y = int(keypoints[i]), int(keypoints[i+1])

            cv2.putText(image, str(i//2), (x,y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
            cv2.circle(image, (x, y), 5, (0, 0, 255), -1)
        return image

    def draw_keypoints_on_video(self, video_frames, keypoints):
        """Draw the same set of keypoints on every frame and return the frames as a list."""
        return [self.draw_keypoints(frame, keypoints) for frame in video_frames]

### main.py

In [9]:
# Load all frames of the input video.
input_video_path="/content/drive/MyDrive/AI_Project_Models/input_video.mp4"
video_frames = read_video(input_video_path)

# Build the player and ball detectors.
player_tracker = PlayerTracker(model_path='yolov8x')
ball_tracker = BallTracker(model_path='/content/drive/MyDrive/AI_Project_Models/yolov5_best.pt')

# read_from_stub=True loads previously pickled detections from stub_path
# instead of running the models on every frame.
player_detections=player_tracker.detect_frames(video_frames,
                                               read_from_stub=True,
                                               stub_path='/content/drive/MyDrive/AI_Project_Models/tracker_stubs/player_detections.pkl')
ball_detections=ball_tracker.detect_frames(video_frames,
                                           read_from_stub=True,
                                           stub_path='/content/drive/MyDrive/AI_Project_Models/tracker_stubs/ball_detections.pkl')
# Fill frames where the ball was not detected.
ball_detections = ball_tracker.interpolate_ball_positions(ball_detections)

# Court keypoints are predicted once, from the first frame, and reused for
# every frame of the video.
court_line_detector = CourtLineDetector(model_path='/content/drive/MyDrive/AI_Project_Models/keypoints_model.pth')
court_keypoints = court_line_detector.predict(video_frames[0])

# Each drawing step annotates the frames produced by the previous step.
# The keypoint step used to be fed `video_frames` again, which only carried the
# earlier boxes because the draw methods modify frames in place.
output_video_frames = player_tracker.draw_bboxes(video_frames,player_detections)
output_video_frames = ball_tracker.draw_bboxes(output_video_frames,ball_detections)
output_video_frames = court_line_detector.draw_keypoints_on_video(output_video_frames,court_keypoints)
save_video(output_video_frames,"/content/drive/MyDrive/AI_Project_Models/output_video_interpolation.avi")

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 164MB/s]


In [None]:
# Mount Google Drive at /content/drive.
# NOTE(review): the main.py cell reads its video, model weights and stubs from
# /content/drive/MyDrive/..., so on a fresh kernel this cell has to be run
# before that one even though it appears later in the notebook.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
