In [1]:
import cv2

In [2]:
def extract_frames(video_path):
  """Read every frame of a video file into memory.

  Args:
    video_path: Path passed straight to `cv2.VideoCapture`.

  Returns:
    A list with one entry per decoded frame, in playback order, exactly as
    returned by `VideoCapture.read()`. The list is empty when no frame could
    be read (for example when the file cannot be opened).

  Note:
    All frames are kept in memory at once, so long clips can be expensive.
  """
  frames = []
  video_capture = cv2.VideoCapture(video_path)
  try:
    while True:
      ret, frame = video_capture.read()
      if not ret:  # read() reports failure once there is nothing left to decode
        break
      frames.append(frame)
  finally:
    # Release the capture even if decoding raises or the cell is interrupted.
    video_capture.release()
  return frames

def save_video(output_video_frames, path, fps=30):
  """Encode a sequence of frames into an XVID video file.

  Args:
    output_video_frames: Non-empty sequence of frames; each must expose a
      three-element `.shape` of (height, width, channels). The output size is
      taken from the first frame; later frames are not checked here and are
      assumed to have the same size.
    path: Destination file path handed to `cv2.VideoWriter`.
    fps: Frame rate of the written video. Defaults to 30.

  Raises:
    ValueError: If `output_video_frames` is empty, since the frame size
      cannot be determined.
    IOError: If the video writer could not be opened for `path`.
  """
  if len(output_video_frames) == 0:
    raise ValueError("save_video() needs at least one frame to determine the video size")
  height, width, _ = output_video_frames[0].shape
  fourcc = cv2.VideoWriter_fourcc(*'XVID')
  # VideoWriter expects the frame size as (width, height).
  out = cv2.VideoWriter(path, fourcc, fps, (width, height))
  try:
    # A writer that failed to open would otherwise drop every frame silently.
    if not out.isOpened():
      raise IOError(f"Could not open video writer for {path!r}")
    for frame in output_video_frames:
      out.write(frame)
  finally:
    # Always release so the file is finalized and the handle is closed.
    out.release()

In [3]:
# Decode every frame of the input video into memory.
frames = extract_frames("point.mp4")
# extract_frames returns an empty list when nothing could be read (e.g. the
# file is missing); stop here instead of failing later with an obscure error.
if not frames:
  raise FileNotFoundError("No frames could be read from 'point.mp4'")

# Detecting players

In [None]:
!pip install ultralytics
from ultralytics import YOLO

In [None]:
def closest(box, results):
  """Return the track id of the detected person nearest to `box`.

  Distance is measured between box centres (squared Euclidean distance, which
  preserves the ordering without needing a square root).

  Args:
    box: Reference box as (x1, y1, x2, y2).
    results: A single tracking result exposing `boxes` (iterable of
      detections with `xyxy`, `cls` and `id`) and `names` (class index to
      class name mapping).

  Returns:
    The integer track id of the closest detection whose class name is
    "person", or None when no tracked person is present. Persons without a
    track id (`id` is None) are ignored because they cannot be identified.
  """
  # The reference centre does not change per candidate, so compute it once.
  x1, y1, x2, y2 = box
  center_x = (x1 + x2) / 2
  center_y = (y1 + y2) / 2

  min_distance = float('inf')
  min_id = None
  for bbox in results.boxes:
    if results.names[int(bbox.cls.tolist()[0])] != "person":
      continue
    if bbox.id is None:
      # No track id assigned to this detection; skip it rather than crash.
      continue
    x_1, y_1, x_2, y_2 = bbox.xyxy.tolist()[0]
    other_x = (x_1 + x_2) / 2
    other_y = (y_1 + y_2) / 2
    distance = (center_x - other_x)**2 + (center_y - other_y)**2
    if distance < min_distance:
      min_distance = distance
      min_id = int(bbox.id.tolist()[0])
  return min_id

In [None]:
model = YOLO("models/yolov8x.pt")
player_detections = [] # One dict per frame: {player track id: [x1, y1, x2, y2]}
player_ids = [] # Track ids recognised as players, in order of first appearance
for frame in frames:
  # persist=True tells the tracker this frame continues the previous sequence.
  results = model.track(frame, persist=True)[0]

  # The tracker may return no track ids for a frame; record an empty entry so
  # player_detections stays aligned one-to-one with frames.
  if results.boxes.id is None:
    player_detections.append({})
    continue

  # Pass 1: the person closest to each tennis racket is considered a player.
  for box in results.boxes:
    if results.names[int(box.cls.tolist()[0])] != "tennis racket":
      continue
    player_id = closest(box.xyxy.tolist()[0], results)
    # closest() returns None when the frame contains no person.
    if player_id is not None and player_id not in player_ids:
      player_ids.append(player_id)

  # Pass 2: collect the boxes of known players. Running this after pass 1
  # makes the result independent of the order in which boxes are listed.
  player_dict = {}
  for box in results.boxes:
    if results.names[int(box.cls.tolist()[0])] != "person":
      continue
    track_id = int(box.id.tolist()[0])
    if track_id in player_ids:
      player_dict[track_id] = box.xyxy.tolist()[0]
  player_detections.append(player_dict)

In [8]:
# Draw a labelled bounding box around every player in each frame.
# Drawing happens on a copy so the raw `frames` stay untouched and this cell
# (and the tracking cell) can be re-run safely; this costs one extra copy of
# the video in memory.
color = (0, 255, 0) # green
thickness = 2
output_frames = []
for frame, player_dict in zip(frames, player_detections):
  annotated = frame.copy()
  for track_id, box in player_dict.items():
    x1, y1, x2, y2 = box
    top_left = (int(x1), int(y1))
    bottom_right = (int(x2), int(y2))
    cv2.rectangle(annotated, top_left, bottom_right, color, thickness)
    # Players are numbered by the order in which they were first identified;
    # the label sits 10 px above the box's top-left corner.
    label = f"Player: {player_ids.index(track_id)+1}"
    cv2.putText(annotated, label, (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
  output_frames.append(annotated)

In [9]:
# Write the annotated frames to disk as a video file.
# Arguments: the list of frames to encode and the destination path.
save_video(output_video_frames=output_frames, path="output_point.avi")