In [1]:
import pandas as pd
import os
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
import torch
from collections import defaultdict
import json
from tqdm import tqdm

from ultralytics import YOLO

In [2]:
# Dataset dir. Set the TENNIS_DATASET_DIR environment variable to point the
# notebook at another copy of the dataset; when it is unset the original
# hard-coded location is used, so existing runs behave exactly as before.
dataset_dir = os.environ.get(
    'TENNIS_DATASET_DIR',
    '/app/tennis_data/stroke_recognition_dataset/master/v1.0',
)
match_info_json_path = os.path.join(dataset_dir, "match_info.json")

# match_info.json is loaded into match_info_df; clean_dataset_info.csv is
# loaded into dataset_df, which the later cells filter by match_id and read
# clip filenames from.
match_info_df = pd.read_json(match_info_json_path)
dataset_df = pd.read_csv(os.path.join(dataset_dir, "clean_dataset_info.csv"))

In [3]:
# Display the table loaded from match_info.json.
match_info_df

Unnamed: 0,match_id,filename,match_start_top,match_start_bottom,fps,first_point_start_frame,surface,tournament,year,round
0,0,000_Novak_Djokovic_v_Andy_Murray_Australian_Op...,Novak Djokovic,Andy Murray,25.0,41250,hardcourt,Australian Open,2016,F
1,1,001_Novak_Djokovic_v_Rafael_Nadal_Australian_O...,Novak Djokovic,Rafael Nadal,25.0,18000,hardcourt,Australian Open,2012,F
2,2,002_Novak_Djokovic_v_Roger_Federer_US_Open_201...,Roger Federer,Novak Djokovic,29.97003,500,hardcourt,US Open,2015,F
3,3,003_Novak_Djokovic_v_Roger_Federer_Wimbledon_2...,Roger Federer,Novak Djokovic,25.0,350,grass,Wimbledon,2019,F
4,4,004_Rafael_Nadal_v_Nick_Kyrgios_Wimbledon_2019...,Rafael Nadal,Nick Kyrgios,25.0,0,grass,Wimbledon,2019,R2
5,5,005_Roger_Federer_v_Rafael_Nadal_Wimbledon_201...,Roger Federer,Rafael Nadal,25.0,450,grass,Wimbledon,2019,SF
6,7,007_Stan_Wawrinka_v_Novak_Djokovic_US_Open_201...,Novak Djokovic,Stan Wawrinka,23.976024,0,hardcourt,US Open,2016,F
7,8,008_Novak_Djokovic_v_Rafael_Nadal_Australian_O...,Rafael Nadal,Novak Djokovic,29.941,7440,hardcourt,Australian Open,2019,F
8,22,022_Novak_Djokovic_v_Rafael_Nadal_Wimbledon_20...,Novak Djokovic,Rafael Nadal,25.0,0,grass,Wimbledon,2018,SF
9,23,023_Alexander_Zverev_v_Dominic_Thiem_US_Open_2...,Dominic Thiem,Alexander Zverev,29.97003,0,hardcourt,US Open,2020,F


In [4]:
# Preview the first rows of the table loaded from clean_dataset_info.csv.
dataset_df.head()

Unnamed: 0,filename,original_filename,top_cls,btm_cls,match_id,fps,ballhit_match_timestamp,ballhit_match_frame_pos,surface
0,t_serve_00001_b_other_00001_m000.mp4,m000_p000_bh00_serve_top_Novak_Djokovic.mp4,serve,other,0,25.0,00:27:31.896,41297,hardcourt
1,t_serve_00002_b_other_00002_m000.mp4,m000_p000_bh01_serve_top_Novak_Djokovic.mp4,serve,other,0,25.0,00:27:41.370,41534,hardcourt
2,t_other_00001_b_forehand_00001_m000.mp4,m000_p000_bh02_forehand_bottom_Andy_Murray.mp4,other,forehand,0,25.0,00:27:42.113,41552,hardcourt
3,t_serve_00003_b_other_00003_m000.mp4,m000_p001_bh00_serve_top_Novak_Djokovic.mp4,serve,other,0,25.0,00:28:02.918,42072,hardcourt
4,t_other_00002_b_backhand_00001_m000.mp4,m000_p001_bh01_backhand_bottom_Andy_Murray.mp4,other,backhand,0,25.0,00:28:03.661,42091,hardcourt


In [5]:
# The dataset is processed one group of matches at a time. Instead of
# commenting lines in and out, pick the group through BATCH_INDEX.
MATCH_ID_BATCHES = [
    [0, 1, 2],
    [3, 4, 5],
    [7, 8, 22],
    [23, 24, 25],
    [100, 101, 102, 103, 104],
    [105, 106, 107],
    [108, 109, 110, 111],
    [112, 113, 114],
]
BATCH_INDEX = 0  # index into MATCH_ID_BATCHES; 0 selects matches 0, 1 and 2

# Keep only the clips whose match_id belongs to the selected batch.
subset_df = dataset_df[dataset_df['match_id'].isin(MATCH_ID_BATCHES[BATCH_INDEX])]

In [6]:
# (rows, columns) of the selected subset.
subset_df.shape

(1265, 9)

In [7]:
# Model used by the tracking loops below; its results are read for both boxes and keypoints.
model = YOLO("yolov8x-pose-p6.pt")  # load an official model

In [8]:
# Full path of every clip in the subset; the clips sit in the
# 'final_dataset_20_frames' folder under the dataset directory.
clips_dir = os.path.join(dataset_dir, 'final_dataset_20_frames')
filepaths = [os.path.join(clips_dir, clip_name)
             for clip_name in subset_df.filename.tolist()]

In [9]:
# Number of clip paths that will be processed.
len(filepaths)

1265

In [10]:
# Run pose tracking on every clip and write one JSON file per clip holding,
# for each track id, the list of per-frame detections (boxes + keypoints).
json_out_dir = os.path.join(dataset_dir, 'final_dataset_json')
# open(..., 'w') does not create missing folders, so make sure the target exists.
os.makedirs(json_out_dir, exist_ok=True)

for fp in tqdm(filepaths, desc="Processing videos"):

    # stream=True makes track() hand back the per-frame results one at a time
    # instead of building the whole list up front.
    video_results_gen = model.track(fp, stream=True, verbose=False)

    # track id -> list of detection records, one record per frame the track appears in.
    track_by_id = defaultdict(list)
    for frame_id, frame_results in enumerate(video_results_gen):
        # boxes.id is None when the frame carries no track ids; nothing to record then.
        if frame_results.boxes.id is None:
            continue

        keypoints = frame_results.keypoints.cpu()
        boxes_full = frame_results.boxes.cpu()
        track_ids = frame_results.boxes.id.int().cpu().tolist()
        for kps, bxs, track_id in zip(keypoints, boxes_full, track_ids):
            # Each kps/bxs holds a single detection, hence the [0] after tolist().
            track_by_id[track_id].append({
                'video_path': fp,
                'frame_id': frame_id,
                'img_shape': bxs.orig_shape,
                'class': bxs.cls.int().item(),
                'class_conf': bxs.conf.item(),
                'boxes_xywh': bxs.xywh.tolist()[0],
                'boxes_xywhn': bxs.xywhn.tolist()[0],
                'boxes_xyxy': bxs.xyxy.tolist()[0],
                'boxes_xyxyn': bxs.xyxyn.tolist()[0],
                'keypoints_xy': kps.xy.tolist()[0],
                'keypoints_xyn': kps.xyn.tolist()[0],
                'keypoints_conf': kps.conf.tolist()[0],
            })

    # Name the JSON after the clip, dropping the extension whatever its length.
    clip_name = os.path.splitext(os.path.basename(fp))[0]
    with open(os.path.join(json_out_dir, f"{clip_name}.json"), 'w') as json_file:
        json.dump(track_by_id, json_file, indent=4)

  return torch._C._cuda_getDeviceCount() > 0
Processing videos: 100% 1265/1265 [13:55:12<00:00, 39.61s/it]  


In [None]:
# Trial run of the export on the first 10 clips only. Output files carry a
# "_1" suffix, which keeps them distinct from the "<clip>.json" files.
best_track_by_video = defaultdict(list)
best_track_by_height = defaultdict(list)

json_out_dir = os.path.join(dataset_dir, 'final_dataset_json')
# open(..., 'w') does not create missing folders, so make sure the target exists.
os.makedirs(json_out_dir, exist_ok=True)

for fp in tqdm(filepaths[:10], desc="Processing videos"):
    # No stream=True here: the per-frame results come back as an indexable sequence.
    video_results = model.track(fp, verbose=False)

    # track id -> list of detection records, one record per frame the track appears in.
    track_by_id = defaultdict(list)
    for frame_id, frame_results in enumerate(video_results):
        # boxes.id is None when the frame carries no track ids; nothing to record then.
        if frame_results.boxes.id is None:
            continue

        keypoints = frame_results.keypoints.cpu()
        boxes_full = frame_results.boxes.cpu()
        track_ids = frame_results.boxes.id.int().cpu().tolist()
        for kps, bxs, track_id in zip(keypoints, boxes_full, track_ids):
            # Each kps/bxs holds a single detection, hence the [0] after tolist().
            track_by_id[track_id].append({
                'video_path': video_results[0].path,
                'frame_id': frame_id,
                'img_shape': bxs.orig_shape,
                'class': bxs.cls.int().item(),
                'class_conf': bxs.conf.item(),
                'boxes_xywh': bxs.xywh.tolist()[0],
                'boxes_xywhn': bxs.xywhn.tolist()[0],
                'boxes_xyxy': bxs.xyxy.tolist()[0],
                'boxes_xyxyn': bxs.xyxyn.tolist()[0],
                'keypoints_xy': kps.xy.tolist()[0],
                'keypoints_xyn': kps.xyn.tolist()[0],
                'keypoints_conf': kps.conf.tolist()[0],
            })

    # Derive the file name from the input path rather than video_results[0],
    # so a clip that produced no frame results still gets an (empty) JSON
    # instead of raising IndexError.
    clip_name = os.path.splitext(os.path.basename(fp))[0]
    with open(os.path.join(json_out_dir, f"{clip_name}_1.json"), 'w') as json_file:
        json.dump(track_by_id, json_file, indent=4)

In [None]:
# Print the JSON form of every frame result of every video, followed by a
# separator line after each video.
# NOTE(review): `results` is only assigned in later cells, so this cell fails
# on a fresh top-to-bottom run.
for video_results in results:
    print(video_results[0].path)  # path read from the video's first frame result
    for frame_id, frame_results in enumerate(video_results):
        curr_json = frame_results.tojson()
        print(curr_json)
    print('-'*50)

In [None]:
# Walk every frame result of every video and call tojson() on it; the return
# value is discarded, only the video path is printed.
# NOTE(review): `results` is only assigned in later cells.
for video_results in results:
    print(video_results[0].path)
    for frame_id, frame_results in enumerate(video_results):
        frame_results.tojson()

In [None]:
# Rebinds `model` to a freshly constructed YOLO object from the pose weights file.
model = YOLO("yolov8x-pose-p6.pt")  # load an official model

In [None]:
# Track every path in df.resampled_path and collect the return value of each
# track() call in `results` (one entry per video).
# NOTE(review): `df` is not defined in any visible cell — confirm where it comes from.
results = []
for i, ip in enumerate(df.resampled_path.tolist()):
    r = model.track(ip)#, save=True)
    results.append(r)

In [None]:
# Select the first frame result of the first video for inspection.
r = results[0][0]

In [None]:
# Track ids attached to the boxes of the selected frame result, as a list of ints.
r.boxes.id.int().tolist()

In [None]:
# Inspect the keypoints object of the selected frame result.
r.keypoints

In [None]:
# Export per-track detections for every video held in `results`, then
# optionally pick a "best" track per video by two criteria: mean keypoint
# confidence and mean box height.
#
# SELECT_BEST_TRACK replaces a bare `continue` that used to sit right after the
# JSON dump and made all of the selection code unreachable. Leaving it False
# keeps that behaviour (export only, both best_track_* dicts stay empty).
SELECT_BEST_TRACK = False
# Both per-track scores are divided by this fixed count rather than by the
# number of frames the track appears in, so tracks seen in fewer frames score
# lower. NOTE(review): presumably the number of frames per clip — confirm.
FRAMES_PER_CLIP = 20

best_track_by_video = defaultdict(list)
best_track_by_height = defaultdict(list)

json_out_dir = os.path.join(dataset_dir, 'final_dataset_json')
# open(..., 'w') does not create missing folders, so make sure the target exists.
os.makedirs(json_out_dir, exist_ok=True)

for video_results in results:
    video_path = video_results[0].path
    print(video_path)

    # Per track id: detection records, raw keypoints and raw xywh boxes.
    track_by_id = defaultdict(list)
    track_kps_by_id = defaultdict(list)
    track_boxes_by_id = defaultdict(list)
    for frame_id, frame_results in enumerate(video_results):
        # boxes.id is None when the frame carries no track ids; nothing to record then.
        if frame_results.boxes.id is None:
            continue

        keypoints = frame_results.keypoints.cpu()
        boxes_xywh = frame_results.boxes.xywh.cpu()
        boxes_full = frame_results.boxes.cpu()
        track_ids = frame_results.boxes.id.int().cpu().tolist()
        for kps, bxs, bxs_xywh, track_id in zip(keypoints, boxes_full, boxes_xywh, track_ids):
            track_kps_by_id[track_id].append(kps)
            track_boxes_by_id[track_id].append(bxs_xywh)
            track_by_id[track_id].append({
                'video_path': video_path,
                # Included so the records match the layout written by the
                # other export cells of this notebook.
                'frame_id': frame_id,
                'img_shape': bxs.orig_shape,
                'class': bxs.cls.int().item(),
                'class_conf': bxs.conf.item(),
                'boxes_xywh': bxs.xywh.tolist()[0],
                'boxes_xywhn': bxs.xywhn.tolist()[0],
                'boxes_xyxy': bxs.xyxy.tolist()[0],
                'boxes_xyxyn': bxs.xyxyn.tolist()[0],
                'keypoints_xy': kps.xy.tolist()[0],
                'keypoints_xyn': kps.xyn.tolist()[0],
                'keypoints_conf': kps.conf.tolist()[0],
            })

    # Name the JSON after the video, dropping the extension whatever its length.
    clip_name = os.path.splitext(os.path.basename(video_path))[0]
    with open(os.path.join(json_out_dir, f"{clip_name}.json"), 'w') as json_file:
        json.dump(track_by_id, json_file, indent=4)

    if not SELECT_BEST_TRACK:
        continue

    # Criterion 1: highest keypoint confidence, summed over the track's frames
    # and divided by FRAMES_PER_CLIP.
    best_track_id = None
    best_conf = 0
    for track_id, kps in track_kps_by_id.items():
        curr_conf = 0
        for curr_kps in kps:
            # Do not take into account face keypoints
            curr_conf += torch.mean(curr_kps.conf[0][3:]).item()
        curr_conf /= FRAMES_PER_CLIP
        if curr_conf > best_conf:
            best_conf = curr_conf
            best_track_id = track_id
        print(track_id, curr_conf)

    # Criterion 2: largest box height (last element of xywh), summed over the
    # track's frames and divided by FRAMES_PER_CLIP.
    best_track_id_h = None
    best_height = 0
    for track_id, bxs in track_boxes_by_id.items():
        curr_height = 0
        for curr_box in bxs:
            curr_height += curr_box.cpu().tolist()[-1]
        curr_height /= FRAMES_PER_CLIP
        if curr_height > best_height:
            best_height = curr_height
            best_track_id_h = track_id
        print(track_id, curr_height)

    print('------------------------------------------')
    print(best_track_id, best_conf)
    print(best_track_id_h, best_height)
    print('------------------------------------------')

    best_track_by_video[video_path] = (best_track_id, best_conf)
    best_track_by_height[video_path] = (best_track_id_h, best_height)

# Plain dicts ordered by video path, for stable display.
best_track_by_video = dict(sorted(best_track_by_video.items()))
best_track_by_height = dict(sorted(best_track_by_height.items()))

In [None]:
# Per video path: (track id, score) of the track chosen by keypoint confidence.
best_track_by_video

In [None]:
# Per video path: (track id, score) of the track chosen by box height.
best_track_by_height

In [None]:
# Select the frame result at index 10 of the video at index 7 and show its boxes in xywh form.
r = results[7][10]
r.boxes.xywh

In [None]:
# Inspect the boxes object of the currently selected frame result.
r.boxes

In [None]:
# Select the frame result at index 10 of the video at index 7, call plot() on
# its first keypoints entry and pass the result to plot_img.
# NOTE(review): `plot_img` is not defined or imported in any visible cell —
# confirm where it comes from before relying on this cell.
r = results[7][10]
fr = r.keypoints[0].plot()
plot_img(fr)

In [None]:
# Inspect the boxes object of the currently selected frame result.
r.boxes

In [None]:
# Print the source path of each tracked video, read from its first frame result.
for rs in results:
    print(rs[0].path)

In [None]:
# Select the frame result at index 10 of the video at index 7 and display it.
r = results[7][10]
r.show()

In [None]:
# Inspect the boxes object of the currently selected frame result.
r.boxes

In [None]:
# Select the frame result at index 5 of the video at index 3; the following cells read from `r`.
r = results[3][5]

In [None]:
# xy keypoint coordinates of the first entry in the selected frame result, as a NumPy array.
r.keypoints.xy.numpy()[0]

In [None]:
# Index of the keypoints entry (within the selected frame result) examined by the cells that use pose_id.
pose_id = 0

In [None]:
# Split the selected pose's keypoint coordinates into their first (x) and
# second (y) columns for plotting.
pose_xy = r.keypoints.xy.numpy()[pose_id]
x, y = pose_xy[:, 0], pose_xy[:, 1]

In [None]:
# Mean keypoint confidence of the pose at pose_id, skipping its first three keypoints.
print(torch.mean(r.keypoints.conf[pose_id][3:]))

In [None]:
# Overlay the selected pose's keypoints (x, y) on the frame they came from.
# Ultralytics keeps orig_img in OpenCV's BGR channel order while imshow
# expects RGB, so reverse the channel axis to display true colours.
plt.imshow(r.orig_img[..., ::-1])
plt.scatter(x, y)
plt.show()

In [None]:
# Mean keypoint confidence of the entry at index 0, skipping its first three keypoints.
print(torch.mean(r.keypoints.conf[0][3:]))

In [None]:
# Mean over the whole keypoint-confidence tensor of the selected frame result (every entry, every keypoint).
torch.mean(r.keypoints.conf)

In [None]:
# Print the type of the first frame result of the first video, then stop.
# Note: the loop variable rebinds the notebook-level `r`.
for r in results[0]:
    print(type(r))
    break

In [None]:
# Detection sample on two still images. Note that this cell rebinds the
# notebook-level `model` and `results` used by the pose/tracking cells.
# Load a model
model = YOLO("yolov8x.pt")  # pretrained YOLOv8x model

# Run batched inference on a list of images
# classes=[0, 38] restricts the run to two class ids
# (presumably COCO "person" and "tennis racket" — TODO confirm against model.names).
results = model(["im1.png", "im2.png"], classes=[0, 38])  # return a list of Results objects

# Process results list
for result in results:
    boxes = result.boxes  # Boxes object for bounding box outputs
    masks = result.masks  # Masks object for segmentation masks outputs
    keypoints = result.keypoints  # Keypoints object for pose outputs
    probs = result.probs  # Probs object for classification outputs
    obb = result.obb  # Oriented boxes object for OBB outputs
    result.show()  # display to screen
    result.save(filename="result.jpg")  # save to disk

In [None]:
# Track each input path with the yolov10x.pt weights, passing save=True to track().
# NOTE(review): `input_paths` is not defined in any visible cell, and `results`
# is overwritten on every iteration, so only the last call's return value is kept.
model = YOLO("yolov10x.pt")
for i, ip in enumerate(input_paths):
    results = model.track(ip, save=True)

In [None]:
# Track each input path with the pose weights, passing save=True to track().
# NOTE(review): `input_paths` is not defined in any visible cell, and `results`
# is overwritten on every iteration, so only the last call's return value is kept.
model = YOLO("yolov8x-pose-p6.pt")  # load an official model
for i, ip in enumerate(input_paths):
    results = model.track(ip, save=True)