In [None]:
import json
import os
from pathlib import Path
import shutil
from ultralytics import YOLO
from torchvision.transforms import v2
from torchvision.transforms.functional import resize
from tqdm.auto import tqdm
from torchvision.io import read_image
import numpy as np
import cv2
import torchvision

# Build a filtered copy of the training images under <train>/imagesV2.
#
# For every player folder in <train>/images:
#   * label != -1: run the detector on each frame (resized, grayscale) and keep
#     the frames whose top detection is confident enough; if no frame
#     qualifies, keep the last processed frame so the folder is not empty.
#   * label == -1: keep a single, unmodified frame.
# The weights in 'last.pt' are presumably a jersey-number detector -- TODO confirm.

CONF_THRESHOLD = 0.4     # minimum confidence of the first box for a frame to be kept
INPUT_SIZE = (640, 640)  # size the frames are resized to before inference

model = YOLO('last.pt')
model.to('cpu')

dataset_path = "SoccerNet/jersey-2023/"
train_path = os.path.join(dataset_path, 'train')
images_root = os.path.join(train_path, 'images/')

destination_folder = os.path.join(train_path, 'imagesV2')
os.makedirs(destination_folder, exist_ok=True)

to_gray = v2.Grayscale(num_output_channels=3)
# Context manager so the ground-truth file handle is closed deterministically.
with open(os.path.join(train_path, 'train_gt.json')) as gt_file:
    train_gt = json.load(gt_file)
number_counter = 0
images_counter = 0


def save_model_input(img, save_path):
    """Write a float CHW tensor with values in [0, 1] to disk as an 8-bit image.

    The tensor is converted to HWC and explicitly scaled/rounded to uint8 rather
    than relying on cv2.imwrite's implicit float conversion. Channel order is
    not swapped: the tensors passed here come out of ``to_gray``, so all three
    channels are identical.
    """
    pixels = img.permute(1, 2, 0).numpy() * 255.0
    cv2.imwrite(save_path, np.clip(np.rint(pixels), 0, 255).astype(np.uint8))


# sorted() makes the run reproducible: os.listdir order is arbitrary, and the
# order decides which frame is kept in the "store only 1 image" cases.
for player_id in tqdm(sorted(os.listdir(images_root)), desc="Analyzing players"):
    source_folder = os.path.join(images_root, player_id)
    # Stray files (e.g. .DS_Store) or folders without a label would otherwise
    # raise KeyError / NotADirectoryError halfway through a long run.
    if not os.path.isdir(source_folder) or player_id not in train_gt:
        print(f"Skipping {player_id}: not a labelled player folder")
        continue

    number_counter = 0
    player_folder = Path(os.path.join(destination_folder, f'{player_id}/'))
    player_folder.mkdir(parents=True, exist_ok=True)
    image_names = sorted(os.listdir(source_folder))

    if not image_names:
        # Nothing to analyse or copy; without this guard the fallbacks below
        # would reuse variables left over from the previous player.
        print(f"Player id: {player_id} has no source images, nothing stored")
    elif train_gt[player_id] != -1:
        for image in tqdm(image_names, desc=f"Analyzing images for player id {player_id}"):
            image_path = os.path.join(source_folder, image)
            img = resize(read_image(image_path), INPUT_SIZE) / 255.0
            img = to_gray(img)
            pred = model.predict(img.unsqueeze(0), verbose=False)
            boxes = pred[0].boxes
            # Only the first box is inspected, as in the confidence test below.
            if boxes is not None and len(boxes) > 0 and boxes.conf[0].item() > CONF_THRESHOLD:
                # NOTE(review): the full resized frame is stored, not a crop of
                # the detected box -- confirm that this is the intended output.
                save_path = os.path.join(player_folder, f'{image}')
                save_model_input(img, save_path)
                number_counter += 1
                images_counter += 1

        if number_counter == 0:
            # image_names is non-empty here, so `image` and `img` are guaranteed
            # to be the last frame processed for *this* player.
            save_path = os.path.join(player_folder, f'{image}')
            save_model_input(img, save_path)
            print(f"Player has no detections, storing only 1 image {save_path}")
            number_counter += 1
            images_counter += 1

    else:
        image = image_names[0]
        image_path = os.path.join(source_folder, image)
        save_path = os.path.join(player_folder, f'{image}')
        # Copy the file as-is. Decoding with read_image and re-encoding with
        # cv2 after multiplying the uint8 pixels by 255 saturated the image to
        # white and swapped the RGB/BGR channel order.
        shutil.copy(image_path, save_path)

        print(f"Player id: {player_id} label is already -1 , storing only 1 image {save_path}")
        number_counter += 1
        images_counter += 1

    print(f"Founded numbers: {number_counter} ")

print(f"New dataset total images: {images_counter}")

In [None]:
# CHECK DATASET INTEGRITY
# Every key in train_gt.json is expected to have a non-empty folder under
# <train_path>/imagesV2. Missing and empty folders are reported per key and
# summarised at the end; any other filesystem error is left to propagate
# instead of being reported as a missing folder.

with open(os.path.join(train_path, 'train_gt.json')) as gt_file:
    train_gt = json.load(gt_file)

missing_folders = []
empty_folders = []

for player_id in train_gt:
    print(f'Checking for key {player_id} ...')
    folder = os.path.join(train_path, 'imagesV2/', player_id)
    if not os.path.isdir(folder):
        print(f"Folder {player_id} does not exist !")
        missing_folders.append(player_id)
    elif len(os.listdir(folder)) == 0:
        print(f'Folder {player_id} is empty !')
        empty_folders.append(player_id)

print(
    f"Integrity check done: {len(missing_folders)} missing, "
    f"{len(empty_folders)} empty, {len(train_gt)} expected"
)
