In [14]:
import splitfolders
import numpy as np
from pandas import read_csv
import os
from pathlib import Path
import shutil
import random
import cv2
# from cv2 import dnn_superres
import collections
import scipy.ndimage as ndimage
import matplotlib.pyplot as plt
import seaborn as sns

Read each image, upscale it, and manually label the player's torso direction as follows:
1 - left
3 - right
5 - back
2 - face
The labelling loop also accepts 0 as a label.

In [None]:
# Interactive labelling cell: upscale every image listed in the labelling CSV,
# store it in its class folder, show it, and record the torso direction typed
# by the annotator. Results are saved to data/bodyDirections.npz.
from IPython.display import clear_output  # clears the previously shown image

labels = read_csv("data/images_labelling.csv")

sr_path = "trained_model/FSRCNN_x4.pb"
# The `from cv2 import dnn_superres` line in the import cell is commented out,
# so the module is reached through the already-imported `cv2` package.
sr = cv2.dnn_superres.DnnSuperResImpl_create()
sr.readModel(sr_path)
sr.setModel("fsrcnn", 4)

# Keys the annotator may type, mapped to the stored direction label.
valid_directions = {"0": 0, "1": 1, "2": 2, "3": 3, "5": 5}

Path("data/images_classes").mkdir(parents=True, exist_ok=True)
directions_all = []
imageNames_all = []
print("Processing images...")
for class_id in range(labels["label"].min(), labels["label"].max()+1):
    img_ids = np.array(labels[labels["label"] == class_id]["boxid"])
    class_dir = "data/images_classes/"+str(class_id)
    Path(class_dir).mkdir(parents=True, exist_ok=True)
    directions_classes = []
    imageNames_classes = []
    for i, img_id in enumerate(img_ids):
        src = "data/images/"+str(img_id)+".png"
        image = cv2.imread(src)
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            raise FileNotFoundError("Could not read image: " + src)
        result = sr.upsample(image)
        # Write INSIDE the class folder (the separator was missing before, which
        # produced files named "<class><id>.png" next to the class folders).
        cv2.imwrite(class_dir+"/"+str(img_id)+".png", result)
        imageNames_classes.append(img_id)
        print(class_id, i)
        plt.imshow(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
        plt.show()
        k = input()
        # Re-ask on an unknown key instead of reusing the previous image's label
        # (or hitting an undefined `direction` on the very first image).
        while k not in valid_directions:
            k = input("Unknown key, expected one of 0, 1, 2, 3, 5: ")
        direction = valid_directions[k]
        clear_output(wait=True)
        directions_classes.append(direction)
    directions_all.append(np.array(directions_classes))
    imageNames_all.append(np.array(imageNames_classes))
print("Saving directions...")
# NOTE(review): the per-class arrays stack into a regular 2-D array only when
# every class has the same number of images; the cell that loads this file
# flattens it with reshape(-1) - confirm this holds for the dataset.
np.savez("data/bodyDirections.npz", np.array(imageNames_all), np.array(directions_all))

Split each class into subfolders according to the labelled directions

In [21]:
# Create the train/val/test split, then sort every training image of each class
# into a subfolder named after its labelled direction.
splitfolders.ratio("data/images_classes", output="data/images_splited_balanced_upscaled", seed=42, ratio=(.8, .1, .1), group_prefix=None)
directions = np.load("data/bodyDirections.npz")
img_ids = directions["arr_0"].reshape(-1)
img_directions = directions["arr_1"].reshape(-1)
directions_classes = [0,1,2,3,5]
train_path = 'data/images_splited_balanced_upscaled/train/'

# Image id -> direction lookup built once, instead of scanning the whole id
# array for every file. setdefault keeps the first entry for a repeated id,
# which matches taking element 0 of the np.where matches.
direction_by_id = {}
for known_id, known_direction in zip(img_ids.tolist(), img_directions.tolist()):
    direction_by_id.setdefault(known_id, known_direction)

for class_id in range(25):
    class_path = train_path + str(class_id)
    for direction in directions_classes:
        Path(class_path+"/"+str(direction)).mkdir(parents=True, exist_ok=True)
    for img_name in os.listdir(class_path):
        # Only names ending in "g" are treated as images; this skips the
        # direction subfolders created just above.
        if not img_name.endswith("g"):
            continue
        # The file name without its 4-character extension is the image id.
        img_id = int(img_name[:-4])
        if img_id not in direction_by_id:
            raise LookupError("No direction label stored for image " + class_path + "/" + img_name)
        shutil.move(class_path+'/'+img_name, class_path+'/'+str(direction_by_id[img_id])+'/'+img_name)


Copying files: 0 files [00:00, ? files/s][A
Copying files: 347 files [00:00, 3469.67 files/s][A
Copying files: 927 files [00:00, 3936.92 files/s][A
Copying files: 1495 files [00:00, 4335.49 files/s][A
Copying files: 1837 files [00:00, 2998.90 files/s][A
Copying files: 2129 files [00:00, 2653.26 files/s][A
Copying files: 2578 files [00:00, 3024.38 files/s][A
Copying files: 2900 files [00:00, 2942.97 files/s][A
Copying files: 3290 files [00:00, 3163.42 files/s][A
Copying files: 3750 files [00:01, 3465.16 files/s][A


Plot the distribution of labelled directions for one class (class 4)

In [37]:
# Second saved array holds the direction labels; entry 4 is the one plotted.
saved_directions = np.load("data/bodyDirections.npz")["arr_1"]
sns.displot(saved_directions[4])

Balance the directions within each class by augmentation: rotate images by a small random angle and, for the left and right directions, mirror images of the opposite direction, until every direction has 65 images

In [22]:
# Top up every direction folder of every training class to a fixed number of
# images by writing rotated (and, for left/right, mirrored) copies.
low_angle = 5
high_angle = 10
filename_num = 100000  # augmented files are named from this number upwards
target_per_direction = 65

# (destination direction, source direction, mirror?) in the order they are filled.
# Left (1) and right (3) are produced from each other's images by mirroring;
# face (2) and back (5) are produced from their own images by rotation only.
direction_plan = [("1", "3", True), ("3", "1", True), ("2", "2", False), ("5", "5", False)]


def _random_rotation_angle(low, high):
    """Return an angle with magnitude drawn from [low, high) and a random sign."""
    magnitude = (high - low) * np.random.random_sample() + low
    return random.choice([magnitude, -magnitude])


# Two passes: a direction whose source folder was empty in the first pass can
# be filled in the second pass from images generated during the first one.
for try_num in range(2):
    for class_id in range(25):
        path = train_path+str(class_id)+'/'
        # Snapshot all folders before writing, so images created for this class
        # in the current pass are neither counted nor reused as sources.
        names_by_direction = {}
        for direction_name in ("1", "2", "3", "5"):
            names_by_direction[direction_name] = os.listdir(path+direction_name)

        for dst, src, mirror in direction_plan:
            source_names = names_by_direction[src]
            missing = target_per_direction - len(names_by_direction[dst])
            if missing <= 0 or len(source_names) == 0:
                # Already full, or nothing to augment from in this pass.
                continue
            for _ in range(missing):
                src_file = path+src+"/"+random.choice(source_names)
                img = cv2.imread(src_file)
                if img is None:
                    # cv2.imread signals a missing/unreadable file by returning None.
                    raise FileNotFoundError("Could not read image: " + src_file)
                angle = _random_rotation_angle(low_angle, high_angle)
                augmented_image = ndimage.rotate(img, angle, reshape = False, mode = "nearest")
                if mirror:
                    augmented_image = np.flip(augmented_image, 1)
                cv2.imwrite(path+dst+"/"+str(filename_num)+".png", augmented_image)
                filename_num += 1

If something went wrong, delete all augmented images (files numbered 100000 and above)

In [64]:
# Safety switch: set to True to remove the augmented files again.
delete_augmented = False
if delete_augmented:
    for class_id in range(25):
        for direct_id in directions_classes:
            folder = f"{train_path}{class_id}/{direct_id}"
            for file_name in os.listdir(folder):
                # Files whose numeric name is below 100000 are kept.
                if int(file_name[:-4]) < 100000:
                    continue
                os.remove(folder + '/' + file_name)

Merge all direction subfolders back into their class directory

In [23]:
# Move every image out of its direction subfolder into the class folder, then
# remove the emptied subfolder.
train_path = 'data/images_splited_balanced_upscaled/train/'
directions_classes = [0,1,2,3,5]
for class_id in range(25):
    class_dir = train_path + str(class_id)
    for direct_id in directions_classes:
        direction_dir = class_dir + '/' + str(direct_id)
        for file_name in os.listdir(direction_dir):
            shutil.move(direction_dir + '/' + file_name, class_dir)
        os.rmdir(direction_dir)

Split the data into 5 classes:
0 - blue team plus their goalkeeper
1 - white team plus their goalkeeper
2 - main referee
3 - side referee
4 - others

In [40]:
# Second split of the labelled images, written to its own output folder.
splitfolders.ratio(
    "data/images_classes",
    output="data/images_splited_balanced_upscaled_teams",
    seed=42,
    ratio=(0.8, 0.1, 0.1),
    group_prefix=None,
)

Copying files: 3750 files [00:00, 8929.19 files/s]


Balance the number of images across classes: bring the under-represented classes up to a fixed count using augmentation by a random rotation angle and a random flip.

In [41]:
# Top up the referee/other classes in the train, val and test splits to a fixed
# number of images using rotated and randomly mirrored copies.
path = 'data/images_splited_balanced_upscaled_teams/'
train_path = path + "train/"
val_path = path + "val/"
test_path = path + "test/"
paths = [train_path, val_path, test_path]
# Target image counts per split, in the order of `paths`.
balanced_images_num_goalkeep = [2600, 150, 150]
balanced_images_num_others = [2750, 165, 165]

low_angle = 1
high_angle = 10
filename_num = 200000  # augmented files are named from this number upwards

goalkeepers = [3, 23]
referee_and_others = [8, 20, 24]
# NOTE(review): `goalkeepers` and the goalkeeper targets are defined but not
# used by this cell; only the classes in `referee_and_others` are balanced.
# The loop below rebinds `path` to each split folder in turn.
for path, needed_num_imgs_goalkeep, needed_num_images_others in zip(paths, balanced_images_num_goalkeep,
                                                                    balanced_images_num_others):
    for class_id in referee_and_others:
        print(class_id)
        class_dir = path+str(class_id)
        image_names = os.listdir(class_dir)
        num_images = len(image_names)
        if num_images == 0 and needed_num_images_others > 0:
            # random.choice would fail on the empty list with an unhelpful message.
            raise ValueError("No images to augment from in " + class_dir)
        while num_images < needed_num_images_others:
            flip_condition = random.randint(0, 1)
            src_file = class_dir+'/'+random.choice(image_names)
            img = cv2.imread(src_file)
            if img is None:
                # cv2.imread signals a missing/unreadable file by returning None.
                raise FileNotFoundError("Could not read image: " + src_file)
            # Rotation magnitude drawn from [low_angle, high_angle) with a random sign.
            magnitude = (high_angle - low_angle) * np.random.random_sample() + low_angle
            angle = random.choice([magnitude, -magnitude])
            augmented_image = ndimage.rotate(img, angle, reshape = False, mode = "nearest")
            # Mirror only when the coin flip says so; previously the flag was
            # computed but ignored and every image was mirrored.
            if flip_condition:
                augmented_image = np.flip(augmented_image, 1)
            cv2.imwrite(class_dir+'/'+str(filename_num)+".png", augmented_image)
            filename_num += 1
            num_images+=1

8
20
24
8
20
24
8
20
24


Split the blue and white teams into two separate datasets

In [42]:
# Regroup the original class folders of every split into five merged folders:
# each entry of `old_classes` is moved into the matching entry of `new_classes`.
team1 = [0, 1, 2, 3, 5, 6, 9, 10, 11, 14, 22]
team2 = [4, 7, 12, 13, 15, 16, 17, 18, 19, 21, 23]
old_classes = [np.array(team1), np.array(team2), np.array([8]), np.array([20]), np.array([24])]
new_classes = ["00", "01", "02", "03", "04"]
for path in paths:
    for group_idx, merged_name in enumerate(new_classes):
        merged_dir = path + merged_name
        Path(merged_dir).mkdir(parents=True, exist_ok=True)
        for source_class in old_classes[group_idx]:
            source_dir = path + str(source_class)
            for file_name in os.listdir(source_dir):
                shutil.move(source_dir + '/' + file_name, merged_dir)
            # The source folder is empty now and can be removed.
            os.rmdir(source_dir)