In [None]:
# Dataset generation
import glob
import os
import random
import time
from random import randint

import cv2
import numpy as np

from library.img_rando import *

# --- Run configuration (training set) ---
# Output locations: generated images are written below PATH_ALL_IMAGES and the
# label table is saved as CSV_NAME + ".csv".
PATH_ALL_IMAGES = '1_all_images/'
CSV_NAME = PATH_ALL_IMAGES + 'all_labels'

# Input locations: card images (*.png) and background images (*.jpg).
path_cards, path_background = '0_cards_images', '0_background_images'

# Number of random variants drawn at each augmentation stage (each at least 1).
zoom_data_size, rot_data_size, POV_data_size, pos_data_size = 5, 3, 3, 3

# Seed Python's `random` module so draws made through it repeat between runs.
# NOTE(review): this does not seed NumPy's generator - confirm which RNG
# library.img_rando uses.
random.seed('train')

# Load every card image into one uint8 array of shape (n_cards, 400, 257, 3).
print("#########################################")
print("Card Images:")

CARD_SHAPE = (400, 257, 3)  # shape every card PNG must have once read by cv2.imread

# Sort the paths: glob returns files in an OS-dependent order, and the position
# of a card in this list is later written out as its class id.
card_files = sorted(glob.glob(os.path.join(path_cards, "*.png")))
if not card_files:
    raise FileNotFoundError(f"No card images (*.png) found in '{path_cards}'")

images = np.empty((len(card_files), *CARD_SHAPE), dtype="uint8")
for i, file in enumerate(card_files):
    img = cv2.imread(file)
    if img is None:
        # cv2.imread signals an unreadable file by returning None instead of raising.
        raise IOError(f"Could not read card image '{file}'")
    if img.shape != CARD_SHAPE:
        raise ValueError(f"Card image '{file}' has shape {img.shape}, expected {CARD_SHAPE}")
    images[i] = img
    # Print filename without path and extension (works with both / and \ separators)
    filename = os.path.splitext(os.path.basename(file))[0]
    print(f"{i}: {filename}")

# Card pixels must not contain the value 0 at this point, so every 0 becomes 1.
# NOTE(review): presumably 0 is reserved for the empty canvas produced by the
# img_rando transforms (the contour search later treats non-zero pixels as the
# card) - confirm against library.img_rando.
images[images == 0] = 1

# Load every background image into one uint8 array of shape (n, 1000, 1500, 3),
# resizing any image that does not already have that size.
print("#########################################")
print("Background Images:")

BACKGROUND_SIZE = (1500, 1000)  # (width, height), the order cv2.resize expects

# Sorted so that a given random index always refers to the same background file.
background_files = sorted(glob.glob(os.path.join(path_background, "*.jpg")))
if not background_files:
    raise FileNotFoundError(f"No background images (*.jpg) found in '{path_background}'")

back_images = np.empty((len(background_files), BACKGROUND_SIZE[1], BACKGROUND_SIZE[0], 3), dtype="uint8")
for i, file in enumerate(background_files):
    back_img = cv2.imread(file)
    if back_img is None:
        # cv2.imread signals an unreadable file by returning None instead of raising.
        raise IOError(f"Could not read background image '{file}'")
    if back_img.shape != back_images.shape[1:]:
        back_img = cv2.resize(back_img, BACKGROUND_SIZE)
    back_images[i] = back_img

# Summarise the run before it starts: shape of the background stack and how
# many images will be produced. The factor 3 matches the three blur kernel
# sizes applied to every placed image in the generation loop.
variants_per_card = 3
for stage_size in (zoom_data_size, rot_data_size, POV_data_size, pos_data_size):
    variants_per_card *= stage_size
variants_total = variants_per_card * images.shape[0]

print(f"Backgrounds: {back_images.shape}")
print(f"Number of imges to be generated per card: {variants_per_card}")
print(f"Number of images to be generated in total: {variants_total}")
# Generate Data
#
# Every card runs through nested random stages
#   img_zoom_rand -> img_rot_rand -> img_3D_rand -> img_pos_rand (onto a random background)
# and each placed image is saved once per blur kernel size. One label row
# (filename, image width/height, class id, bounding box) is collected per saved
# image and the whole table is written to CSV_NAME + ".csv" at the end.
BLUR_KERNEL_SIZES = (1, 3, 5)

# Rows are collected in a plain list and converted to an array once at the end;
# growing a NumPy array with np.append copies the whole table for every image.
label_rows = [('filename','width','height','class','xmin','ymin','xmax','ymax')]
count = 0

# cv2.imwrite does not create missing folders - it only returns False.
os.makedirs(PATH_ALL_IMAGES, exist_ok=True)

start = time.time()
for index, img in enumerate(images):

    print("img_" ,index)

    for i_zoom in range(zoom_data_size):
        img_zoom = img_zoom_rand(img, maxZoom=2.1)

        for i_rot in range(rot_data_size):
            img_rot = img_rot_rand(img_zoom)

            for i_pov in range(POV_data_size):
                img_pov = img_3D_rand(img_rot)

                for i_pos in range(pos_data_size):
                    rand = randint(0, back_images.shape[0]-1)
                    back = back_images[rand,:,:,:]
                    img_pos, pos = img_pos_rand(img_pov, back)

                    # Bounding box of the card inside img_pov. It does not depend on the
                    # blur kernel, so it is computed once per placement rather than once
                    # per saved image.
                    img_pov_gray = cv2.cvtColor(img_pov, cv2.COLOR_BGR2GRAY)
                    # [-2] picks the contour list on both OpenCV 3.x (3 return values)
                    # and 4.x (2 return values).
                    contours = cv2.findContours(img_pov_gray, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
                    if len(contours) == 0:
                        raise RuntimeError(f"No card contour found for card {index} (image would be number {count})")
                    # Use the largest contour so a stray non-zero pixel cannot become the box.
                    rect = cv2.boundingRect(max(contours, key=cv2.contourArea))

                    # rect is (x, y, w, h) inside img_pov; pos is added to shift the box
                    # into the coordinates of the composed image.
                    xmin, ymin = pos[0]+rect[0], pos[1]+rect[1]
                    xmax, ymax = rect[0]+rect[2]+pos[0], rect[1]+rect[3]+pos[1]

                    for kernel_size in BLUR_KERNEL_SIZES:
                        final_img = img_blure(img_pos, kernel_size)

                        #save image
                        file_name = f"img_{index:02d}_{count:06d}.png"
                        out_path = PATH_ALL_IMAGES + file_name
                        print(out_path)
                        # NOTE(review): the *255 implies final_img is a float image in [0, 1] - confirm.
                        if not cv2.imwrite(out_path, (final_img*255).astype(np.uint8)):
                            raise IOError(f"Could not write image '{out_path}'")
                        label_rows.append((file_name, final_img.shape[1], final_img.shape[0], index,
                                           xmin, ymin, xmax, ymax))
                        count += 1

end = time.time()
print(f"Total time taken: {round(end-start)}s")

# Save labels (mixed str/number rows become a string array, written verbatim)
labels = np.array(label_rows)
np.savetxt(CSV_NAME+".csv", labels, delimiter=",",fmt='%s')


In [None]:
# Generate labels
import numpy as np
import pandas as pd
import cv2

# Convert the bounding-box table into one YOLO label file per image.
# Each file holds a single line
#   <class> <center_x> <center_y> <width> <height>
# where the class is an integer id and the four box values are relative to the
# image size (rounded to 5 decimals).
data = pd.read_csv('1_all_images/all_labels.csv')
PATH = '1_all_images/'

print("Nr of images: ", len(data))

# Iterate the needed columns in parallel; this avoids building a new Series
# for every single data.iloc[i][...] lookup.
label_columns = ('filename', 'class', 'xmin', 'ymin', 'xmax', 'ymax', 'width', 'height')
for i, (filename, img_class, xmin, ymin, xmax, ymax, img_width, img_height) in enumerate(
        zip(*(data[column] for column in label_columns))):
    print(i)
    center_x = (xmin + xmax) / 2
    center_y = (ymin + ymax) / 2
    label = (
        int(img_class),  # class id must stay an integer (it used to be written as e.g. "3.0")
        round(center_x / img_width, 5),
        round(center_y / img_height, 5),
        round((xmax - xmin) / img_width, 5),
        round((ymax - ymin) / img_height, 5),
    )
    # Strip only the real extension instead of assuming it is three characters long.
    stem = filename.rsplit('.', 1)[0]
    with open(PATH + stem + ".txt", "w") as label_file:
        label_file.write(" ".join(str(value) for value in label) + "\n")


In [None]:
# Train test validation split
import os
import fnmatch
import shutil
import random

# Source folder holding the images together with their label files
all_images = '1_all_images/'

# Destination folder of each split
path_train, path_test, path_validation = (
    '2_splited_data/' + split_name + '/' for split_name in ('train', 'test', 'validation')
)

# File extensions (without the dot) that are treated as images
image_datatypes = 'jpg png jpeg bmp'.split()

# Number of images moved into each split so far
nr_train = nr_test = nr_validation = 0

# Each split folder is created when missing and must not contain any file at
# any depth, so that an earlier split is never mixed into a new one.
for split_label, split_path in (("Train", path_train),
                                ("Test", path_test),
                                ("Validation", path_validation)):
    if not os.path.exists(split_path):
        os.makedirs(split_path)

    holds_files = any(found != [] for _top, _subdirs, found in os.walk(split_path))
    if holds_files:
        raise Exception(split_label + " folder is not empty.")

# Move every image in `all_images`, together with its .txt label file, into
# train / validation / test with probabilities 0.70 / 0.15 / 0.15.
SPLIT_SEED = 42  # change to obtain a different, but still repeatable, split
split_rng = random.Random(SPLIT_SEED)

# (upper bound of the random draw, counter key, destination folder); a draw is
# assigned to the first entry whose bound it is below.
split_targets = (
    (0.70, 'train', path_train),
    (0.85, 'validation', path_validation),
    (1.00, 'test', path_test),
)
split_counts = {'train': 0, 'validation': 0, 'test': 0}

# Only the top level of `all_images` is scanned, because the source paths below
# are built from `all_images` itself. Sorting makes the draw each file receives
# independent of the directory listing order.
candidate_files = sorted(os.listdir(all_images)) if os.path.isdir(all_images) else []
for items in candidate_files:
    stem, extension = os.path.splitext(items)
    # splitext handles extensions of any length (e.g. ".jpeg"); compare case-insensitively.
    if extension[1:].lower() not in image_datatypes or not os.path.isfile(all_images + items):
        continue

    label_name = stem + ".txt"
    if not os.path.isfile(all_images + label_name):
        # Fail before anything is moved so image and label never end up separated.
        raise FileNotFoundError(f"Label file '{all_images + label_name}' is missing for image '{items}'")

    random_number = split_rng.random()  # in [0, 1), so the last bound always matches
    for upper_bound, split_key, destination in split_targets:
        if random_number < upper_bound:
            break

    print(destination + items)
    print(destination + label_name)
    shutil.move(all_images + items, destination + items)
    shutil.move(all_images + label_name, destination + label_name)
    split_counts[split_key] += 1

nr_train = split_counts['train']
nr_test = split_counts['test']
nr_validation = split_counts['validation']

print(f"Number of train images: {nr_train}")
print(f"Number of test images: {nr_test}")
print(f"Number of validation images: {nr_validation}")


In [None]:
# Train model
from ultralytics import YOLO
import torch
from GPUtil import showUtilization as gpu_usage
from numba import cuda
import cv2

def free_gpu_cache():
    """Print GPU utilisation, release cached GPU memory, print utilisation again.

    Empties PyTorch's CUDA cache and then closes and re-selects device 0
    through numba. When PyTorch reports no CUDA device, only the initial
    utilisation is printed and the function returns without touching the GPU.
    """
    print("Initial GPU Usage")
    gpu_usage()

    if not torch.cuda.is_available():
        # Nothing to release; selecting device 0 below would presumably fail here.
        print("No CUDA device available - skipping GPU cache reset")
        return

    torch.cuda.empty_cache()

    # Close numba's CUDA context on device 0 and select the device again.
    # NOTE(review): this must run before PyTorch has live tensors on the GPU,
    # since closing the context may invalidate them - confirm it is still needed.
    cuda.select_device(0)
    cuda.close()
    cuda.select_device(0)

    print("GPU Usage after emptying the cache")
    gpu_usage()

# Train a YOLO model on the split dataset and report validation metrics.
if __name__ == '__main__':
    free_gpu_cache()

    # Load a model
    model = YOLO('yolov10n.pt')  # Load pretrained model

    # Train the model
    # NOTE(review): `dropout` is documented by Ultralytics for classification
    # tasks - confirm it has any effect on this detection run.
    model.train(data="2_splited_data/dataset.yaml", epochs=40, batch=16, imgsz=640, pretrained=True, single_cls=False, patience=5, dropout=0.1, verbose=True, device=0, save_period=2)

    # Validate the model
    metrics = model.val()  # no arguments needed, dataset and settings remembered

    # Bare expressions inside an `if` body are discarded, so the metrics are
    # printed explicitly.
    print(f"mAP50-95: {metrics.box.map}")
    print(f"mAP50: {metrics.box.map50}")
    print(f"mAP75: {metrics.box.map75}")
    print(f"mAP50-95 per category: {metrics.box.maps}")

    print(model.names)
