In [1]:
import matplotlib.pyplot as plt
import numpy as np
import cv2 as cv
import os
from sklearn.model_selection import train_test_split

# I am going to train a custom YOLOv5 model that detects objects.
# I followed this tutorial: https://towardsdatascience.com/the-practical-guide-for-object-detection-with-yolov5-algorithm-74c04aac4843

In [2]:
#create the directories for training the yolov5 model
# Layout: data/{images,labels}/{train,validation,test}
# os.makedirs creates the missing parent directories as well, and exist_ok=True
# keeps the cell idempotent: re-running it no longer raises FileExistsError.
for kind in ('images', 'labels'):
    for split in ('train', 'validation', 'test'):
        os.makedirs(os.path.join('data', kind, split), exist_ok=True)

In [3]:
#method used in order to get all the frames from one video
def get_frames(video_path):
    """Read every frame of a video into memory.

    Parameters
    ----------
    video_path : str
        Path handed to ``cv.VideoCapture``.

    Returns
    -------
    list
        The frames returned by ``cap.read()``, in reading order. The list is
        empty when the capture is not opened or the first read fails.
    """
    images = []
    cap = cv.VideoCapture(video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # read() reports failure: stop collecting frames
                break
            images.append(frame)
    finally:
        # release the capture even if reading raised
        cap.release()
    return images

In [4]:
#used in order to get the width and height of each video
# NOTE(review): assumes every video has the same resolution as this image - confirm
template_path = 'train/01.jpg'
img_template = cv.imread(template_path)
if img_template is None:
    # cv.imread signals an unreadable/missing file by returning None
    raise FileNotFoundError('could not read template image: ' + template_path)
# shape is (rows, columns, ...): rows -> height, columns -> width
h, w = img_template.shape[:2]
print(h, w)

720 1280


In [5]:
# I am going to create a dataset of bowling balls.
# In order to do that, I iterate through some videos from the training data
# and get the bounding box for each of them at each annotated frame.

#the selected videos
train_videos = ['02', '03', '04', '06', '08', '09', '10', '11', '14', '15']

# running index used to name the exported image/label pairs (1.jpg / 1.txt, ...)
counter = 1

base = 'train/Task2/ground-truth/'
for video in train_videos:
    #read the ground-truth lines of this video
    file = base + video + "_gt.txt"
    with open(file, 'r') as f:
        text = f.readlines()
    path_video = 'train/Task2/' + video + ".mp4"
    #get the frames of the video
    frames = get_frames(path_video)
    # the first line of the ground-truth file is skipped (presumably a header)
    for line in text[1:]:
        values = line.split()
        if not values:
            # blank line (e.g. trailing newline at end of file): nothing to export
            continue
        #unfortunately I can't use the raw given coordinates
        #i have to normalize each of them in order to train a yolov5 model
        # presumably values[1:5] are x_min, y_min, x_max, y_max in pixels - verify
        coord1 = int(values[1])
        coord2 = int(values[2])
        coord3 = int(values[3])
        coord4 = int(values[4])
        #each coordinate has to be between 0 and 1
        # box centre, normalised by the image width/height
        x = (coord1 + coord3)/2/w
        y = (coord2 + coord4)/2/h

        # box size, normalised by the image width/height
        height = (coord4 - coord2)/h
        width = (coord3 - coord1)/w

        # values[0] is used as the index of the frame inside the video
        actual_frame = frames[int(values[0])]

        #write the frame and its label file
        image_path = 'data/images/' + str(counter) + '.jpg'
        if not cv.imwrite(image_path, actual_frame):
            # imwrite returns False instead of raising when the write fails
            raise IOError('could not write ' + image_path)
        output = '0 ' + str(x) + ' ' + str(y) + ' ' + str(width) + ' ' + str(height)
        with open('data/labels/' + str(counter) + '.txt', 'w') as f:
            f.write(output)
        counter += 1


In [6]:

base_folder_images = 'data/images'
base_folder_labels = 'data/labels'

# fixed seed so that the train/validation/test split is the same on every run
SPLIT_SEED = 42

photos = []
labels = []
#i am going to get all the images and labels from the new dataset
# sorted(): os.listdir returns names in arbitrary order, which would make the
# seeded split differ between machines/runs.
for fname in sorted(os.listdir(base_folder_images)):
    stem, ext = os.path.splitext(fname)
    # only image files are photos; sub-directories and any other file are skipped
    if ext != '.jpg':
        continue
    photos.append(base_folder_images + '/' + fname)
    # the label file shares the image's base name
    labels.append(base_folder_labels + '/' + stem + '.txt')


#split the dataset in 3 parts: train, validation, test
# 80% train, then the remaining 20% is halved into validation and test
X_train, X_validation, y_train, y_validation = train_test_split(
    photos, labels, test_size = 0.2, random_state = SPLIT_SEED)
X_validation, X_test, y_validation, y_test = train_test_split(
    X_validation, y_validation, test_size = 0.5, random_state = SPLIT_SEED)

In [8]:
sets = [[X_train, y_train], [X_validation, y_validation], [X_test, y_test]]
# destination sub-directory for each entry of `sets`, in the same order
split_names = ['train', 'validation', 'test']

# a single counter is shared by the three splits, so output names never repeat
counter = 0
#now create 3 datasets by adding each frame+label in the corresponding directory
for split_name, (split_images, split_labels) in zip(split_names, sets):
    for image, label in zip(split_images, split_labels):
        img = cv.imread(image)
        with open(label, 'r') as f:
            text = f.read()

        cv.imwrite('data/images/' + split_name + '/' + str(counter) + '.jpg', img)
        with open('data/labels/' + split_name + '/' + str(counter) + '.txt', 'w') as new_f:
            new_f.write(text)

        counter += 1

data/images/9.jpg data/labels/9.txt
data/images/391.jpg data/labels/391.txt
data/images/53.jpg data/labels/53.txt
data/images/156.jpg data/labels/156.txt
data/images/523.jpg data/labels/523.txt
data/images/366.jpg data/labels/366.txt
data/images/518.jpg data/labels/518.txt
data/images/565.jpg data/labels/565.txt
data/images/29.jpg data/labels/29.txt
data/images/689.jpg data/labels/689.txt
data/images/106.jpg data/labels/106.txt
data/images/442.jpg data/labels/442.txt
data/images/481.jpg data/labels/481.txt
data/images/550.jpg data/labels/550.txt
data/images/609.jpg data/labels/609.txt
data/images/116.jpg data/labels/116.txt
data/images/661.jpg data/labels/661.txt
data/images/38.jpg data/labels/38.txt
data/images/265.jpg data/labels/265.txt
data/images/712.jpg data/labels/712.txt
data/images/281.jpg data/labels/281.txt
data/images/364.jpg data/labels/364.txt
data/images/71.jpg data/labels/71.txt
data/images/642.jpg data/labels/642.txt
data/images/614.jpg data/labels/614.txt
data/images/

In [1]:
!pip install -qr yolov5/requirements.txt

^C


In [11]:
# Run the yolov5/train.py script in a shell, passing config.yaml as --data,
# yolov5s.yaml as --cfg, yolov5s.pt as --weights and yolo_rez as --name.
# NOTE(review): --epochs 1 and --workers 24 look like machine/test specific values - confirm
!python yolov5/train.py --img 480 --cfg yolov5s.yaml --hyp hyp.scratch-high.yaml --batch 16 --epochs 1 --data config.yaml --weights yolov5s.pt --workers 24 --name yolo_rez
