In [2]:
import torch
import pandas as pd
import numpy as np
import torch.utils
import torchvision.transforms
from torch import nn
from torch.nn import functional as F
import matplotlib.pyplot as plt
import os
from math import floor
%matplotlib inline

# Raise the element-count threshold at which torch abbreviates printed tensors,
# so tensors with up to 100000 elements are shown in full when inspected.
torch.set_printoptions(threshold=100000)

In [3]:
# Root of the raw dataset. It is passed as `path` to yolo_negative_padding and
# yolo_load_data, which read its "labels" (and, for loading, "images") subfolders.
raw_path = "../Cyclist_Detection/raw"

In [4]:
# Output root. yolo_load_data writes its "images", "labels" and "src" subfolders
# here, and the cleanup cell at the end of the notebook deletes from it.
target_path = "../Cyclist_Detection/Dataset"

In [5]:
#Negative bounding box padding
def yolo_negative_padding(path):
    """Pad every YOLO label file so that all files hold the same number of rows.

    The label files under ``<path>/labels`` are modified in place: files with
    fewer boxes than the largest file get ``-1 0.0 0.0 0.0 0.0`` rows appended
    until they match it. Running the function again is a no-op because every
    file already has the maximum row count.

    Args:
        path: Dataset root containing a ``labels`` subfolder of text files.

    Raises:
        ValueError: If ``<path>/labels`` cannot be listed.
    """
    try:
        path_labels = os.path.join(path, "labels")
        label_files = sorted(os.listdir(path_labels))
    except Exception as err:
        raise ValueError("The file has to be split into images and labels folders") from err

    padding_row = '-1 0.0 0.0 0.0 0.0\n'

    # Single read pass: remember, per file, how many boxes it holds and whether
    # its last line is missing the terminating newline.
    file_stats = {}
    for name in label_files:
        with open(os.path.join(path_labels, name), 'r') as txt:
            content = txt.read()
        # Blank lines are not boxes, so they must not count towards the maximum.
        box_count = sum(1 for line in content.splitlines() if line.strip())
        needs_newline = bool(content) and not content.endswith("\n")
        file_stats[name] = (box_count, needs_newline)

    # Maximum number of YOLO boxes found in any single label file.
    M = max((count for count, _ in file_stats.values()), default=0)

    for name, (box_count, needs_newline) in file_stats.items():
        add_count = M - box_count
        if add_count <= 0:
            continue

        with open(os.path.join(path_labels, name), 'a') as txt:
            # Without this, the first padding row would be glued onto the last
            # real box (e.g. "... 0.2-1 0.0 ...") and corrupt the file.
            if needs_newline:
                txt.write("\n")
            txt.write(padding_row * add_count)

In [6]:
def yolo_load_data(path,target,split=None):
    """Convert a YOLO-format dataset into saved tensors plus CSV manifests.

    Reads every label file in ``<path>/labels`` and every image in
    ``<path>/images``, saves each one as a ``.pt`` tensor under
    ``<target>/labels`` and ``<target>/images``, and writes
    ``<target>/src/src.csv`` listing the saved image/label paths side by side.
    Images and labels are paired purely by their position in the sorted
    directory listings (presumably matching base names -- verify for your
    dataset).

    Args:
        path: Dataset root with ``images`` and ``labels`` subfolders.
        target: Output root; the needed subfolders are created.
        split: Optional ``[train, val, test]`` fractions summing to one. When
            given, ``train_src.csv``, ``val_src.csv`` and ``test_src.csv`` are
            also written (each only if non-empty). Rows are split in sorted
            file order; nothing is shuffled.

    Raises:
        TypeError: If ``split`` is given but is not a list.
        ValueError: If the fractions do not sum to one, or if the number of
            images differs from the number of label files.
        NameError: If the ``labels`` or ``images`` folder cannot be listed.
    """
    # Validate the split before touching the disk so a bad argument fails fast
    # instead of after the whole dataset has been converted.
    if split is not None:
        if not isinstance(split,list):
            raise TypeError("Split of train,val,test samples have to come in form of a list, in that order")

        # Compare with a tolerance: e.g. 0.7 + 0.2 + 0.1 == 0.9999999999999999
        # in binary floating point, which an exact `!= 1` check would reject.
        if abs(sum(split) - 1) > 1e-9:
            raise ValueError("Values of the train,val,test split have to sum up to one")

    try:
        path_labels = os.path.join(path,"labels")
        label_files = sorted(os.listdir(path_labels))
        path_images = os.path.join(path,"images")
        image_files = sorted(os.listdir(path_images))
    except Exception as err:
        raise NameError("The file has to be split into images and labels folders") from err

    # The manifest pairs entries by position, so the counts have to agree.
    if len(label_files) != len(image_files):
        raise ValueError(
            "Found %d label files but %d images; every image needs exactly one label file"
            % (len(label_files), len(image_files))
        )

    target_labels = os.path.join(target,"labels")
    target_images = os.path.join(target,"images")
    target_src = os.path.join(target,"src")
    for folder in (target_labels, target_images, target_src):
        os.makedirs(folder, exist_ok=True)

    paths_labels = []
    for name in label_files:
        with open(os.path.join(path_labels, name),'r') as txt:
            # Skip blank lines and split on any whitespace so stray spaces,
            # tabs or a trailing empty line do not break the float conversion.
            rows = [line.split() for line in txt if line.strip()]

        # One row per box; five columns are allocated for each YOLO row.
        single_yolo = torch.empty((len(rows),5))
        for j, row in enumerate(rows):
            single_yolo[j] = torch.tensor(list(map(float, row)))

        saved_path = os.path.join(target_labels, os.path.splitext(name)[0] + ".pt")
        torch.save(single_yolo, saved_path)
        paths_labels.append(saved_path)

    paths_images = []
    for name in image_files:
        img = torchvision.io.read_image(os.path.join(path_images, name))

        # splitext (rather than replacing ".jpg") so that .png/.jpeg/.JPG
        # inputs also end up with a .pt extension.
        saved_path = os.path.join(target_images, os.path.splitext(name)[0] + ".pt")
        torch.save(img, saved_path)
        paths_images.append(saved_path)

    src = pd.DataFrame({
        "images": paths_images,
        "labels": paths_labels
    })

    if split is not None:
        # Row boundaries of the train / val / test partitions.
        train_split = floor(split[0]*len(src))
        val_split = floor(train_split + split[1]*len(src))

        partitions = (
            ("train_src.csv", src.iloc[0:train_split,:]),
            ("val_src.csv", src.iloc[train_split:val_split,:]),
            ("test_src.csv", src.iloc[val_split:len(src),:]),
        )
        for csv_name, part in partitions:
            # Empty partitions are not written.
            if part.shape[0] > 0:
                part.to_csv(os.path.join(target_src, csv_name))

    src.to_csv(os.path.join(target_src,"src.csv"))

In [7]:
# Modifies the raw dataset in place: appends "-1 0.0 0.0 0.0 0.0" rows to the
# label files under raw_path/labels until every file has the same row count.
yolo_negative_padding(raw_path)

In [8]:
# Convert the raw images/labels to .pt tensors under target_path and write the
# CSV manifests with a 60/20/20 train/val/test split (taken in sorted file
# order, not shuffled).
yolo_load_data(raw_path,target_path,split=[0.6,0.2,0.2])

In [None]:
## Utility: run the next cell to quickly delete the generated dataset files (saved tensors and split CSVs)

In [46]:
# Cleanup of the artifacts written by yolo_load_data under target_path.
# Missing folders or files are skipped, so the cell can be re-run safely even
# when the dataset was only partially generated (or already deleted).
images_dir = os.path.join(target_path,"images")
labels_dir = os.path.join(target_path,"labels")
src_dir = os.path.join(target_path,"src")

delete_images = sorted(os.listdir(images_dir)) if os.path.isdir(images_dir) else []
delete_labels = sorted(os.listdir(labels_dir)) if os.path.isdir(labels_dir) else []
delete_src = os.listdir(src_dir) if os.path.isdir(src_dir) else []

for image in delete_images:
    os.remove(os.path.join(images_dir,image))

for label in delete_labels:
    os.remove(os.path.join(labels_dir,label))

# src.csv is deliberately kept; uncomment the next line to remove it as well.
#os.remove(os.path.join(target_path,"src","src.csv"))

# Remove every split manifest that yolo_load_data can produce, but only the
# ones actually present: a split CSV is not written when its partition is empty
# or when split=None, and removing a missing file would raise FileNotFoundError.
for split_csv in ("train_src.csv", "val_src.csv", "test_src.csv"):
    if split_csv in delete_src:
        os.remove(os.path.join(src_dir,split_csv))