<a href="https://colab.research.google.com/github/Rumeysakeskin/Object-Detection/blob/main/prepare_dataset_for__YOLO_format.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# https://sid.erda.dk/public/archives/ff17dc924eba88d5d01a807357d6614c/published-archive.html

# Install and unzip dataset
import zipfile, urllib.request, shutil
# Location of the dataset archive and the local name it is saved under
url = "https://sid.erda.dk/public/archives/ff17dc924eba88d5d01a807357d6614c/FullIJCNN2013.zip" 
file_name = 'FullIJCNN2013.zip'

# Stream the archive to disk. The `with` block closes (and therefore flushes)
# the output file before anything tries to read it back.
with urllib.request.urlopen(url) as response, open(file_name, 'wb') as out_file:
    shutil.copyfileobj(response, out_file)

# Extract only after the download handle is closed: the zip central directory
# sits at the end of the file, so reading a partially flushed file can fail.
with zipfile.ZipFile(file_name) as zf:
    zf.extractall()

In [2]:
import os
import pandas as pd
import cv2


*   LIST OF CATEGORIES




In [3]:
# Class IDs grouped into the four detection categories.

# Prohibitory: circular signs, white background, red border
p = [*range(0, 6), *range(7, 11), 15, 16]

# Danger: triangular signs, white background, red border
d = [11, *range(18, 32)]

# Mandatory: circular signs, blue background
m = list(range(33, 41))

# Other: every remaining class ID
o = [6, 12, 13, 14, 17, 32, 41, 42]


*   LOADING ORIGINAL ANNOTATIONS



In [4]:
# gt.txt has no header row and uses ';' as the separator, so the column
# names are supplied explicitly.
gt_columns = ['ImageID', 'XMin', 'YMin', 'XMax', 'YMax', 'ClassID']
gt_path = '/'.join(["FullIJCNN2013", 'gt.txt'])

ann = pd.read_csv(gt_path, sep=';', names=gt_columns)
ann.head()

Unnamed: 0,ImageID,XMin,YMin,XMax,YMax,ClassID
0,00000.ppm,774,411,815,446,11
1,00001.ppm,983,388,1024,432,40
2,00001.ppm,386,494,442,552,38
3,00001.ppm,973,335,1031,390,13
4,00002.ppm,892,476,1006,592,39



*   CALCULATION OF BOUNDING BOX'S PARAMETERS





In [5]:
# Build one ClassID -> CategoryID lookup from the four category lists
# (0: prohibitory, 1: danger, 2: mandatory, 3: other). If a class ID were
# listed twice, the later list wins, as with sequential .loc assignments.
category_of_class = {}
for category_id, class_ids in enumerate((p, d, m, o)):
    for class_id in class_ids:
        category_of_class[class_id] = category_id

# Getting category's ID according to the class's ID.
# The column is created directly from the mapping instead of being pre-filled
# with '' placeholders, so it stays numeric.
ann['CategoryID'] = ann['ClassID'].map(category_of_class)

# A class ID missing from every list would otherwise end up as a blank
# category in the label files, so fail loudly here instead.
unmapped_class_ids = ann.loc[ann['CategoryID'].isna(), 'ClassID'].unique()
if len(unmapped_class_ids) > 0:
    raise ValueError(
        'ClassID values without a category: ' + str(sorted(unmapped_class_ids))
    )
ann['CategoryID'] = ann['CategoryID'].astype(int)

# Calculating bounding box's center in x and y for all rows (pixel units)
ann['center x'] = (ann['XMax'] + ann['XMin']) / 2
ann['center y'] = (ann['YMax'] + ann['YMin']) / 2

# Calculating bounding box's width and height for all rows (pixel units)
ann['width'] = ann['XMax'] - ann['XMin']
ann['height'] = ann['YMax'] - ann['YMin']

In [6]:
# Keep only the columns needed for the label files. The .copy() makes `r`
# an independent dataFrame, so later changes to it do not touch `ann`.
yolo_columns = ['ImageID', 'CategoryID', 'center x', 'center y', 'width', 'height']
r = ann[yolo_columns].copy()
r.head()

Unnamed: 0,ImageID,CategoryID,center x,center y,width,height
0,00000.ppm,1,794.5,428.5,41,35
1,00001.ppm,2,1003.5,410.0,41,44
2,00001.ppm,2,414.0,523.0,56,58
3,00001.ppm,3,1002.0,362.5,58,55
4,00002.ppm,2,949.0,534.0,114,116



*   NORMALIZING BOUNDING BOX'S PARAMETERS
*   SAVING ANNOTATIONS IN TXT FILES
*   CONVERTING IMAGES FROM PPM TO JPG
















In [13]:
# Directory holding the extracted .ppm images; label files and .jpg copies
# are written next to them.
path_dir = "/content/FullIJCNN2013/"
path = os.listdir(path_dir)

# Columns written to each label file, in output order
label_columns = ['CategoryID', 'center x', 'center y', 'width', 'height']

for file in path:
    # Only the original '.ppm' images are processed
    if not file.endswith('.ppm'):
        continue

    # Annotations of the current image
    sub_r = r.loc[r['ImageID'] == file].copy()

    # Checking if there is no any annotations for current image.
    # Done before decoding so unannotated images are never read from disk.
    if sub_r.loc[:, label_columns].isnull().values.all():
        # Skipping this image
        continue

    # Reading image by its full path: `file` is only a bare filename, which
    # resolves correctly only when the working directory is path_dir.
    image_path = os.path.join(path_dir, file)
    image_ppm = cv2.imread(image_path)
    if image_ppm is None:
        # cv2.imread reports failure by returning None rather than raising
        raise OSError('Could not read image: ' + image_path)

    # Real image height and width (first two elements of the shape tuple)
    h, w = image_ppm.shape[:2]
    # Name of the file without the '.ppm' extension
    image_name = file[:-4]

    # Normalizing calculated bounding boxes' coordinates according to the real image width and height
    sub_r['center x'] = sub_r['center x'] / w
    sub_r['center y'] = sub_r['center y'] / h
    sub_r['width'] = sub_r['width'] / w
    sub_r['height'] = sub_r['height'] / h

    resulted_frame = sub_r.loc[:, label_columns].copy()

    # Saving resulted Pandas dataFrame into txt file (existing files are kept)
    path_to_save = path_dir + image_name + '.txt'
    if not os.path.exists(path_to_save):
        resulted_frame.to_csv(path_to_save, header=False, index=False, sep=' ')

    # Saving image in jpg format by OpenCV function (existing files are kept)
    path_to_save = path_dir + image_name + '.jpg'
    if not os.path.exists(path_to_save):
        cv2.imwrite(path_to_save, image_ppm)


*   CREATING FILES TRAIN.TXT AND TEST.TXT




In [27]:
# Collecting the paths of all '.jpg' images, one per line.
# The directory is listed again here because `path` was captured before the
# '.jpg' files were written and therefore may not contain them. The result is
# sorted so the split is the same on every run, and it is stored under its own
# name so the prohibitory class list `p` is not overwritten.
image_paths = sorted(
    path_dir + file + '\n'
    for file in os.listdir(path_dir)
    if file.endswith('.jpg')
)

# First 15% of the paths go to test.txt, the remaining 85% to train.txt
split_index = int(len(image_paths) * 0.15)
p_test = image_paths[:split_index]
p_train = image_paths[split_index:]

# Both files are written into path_dir, which is where ts_data.data
# declares them to be.
with open(path_dir + 'train.txt', 'w') as train_txt:
    train_txt.writelines(p_train)

with open(path_dir + 'test.txt', 'w') as test_txt:
    test_txt.writelines(p_test)


*   CREATING TS_DATA.DATA AND CLASSES.NAMES




In [38]:
NAME_COUNTER = 0
categories = ["prohibitory", "danger", "mandatory", "other"]

# classes.names lists one category name per line, in CategoryID order
with open(path_dir + 'classes.names', 'w') as names:
    for cat in categories:
        print(cat, file=names)

# Number of categories, reused when writing the .data file
NUM_CATEGORY = len(categories)
NUM_CATEGORY

4

In [41]:
with open(path_dir + 'ts_data.data', 'w') as data:
    # The five 'key = value' entries of the .data file, in order:
    # number of classes, train list, validation list, class names and
    # the location where weights are saved.
    data_lines = [
        'classes = ' + str(NUM_CATEGORY),
        'train = ' + path_dir + 'train.txt',
        'valid = ' + path_dir + 'test.txt',
        'names = ' + path_dir + 'classes.names',
        'backup = backup',
    ]

    # Entries are separated by '\n'; the last line has no trailing newline
    data.write('\n'.join(data_lines))