# Imports

In [21]:
import numpy as np
import os
import shutil
from tqdm import tqdm
import zipfile
import urllib.request

# Download the dataset

In [22]:
def download_file(url, file_name, dest_dir='dataset', chunk_size=64 * 1024):
    """Download ``url`` into ``dest_dir/file_name`` unless that file already exists.

    The payload is streamed to ``<file_name>.part`` first and only renamed to
    its final name once the transfer has finished, so an interrupted run never
    leaves a truncated archive that a later run would mistake for a complete one.

    Args:
        url: Address to fetch; anything ``urllib.request.urlopen`` accepts.
        file_name: Name of the file to create inside ``dest_dir``.
        dest_dir: Folder that receives the file. Created if it is missing.
        chunk_size: Number of bytes requested per read.

    Raises:
        OSError: If the server announced a ``Content-Length`` and a different
            number of bytes was received.
        urllib.error.URLError: Propagated from ``urlopen`` when the request fails.
    """
    target_path = os.path.join(dest_dir, file_name)
    if os.path.exists(target_path):
        print(f"{file_name} already exists.")
        return

    # Create the folder on first use so the cell also works on a fresh checkout.
    if dest_dir:
        os.makedirs(dest_dir, exist_ok=True)

    partial_path = target_path + '.part'
    print("Downloading file...")
    try:
        with urllib.request.urlopen(url) as response, open(partial_path, 'wb') as out_file:
            # The header is optional; tqdm shows a bar without a total when it is None.
            try:
                content_length = int(response.headers.get('Content-Length'))
            except (TypeError, ValueError):
                content_length = None

            received = 0
            with tqdm(total=content_length, unit='B', unit_scale=True, desc=url.split('/')[-1]) as pbar:
                while True:
                    chunk = response.read(chunk_size)
                    if not chunk:
                        break
                    out_file.write(chunk)
                    received += len(chunk)
                    pbar.update(len(chunk))

        # An early end of stream looks exactly like a normal one, so compare sizes.
        if content_length is not None and received != content_length:
            raise OSError(
                f"Incomplete download of {file_name}: got {received} of {content_length} bytes"
            )
        os.replace(partial_path, target_path)
    finally:
        # Only present when something went wrong before the rename above.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Download complete!")


# Training
# (url, local file name) pairs, fetched in this order:
# training images, test images, test ground truth
for archive_url, archive_name in (
    ('https://sid.erda.dk/public/archives/daaeac0d7ce1152aea9b61d9f1e19370/GTSRB_Final_Training_Images.zip',
     'GTSRB_Final_Training_Images.zip'),
    ('https://sid.erda.dk/public/archives/daaeac0d7ce1152aea9b61d9f1e19370/GTSRB_Final_Test_Images.zip',
     'GTSRB_Final_Test_Images.zip'),
    ('https://sid.erda.dk/public/archives/daaeac0d7ce1152aea9b61d9f1e19370/GTSRB_Final_Test_GT.zip',
     'GTSRB_Final_Test_GT.zip'),
):
    download_file(archive_url, archive_name)

GTSRB_Final_Training_Images.zip already exists.
GTSRB_Final_Test_Images.zip already exists.
GTSRB_Final_Test_GT.zip already exists.


# Extract zip files

In [23]:
def extract_file(file_name):
    """Unpack every member of ``dataset/<file_name>`` into ``dataset/``.

    Members are extracted one at a time so the progress bar advances once per
    archive entry.

    Args:
        file_name: Name of a zip archive located in the ``dataset`` folder.
    """
    archive_path = f"dataset/{file_name}"
    with zipfile.ZipFile(archive_path, 'r') as archive:
        members = archive.namelist()
        # Wrapping the list lets tqdm count the entries as they are consumed.
        with tqdm(members, desc="Extracting") as progress:
            for member in progress:
                archive.extract(member, 'dataset/')


# Unpack the three archives in turn: training images, test images, test ground truth
for archive_name in ('GTSRB_Final_Training_Images.zip',
                     'GTSRB_Final_Test_Images.zip',
                     'GTSRB_Final_Test_GT.zip'):
    extract_file(archive_name)

Extracting: 100%|██████████| 39299/39299 [00:05<00:00, 7304.62it/s]
Extracting: 100%|██████████| 12635/12635 [00:01<00:00, 7094.10it/s]
Extracting: 100%|██████████| 1/1 [00:00<00:00, 490.68it/s]


# Loading CSV file

In [24]:
# IMAGES: './dataset/GTSRB/Final_Test/Images'
# CSV ANNOTATIONS: './dataset/GT-final_test.csv'
def csv_loader(csv_path):
    """Read a ';'-separated CSV file into a 2-D array of strings.

    The first line of the file is skipped (``skiprows=1``) and every remaining
    field is kept as text.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        numpy.ndarray of ``str`` with one row per data line and one column per
        field. ``ndmin=2`` keeps the result two-dimensional even when the file
        holds a single data line, so column indexing such as ``data[:, 0]``
        always works.
    """
    return np.loadtxt(csv_path, delimiter=";", dtype=str, skiprows=1, ndmin=2)


# The test images ship in 'GTSRB_Final_Test_Images.zip'; their labels ship separately
# in 'GTSRB_Final_Test_GT.zip', whose CSV is read here from the dataset folder.
# Only source columns 0 and 7 are kept -> Column 0: filename - Column 1: classid
annotations = csv_loader('./dataset/GT-final_test.csv')[:, [0, 7]]
num_samples = annotations.shape[0]
# Order the rows by class id, compared as integers rather than as text
annotations = annotations[np.argsort(annotations[:, 1].astype(int))]

# Move the training image folders into dataset/GTSRB/train

In [25]:
def move_directories(source, destination):
    """Move every immediate sub-directory of ``source`` into ``destination``.

    Plain files directly inside ``source`` are left where they are. When a
    sub-directory of the same name already exists in ``destination`` (for
    example after running the notebook a second time), its contents are merged
    instead of nesting the moved folder inside the existing one.

    Args:
        source: Directory whose sub-directories are moved.
        destination: Directory that receives them. Created if it is missing.

    Raises:
        FileNotFoundError: If ``source`` does not exist.
    """
    os.makedirs(destination, exist_ok=True)

    # Get a list of all directories in the source directory
    directories = [d for d in os.listdir(source) if os.path.isdir(os.path.join(source, d))]

    # Move each directory to the destination
    for directory in tqdm(directories):
        _move_merging(os.path.join(source, directory), os.path.join(destination, directory))


def _move_merging(source_path, destination_path):
    """Move ``source_path`` to ``destination_path``, merging into an existing directory."""
    if not (os.path.isdir(source_path) and os.path.isdir(destination_path)):
        shutil.move(source_path, destination_path)
        return

    # shutil.move would place the source *inside* an existing destination
    # directory, so move the children one by one instead.
    for entry in os.listdir(source_path):
        _move_merging(os.path.join(source_path, entry), os.path.join(destination_path, entry))
    os.rmdir(source_path)


# Move directories with contents
# Hoist the sub-folders of Final_Training/Images into train/, then delete
# whatever remains of the Final_Training tree
move_directories(source="./dataset/GTSRB/Final_Training/Images",
                 destination="./dataset/GTSRB/train")
shutil.rmtree(path="./dataset/GTSRB/Final_Training")

100%|██████████| 43/43 [00:00<00:00, 41748.86it/s]


# Sort the test images into one folder per class ID

In [26]:
# One folder per class id under ./dataset/GTSRB/test/, named with the id
# zero-padded to five characters
for class_id in tqdm(np.unique(annotations[:, 1]), desc='Class_ID'):
    class_dir = './dataset/GTSRB/test/' + class_id.zfill(5)
    if not os.path.exists(class_dir):
        os.makedirs(class_dir)
    # Column 0 of the rows matching this class id holds the image file names
    class_rows = annotations[annotations[:, 1] == class_id]
    for file_name in class_rows[:, 0]:
        shutil.move(f'./dataset/GTSRB/Final_Test/Images/{file_name}', f'{class_dir}/{file_name}')

# Anything not moved above is removed together with the Final_Test tree
shutil.rmtree("./dataset/GTSRB/Final_Test")

Class_ID: 100%|██████████| 43/43 [00:00<00:00, 103.87it/s]
