# Imports

In [5]:
import os
import shutil
from tqdm.notebook import tqdm
from torchvision import datasets
import numpy as np
from data_augmenter import DataAugmenter

In [6]:
# Pin NumPy's global random generator so repeated runs draw the same numbers
np.random.seed(seed=123)

# Copy the dataset

In [7]:
def copytree_with_progress(src, dst):
    """Recursively copy the directory ``src`` into ``dst``, showing progress bars.

    ``dst`` is created if it does not exist yet (an existing directory is
    reused). Every directory visited gets its own tqdm bar, labelled with the
    directory's base name, that advances once per direct child.

    Args:
        src: Path of the directory to copy from.
        dst: Path of the directory to copy into.
    """
    os.makedirs(dst, exist_ok=True)
    entries = os.listdir(src)

    # The bar counts direct children only: a subdirectory is a single tick
    # here and opens its own bar when the recursion reaches it.
    with tqdm(total=len(entries), desc=f"Copying {os.path.basename(src)}") as progress:
        for name in entries:
            source_path = os.path.join(src, name)
            target_path = os.path.join(dst, name)

            if not os.path.isdir(source_path):
                # Plain file: copy it (copy2 also tries to keep file metadata)
                shutil.copy2(source_path, target_path)
            else:
                # Subdirectory: descend and copy its contents the same way
                copytree_with_progress(source_path, target_path)
            progress.update(1)


# Copy the GTSRB dataset into './dataset/GTSRB-leveled'; that copy is the
# directory the augmentation cells below point at.
copytree_with_progress(src='./dataset/GTSRB', dst='./dataset/GTSRB-leveled')

Copying GTSRB:   0%|          | 0/4 [00:00<?, ?it/s]

Copying test:   0%|          | 0/43 [00:00<?, ?it/s]

Copying 00016:   0%|          | 0/150 [00:00<?, ?it/s]

Copying 00033:   0%|          | 0/210 [00:00<?, ?it/s]

Copying 00022:   0%|          | 0/120 [00:00<?, ?it/s]

Copying 00011:   0%|          | 0/420 [00:00<?, ?it/s]

Copying 00003:   0%|          | 0/450 [00:00<?, ?it/s]

Copying 00039:   0%|          | 0/90 [00:00<?, ?it/s]

Copying 00034:   0%|          | 0/120 [00:00<?, ?it/s]

Copying 00035:   0%|          | 0/390 [00:00<?, ?it/s]

Copying 00028:   0%|          | 0/150 [00:00<?, ?it/s]

Copying 00036:   0%|          | 0/120 [00:00<?, ?it/s]

Copying 00037:   0%|          | 0/60 [00:00<?, ?it/s]

Copying 00015:   0%|          | 0/210 [00:00<?, ?it/s]

Copying 00027:   0%|          | 0/60 [00:00<?, ?it/s]

Copying 00038:   0%|          | 0/690 [00:00<?, ?it/s]

Copying 00014:   0%|          | 0/270 [00:00<?, ?it/s]

Copying 00026:   0%|          | 0/180 [00:00<?, ?it/s]

Copying 00019:   0%|          | 0/60 [00:00<?, ?it/s]

Copying 00007:   0%|          | 0/450 [00:00<?, ?it/s]

Copying 00029:   0%|          | 0/90 [00:00<?, ?it/s]

Copying 00005:   0%|          | 0/630 [00:00<?, ?it/s]

Copying 00008:   0%|          | 0/450 [00:00<?, ?it/s]

Copying 00021:   0%|          | 0/90 [00:00<?, ?it/s]

Copying 00025:   0%|          | 0/480 [00:00<?, ?it/s]

Copying 00032:   0%|          | 0/60 [00:00<?, ?it/s]

Copying 00002:   0%|          | 0/750 [00:00<?, ?it/s]

Copying 00000:   0%|          | 0/60 [00:00<?, ?it/s]

Copying 00024:   0%|          | 0/90 [00:00<?, ?it/s]

Copying 00009:   0%|          | 0/480 [00:00<?, ?it/s]

Copying 00012:   0%|          | 0/690 [00:00<?, ?it/s]

Copying 00023:   0%|          | 0/150 [00:00<?, ?it/s]

Copying 00013:   0%|          | 0/720 [00:00<?, ?it/s]

Copying 00041:   0%|          | 0/60 [00:00<?, ?it/s]

Copying 00030:   0%|          | 0/150 [00:00<?, ?it/s]

Copying 00017:   0%|          | 0/360 [00:00<?, ?it/s]

Copying 00006:   0%|          | 0/150 [00:00<?, ?it/s]

Copying 00042:   0%|          | 0/90 [00:00<?, ?it/s]

Copying 00018:   0%|          | 0/390 [00:00<?, ?it/s]

Copying 00040:   0%|          | 0/90 [00:00<?, ?it/s]

Copying 00020:   0%|          | 0/90 [00:00<?, ?it/s]

Copying 00010:   0%|          | 0/660 [00:00<?, ?it/s]

Copying 00031:   0%|          | 0/270 [00:00<?, ?it/s]

Copying 00004:   0%|          | 0/660 [00:00<?, ?it/s]

Copying 00001:   0%|          | 0/720 [00:00<?, ?it/s]

Copying train:   0%|          | 0/43 [00:00<?, ?it/s]

Copying 00016:   0%|          | 0/421 [00:00<?, ?it/s]

Copying 00033:   0%|          | 0/690 [00:00<?, ?it/s]

Copying 00022:   0%|          | 0/391 [00:00<?, ?it/s]

Copying 00011:   0%|          | 0/1321 [00:00<?, ?it/s]

Copying 00003:   0%|          | 0/1411 [00:00<?, ?it/s]

Copying 00039:   0%|          | 0/301 [00:00<?, ?it/s]

Copying 00034:   0%|          | 0/421 [00:00<?, ?it/s]

Copying 00035:   0%|          | 0/1201 [00:00<?, ?it/s]

Copying 00028:   0%|          | 0/541 [00:00<?, ?it/s]

Copying 00036:   0%|          | 0/391 [00:00<?, ?it/s]

Copying 00037:   0%|          | 0/211 [00:00<?, ?it/s]

Copying 00015:   0%|          | 0/631 [00:00<?, ?it/s]

Copying 00027:   0%|          | 0/241 [00:00<?, ?it/s]

Copying 00038:   0%|          | 0/2071 [00:00<?, ?it/s]

Copying 00014:   0%|          | 0/781 [00:00<?, ?it/s]

Copying 00026:   0%|          | 0/601 [00:00<?, ?it/s]

Copying 00019:   0%|          | 0/211 [00:00<?, ?it/s]

Copying 00007:   0%|          | 0/1441 [00:00<?, ?it/s]

Copying 00029:   0%|          | 0/271 [00:00<?, ?it/s]

Copying 00005:   0%|          | 0/1861 [00:00<?, ?it/s]

Copying 00008:   0%|          | 0/1411 [00:00<?, ?it/s]

Copying 00021:   0%|          | 0/331 [00:00<?, ?it/s]

Copying 00025:   0%|          | 0/1501 [00:00<?, ?it/s]

Copying 00032:   0%|          | 0/241 [00:00<?, ?it/s]

Copying 00002:   0%|          | 0/2251 [00:00<?, ?it/s]

Copying 00000:   0%|          | 0/211 [00:00<?, ?it/s]

Copying 00024:   0%|          | 0/271 [00:00<?, ?it/s]

Copying 00009:   0%|          | 0/1471 [00:00<?, ?it/s]

Copying 00012:   0%|          | 0/2101 [00:00<?, ?it/s]

Copying 00023:   0%|          | 0/511 [00:00<?, ?it/s]

Copying 00013:   0%|          | 0/2161 [00:00<?, ?it/s]

Copying 00041:   0%|          | 0/241 [00:00<?, ?it/s]

Copying 00030:   0%|          | 0/451 [00:00<?, ?it/s]

Copying 00017:   0%|          | 0/1111 [00:00<?, ?it/s]

Copying 00006:   0%|          | 0/421 [00:00<?, ?it/s]

Copying 00042:   0%|          | 0/241 [00:00<?, ?it/s]

Copying 00018:   0%|          | 0/1201 [00:00<?, ?it/s]

Copying 00040:   0%|          | 0/361 [00:00<?, ?it/s]

Copying 00020:   0%|          | 0/361 [00:00<?, ?it/s]

Copying 00010:   0%|          | 0/2011 [00:00<?, ?it/s]

Copying 00031:   0%|          | 0/781 [00:00<?, ?it/s]

Copying 00004:   0%|          | 0/1981 [00:00<?, ?it/s]

Copying 00001:   0%|          | 0/2221 [00:00<?, ?it/s]

# Apply augmentation

In [8]:
# Root of the copied dataset; the training images are read from its 'train' subfolder
dataset_path = './dataset/GTSRB-leveled'
train_dataset = datasets.ImageFolder(f'{dataset_path}/train')

# Number of training images per class index
counts = np.bincount(train_dataset.targets)
# Size of the largest class plus 100; this value is handed to the augmenter in the next cell
# NOTE(review): presumably the per-class image count to reach — confirm against DataAugmenter
max_count = counts.max() + 100

da = DataAugmenter(dataset_path)
da.load_images()

Classes found: , 00016, 00033, 00022, 00011, 00003, 00039, 00034, 00035, 00028, 00036, 00037, 00015, 00027, 00038, 00014, 00026, 00019, 00007, 00029, 00005, 00008, 00021, 00025, 00032, 00002, 00000, 00024, 00009, 00012, 00023, 00013, 00041, 00030, 00017, 00006, 00042, 00018, 00040, 00020, 00010, 00031, 00004, 00001


Loading classes: 100%|██████████| 43/43 [00:01<00:00, 27.21it/s]


In [9]:
# Run the augmentation with the max_count value computed above
# (size of the largest training class plus 100).
# NOTE(review): presumably this tops every class up to max_count images — confirm against DataAugmenter.augment_images
da.augment_images(max_count)

Augmenting images: 100%|██████████| 43/43 [02:02<00:00,  2.85s/it]
