# Notebook to prepare the data for training YOLOv5 in a multi-task setting (object detection and scene classification)

The dataset must be laid out like `../datasets/example_dataset/`:


- data.yaml
- .jpg
- ...
- .txt (detection annotations, vanilla yolo format)
- ...
- train_cls.csv
- val_cls.csv
  - | filename | road_condition | detect |
  - | example.jpg | 0 = dry, 1 = snowy, 2 = wet | 1 if this image is used for detection (i.e. it comes from esmart_wip), 0 otherwise (no detection label) |

- train.txt
- val.txt

## Imports

In [1]:
import os
import shutil
import pandas as pd

## Set the data source

In [4]:
# Root folder of the dataset being prepared; the train/val folders below are derived from it.
# Alternative dataset root, kept for reference:
# path = '/Users/selimgilon/Desktop/data_road_cond_seq_split_2_test'
path = '/home/selim/Desktop/datasets/esmart_context'
# Image and label folders are listed in the same order (train first, then val)
# so that the two lists can be walked pairwise.
img_folders = [f"{path}/train/images/", f"{path}/val/images/"]
label_folders = [f"{path}/train/labels/", f"{path}/val/labels/"]

In [19]:
# Sanity check: show each image folder next to the label folder it is paired with.
folder_pairs = zip(img_folders, label_folders)
for im_fol, lab_fol in folder_pairs:
    print(im_fol, "-------", lab_fol)

/home/selim/Desktop/datasets/data_road_cond_seq_split_2_test/train/images/ ------- /home/selim/Desktop/datasets/data_road_cond_seq_split_2_test/train/labels/
/home/selim/Desktop/datasets/data_road_cond_seq_split_2_test/val/images/ ------- /home/selim/Desktop/datasets/data_road_cond_seq_split_2_test/val/labels/


## Create a *null* txt file for each image which doesn't have one yet

In [10]:
# For every .jpg in `img_folder`, create an empty .txt file with the same base
# name, but only when no such file exists yet. Existing annotation files are
# left untouched so that real detection labels are never truncated.
count = 0
img_folder = '/home/selim/Desktop/esmart-ai-datasets/data/esmart_context/'
for filename in os.listdir(img_folder):
    if not filename.endswith(".jpg"):
        continue
    label_path = os.path.join(img_folder, os.path.splitext(filename)[0] + '.txt')
    # Skip images that already have a label file instead of overwriting it.
    if os.path.exists(label_path):
        continue
    # Opening in 'w' mode creates the empty file; the context manager closes
    # the handle even if an error occurs.
    with open(label_path, 'w'):
        pass
    count += 1
# `count` is the number of files actually created (skipped images are not counted).
print("number of files created: ", count)

number of files created:  59817


## Get the road conditions labels (from folders) and create a CSV file with these

In [63]:
# Build one classification CSV per split. Each image folder is expected to
# contain exactly three sub-folders named after the road conditions; every file
# inside a sub-folder is recorded with the numeric id of that condition.
for img_folder in img_folders:
    # The split is recognised from the folder path; 'train' takes precedence over 'val'.
    train = 'train' in img_folder
    val = (not train) and 'val' in img_folder
    print(img_folder)
    # class_name_to_num = {'n02102040': 0, 'n01440764': 1, 'n02979186': 2}
    class_name_to_num = {'dry': 0, 'snowy': 1, 'wet': 2}
    # Sub-folders of the image folder, ignoring the macOS metadata entry.
    classes_dir = [entry for entry in os.listdir(img_folder) if entry != '.DS_Store']
    assert len(classes_dir) == 3

    filenames, conditions = [], []
    for road_cond in classes_dir:
        print(road_cond)
        members = os.listdir(f"{img_folder}/{road_cond}")
        filenames.extend(members)
        # One class id per file found in this sub-folder.
        conditions.extend(class_name_to_num[road_cond] for _ in members)

    to_write = {'filename': filenames, 'road_condition': conditions}
    to_write_df = pd.DataFrame(to_write)
    # Folders that are neither train nor val are collected but not saved.
    if train or val:
        name = f'{path}/train_cls.csv' if train else f'{path}/val_cls.csv'
        to_write_df.to_csv(name, index=False)
        print('File saved at', name)

/Users/selimgilon/Library/Mobile Documents/com~apple~CloudDocs/Desktop/Montreal/UdeM/Internship/E-Smart/code/datasets/imagenette160small/train/images/
n02102040
n01440764
n02979186
File saved at /Users/selimgilon/Library/Mobile Documents/com~apple~CloudDocs/Desktop/Montreal/UdeM/Internship/E-Smart/code/datasets/imagenette160small/train_cls.csv
/Users/selimgilon/Library/Mobile Documents/com~apple~CloudDocs/Desktop/Montreal/UdeM/Internship/E-Smart/code/datasets/imagenette160small/val/images/
n02102040
n01440764
n02979186
File saved at /Users/selimgilon/Library/Mobile Documents/com~apple~CloudDocs/Desktop/Montreal/UdeM/Internship/E-Smart/code/datasets/imagenette160small/val_cls.csv


## Move the images from their road condition directory to train/images or val/images

In [9]:
# Relocate a .jpg from each image folder into the dataset root folder.
# NOTE(review): the inner loop stops right after the first .jpg it moves
# (`break`), so at most one image per folder is relocated on each run —
# confirm whether this was meant as a trial run before moving everything.
for img_folder in img_folders:
    print(img_folder)
    for filename in os.listdir(img_folder):
        # Anything that is not a .jpg is skipped.
        if not filename.endswith(".jpg"):
            continue
        source = f"{img_folder}{filename}"
        destination = f"/home/selim/Desktop/datasets/esmart_context/{filename}"
        shutil.move(source, destination)
        break

/home/selim/Desktop/datasets/esmart_context/train/images/
/home/selim/Desktop/datasets/esmart_context/val/images/


## Rename files if necessary

In [76]:
# Repair label files whose names end with a doubled dot ("name..txt").
for label_folder in label_folders:
    print(label_folder)
    # Collect the affected names first, then rename them one by one.
    doubled = [entry for entry in os.listdir(label_folder) if entry.endswith("..txt")]
    for filename in doubled:
        new_name = filename.replace('..txt', '.txt')
        old_path = os.path.join(label_folder, filename)
        new_path = os.path.join(label_folder, new_name)
        os.rename(old_path, new_path)

/Users/selimgilon/Library/Mobile Documents/com~apple~CloudDocs/Desktop/Montreal/UdeM/Internship/E-Smart/code/datasets/imagenette160small/train/labels/
/Users/selimgilon/Library/Mobile Documents/com~apple~CloudDocs/Desktop/Montreal/UdeM/Internship/E-Smart/code/datasets/imagenette160small/val/labels/


In [42]:
# Print the name of every .txt label file in `label_folder` that contains the
# text '0.66359' on at least one line (each matching file is printed once).
label_folder = '/home/selim/Desktop/datasets/wip/train/labels/'
for filename in os.listdir(label_folder):
    if not filename.endswith(".txt"):
        continue
    with open(label_folder + filename) as f:
        lines = f.readlines()
    if any('0.66359' in line for line in lines):
        print(filename)


20210630_192517_8322.txt
out_8932.txt
20210724_113301_14454.txt


In [27]:
# Root folder of the 'wip' dataset; the trailing '/' lets file names be appended directly.
PATH = '/home/selim/Desktop/datasets/wip/'

In [37]:
# Load the labels table stored in val_labels.csv under PATH.
# NOTE(review): the frame is named `train_wip` although it is read from the
# *val* CSV — confirm that this is intended.
train_wip = pd.read_csv(f'{PATH}val_labels.csv')
# Bare expression so that the notebook displays the frame.
train_wip

Unnamed: 0.1,Unnamed: 0,filename,road_condition,detect
0,0,run_11_1065.jpg,0,1
1,1,run_11_1066.jpg,0,1
2,2,run_11_1068.jpg,0,1
3,3,run_11_1069.jpg,0,1
4,4,run_11_1156.jpg,0,1
...,...,...,...,...
511,511,Log-20220120-164410 Data Log_13799.jpg,2,1
512,512,Log-20220120-164410 Data Log_13841.jpg,2,1
513,513,Log-20220120-164410 Data Log_14933.jpg,0,1
514,514,Log-20220120-164410 Data Log_14996.jpg,0,1


In [38]:
# 'filename' column of the labels frame. This is a pandas Series, not a list.
# Gotcha: `name in train_list` tests the Series *index*, not its values; use
# set(train_list) or train_list.isin(...) for membership checks on file names.
train_list = train_wip['filename']
# Bare expression so that the notebook displays the Series.
train_list

0                             run_11_1065.jpg
1                             run_11_1066.jpg
2                             run_11_1068.jpg
3                             run_11_1069.jpg
4                             run_11_1156.jpg
                        ...                  
511    Log-20220120-164410 Data Log_13799.jpg
512    Log-20220120-164410 Data Log_13841.jpg
513    Log-20220120-164410 Data Log_14933.jpg
514    Log-20220120-164410 Data Log_14996.jpg
515    Log-20220120-164410 Data Log_15689.jpg
Name: filename, Length: 516, dtype: object

In [32]:
# Prune the split: delete every file in `img_folder` (and its matching .txt in
# `label_folder`) whose name is not listed in `train_list`.
# WARNING: this permanently removes files from disk.
img_folder = '/home/selim/Desktop/datasets/wip/train/images/'
label_folder =  '/home/selim/Desktop/datasets/wip/train/labels/'

# `train_list` is a pandas Series, and `x in series` tests the index labels,
# not the values. Checking file names against the Series directly therefore
# reports every file as "not listed" and deletes all of them. Materialise the
# values in a set so the membership test is correct (and O(1) per lookup).
files_to_keep = set(train_list)
if not files_to_keep:
    # An empty keep-list would wipe the whole folder; refuse to continue.
    raise ValueError("train_list is empty: refusing to delete every file in " + img_folder)

count = 0
for filename in os.listdir(img_folder):
    if filename in files_to_keep:
        continue
    os.remove(os.path.join(img_folder, filename))
    # The label shares the image's base name; splitext copes with any extension length.
    label_file = os.path.join(label_folder, os.path.splitext(filename)[0] + '.txt')
    # An image may have no label file; that must not abort the clean-up half-way.
    if os.path.exists(label_file):
        os.remove(label_file)
    count += 1
# Number of images removed.
print(count)

38645


In [39]:
# Copy every image named in `train_list`, together with its .txt label, from
# the source data folder into the val images/labels folders.
# Copying is best-effort: a file that cannot be copied is reported by name and
# the loop carries on with the next one.
src_dir = '/home/selim/Desktop/esmart-ai-datasets/data/esmart_wip/'
img_dst_dir = '/home/selim/Desktop/datasets/wip/val/images/'
label_dst_dir = '/home/selim/Desktop/datasets/wip/val/labels/'

count = 0
for file in train_list:
    label_name = os.path.splitext(file)[0] + '.txt'
    for name, dst_dir in ((file, img_dst_dir), (label_name, label_dst_dir)):
        try:
            shutil.copy(f'{src_dir}{name}', f'{dst_dir}{name}')
        except OSError as err:
            # Only I/O failures are tolerated (missing source, permissions, ...);
            # anything else, including KeyboardInterrupt, still propagates.
            print(f'not found: {src_dir}{name} ({err})')
    count += 1
# Number of list entries processed, whether or not their files were found.
print(count)

516
