# Target List Generator
---

### Collect label file paths

In [14]:
import os
import glob

In [21]:
# Directory layout of the dataset. Everything is derived from the user's home
# directory; os.path.expanduser('~') is used instead of os.getenv('HOME')
# because getenv returns None when HOME is unset (e.g. on Windows), which would
# make os.path.join raise a TypeError.
BASE_PATH = os.path.join(os.path.expanduser('~'), 'workspace/Hackerton') # project folder
DATA_PATH = os.path.join(BASE_PATH, 'RSI_OP_NIA_AIHUB') # data folder

# Folders holding the label PNGs. Note that the dataset's `validation` folders
# are bound to the *_TEST_* names here.
BUILDINGS_TRAIN_LABEL_PATH = os.path.join(DATA_PATH, 'buildings/training/raw')
BUILDINGS_TEST_LABEL_PATH = os.path.join(DATA_PATH, 'buildings/validation/raw')
ROADS_TRAIN_LABEL_PATH = os.path.join(DATA_PATH, 'roads/training/raw')
ROADS_TEST_LABEL_PATH = os.path.join(DATA_PATH, 'roads/validation/raw')

In [22]:
# Glob patterns matching every PNG directly inside each label folder.
buildings_train_label_pattern = f"{BUILDINGS_TRAIN_LABEL_PATH}/*.png"
buildings_test_label_pattern = f"{BUILDINGS_TEST_LABEL_PATH}/*.png"
roads_train_label_pattern = f"{ROADS_TRAIN_LABEL_PATH}/*.png"
roads_test_label_pattern = f"{ROADS_TEST_LABEL_PATH}/*.png"

In [23]:
# Expand each pattern into the list of matching label file paths.
(
    buildings_train_label_paths,
    buildings_test_label_paths,
    roads_train_label_paths,
    roads_test_label_paths,
) = map(
    glob.glob,
    (
        buildings_train_label_pattern,
        buildings_test_label_pattern,
        roads_train_label_pattern,
        roads_test_label_pattern,
    ),
)

# Report how many files were found per list. The "val" labels below report the
# *_test_label_paths lists.
print(
    f'len(buildings_train_label_paths) : {len(buildings_train_label_paths)}',
    f'len(buildings_val_label_paths) : {len(buildings_test_label_paths)}',
    f'len(roads_train_label_paths) : {len(roads_train_label_paths)}',
    f'len(roads_val_label_paths) : {len(roads_test_label_paths)}',
    sep='\n',
)

len(buildings_train_label_paths) : 1239
len(buildings_val_label_paths) : 159
len(roads_train_label_paths) : 1144
len(roads_val_label_paths) : 127


In [24]:
# Preview the first five matched paths as a sanity check on the glob result.
buildings_train_label_paths[:5]

['/home/aiffel-dj1/workspace/Hackerton/RSI_OP_NIA_AIHUB/buildings/training/raw/BLD09126_PS3_K3A_NIA0386.png',
 '/home/aiffel-dj1/workspace/Hackerton/RSI_OP_NIA_AIHUB/buildings/training/raw/BLD03548_PS3_K3A_NIA0376.png',
 '/home/aiffel-dj1/workspace/Hackerton/RSI_OP_NIA_AIHUB/buildings/training/raw/BLD02378_PS3_K3A_NIA0374.png',
 '/home/aiffel-dj1/workspace/Hackerton/RSI_OP_NIA_AIHUB/buildings/training/raw/BLD00299_PS3_K3A_NIA0276.png',
 '/home/aiffel-dj1/workspace/Hackerton/RSI_OP_NIA_AIHUB/buildings/training/raw/BLD00085_PS3_K3A_NIA0276.png']

### Split the original training set into training and validation sets

In [36]:
from sklearn.model_selection import train_test_split

SEED = 2021

# Hold out 150 building label files from the training folder as a validation set.
# The input is sorted before splitting so that the seeded shuffle starts from a
# deterministic order regardless of how the path list was produced; glob.glob
# returns entries in filesystem-dependent order, so random_state alone does not
# make the split reproducible across machines.
# NOTE(review): sorting changes which files land in each split compared with
# lists generated earlier from the unsorted order.
building_train, building_val = train_test_split(
    sorted(buildings_train_label_paths),
    test_size=150,
    shuffle=True,
    random_state=SEED,
)
print(len(building_train), len(building_val), len(buildings_test_label_paths))

1089 150 159


In [37]:
# Hold out 120 road label files from the training folder as a validation set.
# The input is sorted before splitting so that the seeded shuffle starts from a
# deterministic order regardless of how the path list was produced; glob.glob
# returns entries in filesystem-dependent order, so random_state alone does not
# make the split reproducible across machines.
# NOTE(review): sorting changes which files land in each split compared with
# lists generated earlier from the unsorted order.
road_train, road_val = train_test_split(
    sorted(roads_train_label_paths),
    test_size=120,
    shuffle=True,
    random_state=SEED,
)
print(len(road_train), len(road_val), len(roads_test_label_paths))

1024 120 127


### save list

#### building

In [29]:
# Write the building training file names (one per line, sorted, no trailing
# newline) to B_train_list.txt.
# The names are computed before the file is opened so a failure here cannot
# leave a truncated list behind, and '\n'.join handles an empty list (writes an
# empty file) instead of raising IndexError on lines[0].
building_train_names = sorted(os.path.basename(path) for path in building_train)
with open(os.path.join(DATA_PATH, 'B_train_list.txt'), 'w', encoding='UTF-8') as f:
    f.write('\n'.join(building_train_names))

In [30]:
# Write the building validation file names (one per line, sorted, no trailing
# newline) to B_val_list.txt.
# The names are computed before the file is opened so a failure here cannot
# leave a truncated list behind, and '\n'.join handles an empty list (writes an
# empty file) instead of raising IndexError on lines[0].
building_val_names = sorted(os.path.basename(path) for path in building_val)
with open(os.path.join(DATA_PATH, 'B_val_list.txt'), 'w', encoding='UTF-8') as f:
    f.write('\n'.join(building_val_names))

In [31]:
# Write the building test file names (one per line, sorted, no trailing
# newline) to B_test_list.txt.
# The names are computed before the file is opened so a failure here cannot
# leave a truncated list behind, and '\n'.join handles an empty list (writes an
# empty file) instead of raising IndexError on lines[0].
building_test_names = sorted(os.path.basename(path) for path in buildings_test_label_paths)
with open(os.path.join(DATA_PATH, 'B_test_list.txt'), 'w', encoding='UTF-8') as f:
    f.write('\n'.join(building_test_names))

#### road

In [32]:
# Write the road training file names (one per line, sorted, no trailing
# newline) to R_train_list.txt.
# The names are computed before the file is opened so a failure here cannot
# leave a truncated list behind, and '\n'.join handles an empty list (writes an
# empty file) instead of raising IndexError on lines[0].
road_train_names = sorted(os.path.basename(path) for path in road_train)
with open(os.path.join(DATA_PATH, 'R_train_list.txt'), 'w', encoding='UTF-8') as f:
    f.write('\n'.join(road_train_names))

In [33]:
# Write the road validation file names (one per line, sorted, no trailing
# newline) to R_val_list.txt.
# The names are computed before the file is opened so a failure here cannot
# leave a truncated list behind, and '\n'.join handles an empty list (writes an
# empty file) instead of raising IndexError on lines[0].
road_val_names = sorted(os.path.basename(path) for path in road_val)
with open(os.path.join(DATA_PATH, 'R_val_list.txt'), 'w', encoding='UTF-8') as f:
    f.write('\n'.join(road_val_names))

In [34]:
# Write the road test file names (one per line, sorted, no trailing
# newline) to R_test_list.txt.
# The names are computed before the file is opened so a failure here cannot
# leave a truncated list behind, and '\n'.join handles an empty list (writes an
# empty file) instead of raising IndexError on lines[0].
road_test_names = sorted(os.path.basename(path) for path in roads_test_label_paths)
with open(os.path.join(DATA_PATH, 'R_test_list.txt'), 'w', encoding='UTF-8') as f:
    f.write('\n'.join(road_test_names))