## imagegenerator 폴더 구조 생성

- 구조 (`../data/three_image(3_label)/` 아래에 생성)
    - sat
       - train
          - 1, 2, 3 (라벨별 하위 폴더)
       - val
          - ...
       - test
          - ...
    - road
       - ...
    - dem
       - ...

In [1]:
import random
import os
from shutil import copyfile

# Seed the stdlib RNG so any `random`-based step in this notebook is repeatable.
random.seed(a=1014)

# --- Source locations: where the raw images and the CSV index are read from.
sat_data_folder = '../data/foilum-image_kr'
sat_image_folder = os.path.join(sat_data_folder, 'png_Vworld')
road_data_folder = '../data/for_cnn2'
road_image_folder = '../data/for_cnn2/images/'
dem_image_folder = '../data/alos'
csv_file = f'{road_data_folder}/lon_lat_list_no_apply_is_raw.csv'

# --- Destination locations: one root per image type, each with
# train / val / test subfolders (always listed in that order).
road_save_folder = '../data/three_image(3_label)/road'
road_save_folders = [
    f'{road_save_folder}/{split}' for split in ('train', 'val', 'test')
]
sat_save_folder = '../data/three_image(3_label)/sat'
sat_save_folders = [
    f'{sat_save_folder}/{split}' for split in ('train', 'val', 'test')
]
dem_save_folder = '../data/three_image(3_label)/dem'
dem_save_folders = [
    f'{dem_save_folder}/{split}' for split in ('train', 'val', 'test')
]

def make_dirs(dirs):
    """Create the directory ``dirs`` and any missing parents.

    Calling this for a directory that already exists is a no-op.

    Args:
        dirs: Path of the directory to create.

    Raises:
        FileExistsError: If ``dirs`` exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test and keeps the call idempotent.
    os.makedirs(dirs, exist_ok=True)

def get_train(data_list):
    """Return the leading 80% of ``data_list`` (the training portion)."""
    train_end = int(len(data_list) * 0.8)
    return data_list[:train_end]
def get_val(data_list):
    """Return the trailing 10% of ``data_list`` (the validation portion).

    The slice starts at the 90% mark and runs to the end of the sequence.
    """
    val_start = int(len(data_list) * 0.9)
    return data_list[val_start:]
def get_test(data_list):
    """Return the slice of ``data_list`` between the 80% and 90% marks.

    This is the test portion: it sits after the training slice and before
    the validation slice.
    """
    size = len(data_list)
    test_start = int(size * 0.8)
    test_end = int(size * 0.9)
    return data_list[test_start:test_end]

# Dispatch table: split index (as a string) -> function that slices out that
# split. '0' is train, '1' is val, '2' is test.
funcdict = {
    str(position): splitter
    for position, splitter in enumerate((get_train, get_val, get_test))
}

In [2]:
def process_label(df, label, data_num=10000):
    """Sample rows of one label and copy their images into the split folders.

    Up to ``data_num`` rows are drawn from ``df`` without replacement, using a
    fixed ``random_state`` so the draw is repeatable. The sampled rows are
    divided into train / val / test through ``funcdict``. For each row the
    road, satellite and DEM images whose names are built from ``{lat}_{lon}``
    are copied to ``<save_folder>/<split>/<label>/{lat}_{lon}.png``.

    A sample is copied only when all three of its source images exist, so the
    three output trees always contain the same set of file names. Samples
    with any missing source image are skipped.

    Args:
        df: DataFrame with ``lat`` and ``lon`` columns, one row per sample.
        label: Class label; used as the name of the per-split subfolder.
        data_num: Maximum number of rows to sample from ``df``.
    """
    sampled = df.sample(n=min(data_num, len(df)), random_state=1004, replace=False)

    # File stems do not depend on the split, so build them once up front.
    # An empty frame is special-cased because a row-wise ``apply`` on it may
    # not produce a Series.
    if sampled.empty:
        stems = []
    else:
        stems = sampled.apply(
            lambda row: f'{row["lat"]}_{row["lon"]}', axis=1
        ).tolist()

    for i in range(3):
        # Build the label folders with os.path.join so the directories that
        # get created are the same ones the copies below write into, on any
        # platform (a hard-coded backslash is only a separator on Windows).
        road_dir = os.path.join(road_save_folders[i], str(label))
        sat_dir = os.path.join(sat_save_folders[i], str(label))
        dem_dir = os.path.join(dem_save_folders[i], str(label))
        for target_dir in (road_dir, sat_dir, dem_dir):
            make_dirs(target_dir)

        for stem in funcdict[str(i)](stems):
            copies = (
                (f'{road_image_folder}/{stem}_net.png',
                 os.path.join(road_dir, f'{stem}.png')),
                (f'{sat_image_folder}/{stem}.png',
                 os.path.join(sat_dir, f'{stem}.png')),
                (f'{dem_image_folder}/{stem}_dem.png',
                 os.path.join(dem_dir, f'{stem}.png')),
            )
            # Require every source (including the DEM image) before copying
            # anything, so a missing file never leaves a half-copied sample.
            if not all(os.path.exists(src) for src, _ in copies):
                continue
            for src, dst in copies:
                copyfile(src, dst)

In [3]:
def split_3label_df(df, label):
    """Return the rows of ``df`` that belong to one of three risk classes.

    The threshold is the 0.75 quantile of the non-zero values in the
    '위험도' column; it is printed on every call.

    Args:
        df: DataFrame containing a numeric '위험도' column.
        label: 1 selects rows whose value equals 0.0, 2 selects rows with
            0.0 < value <= threshold, 3 selects rows with value > threshold.

    Returns:
        The filtered DataFrame, or ``None`` when ``label`` is not 1, 2 or 3.
    """
    import numpy as np

    risk = df['위험도']
    nonzero_risk = risk[risk != 0.0].values.tolist()
    threshold = np.quantile(nonzero_risk, 0.75)
    print(threshold)

    if label == 1:
        selected = risk == 0.0
    elif label == 2:
        selected = (risk > 0.0) & (risk <= threshold)
    elif label == 3:
        selected = risk > threshold
    else:
        return None
    return df[selected]

In [4]:
def process( csv_path ):
    """Read the sample CSV and export the images for labels 1, 2 and 3.

    For each label the matching rows are selected with ``split_3label_df``,
    their count is printed, and they are handed to ``process_label``.

    Args:
        csv_path: Path of the CSV file; it is read with cp949 encoding.
    """
    import pandas as pd

    frame = pd.read_csv(csv_path, encoding='cp949', engine='python')
    for label in (1, 2, 3):
        subset = split_3label_df(frame, label)
        print(len(subset))
        process_label(subset, label)
    
            
# Run the whole export on the CSV configured in `csv_file`.
process(csv_file)

1.2561999999999998
10362
1.2561999999999998
7782
1.2561999999999998
2580
