In [None]:
import os, cv2, glob, shutil
import numpy as np
import pandas as pd
import random
from tqdm import tqdm
import matplotlib.pyplot as plt
import json
import shutil

In [None]:
# All dataset locations are resolved relative to the directory the notebook runs from.
workspace_path = os.getcwd()
dataset_path = os.path.join(workspace_path, 'Dataset')
# The two input folders live directly under Dataset/.
image_path, json_path = (
    os.path.join(dataset_path, sub_dir) for sub_dir in ("TIFS", "JSONS")
)
# Show the workspace root as the cell output.
workspace_path

In [None]:
def rebuild_dir(target_path):
    """Leave an empty directory at ``target_path``.

    Any existing directory tree at that location is deleted first, then the
    directory (including missing parents) is created again.
    """
    already_there = os.path.exists(target_path)
    if already_there:
        # Drop the old tree so no stale files survive the rebuild.
        shutil.rmtree(target_path)
    os.makedirs(target_path)

In [None]:
# Folder that is later handed to Json_to_img as its save location.
save_path = os.path.join(dataset_path, 'JSON_PNG')

# Scratch folders that receive the cropped image / label tiles.
cache_path = os.path.join(dataset_path, 'cache')
cache_label_path = os.path.join(cache_path, 'label')
cache_img_path = os.path.join(cache_path, 'img')

# Final dataset layout: one image folder and one label folder per split.
train_dataset_path = os.path.join(dataset_path, 'train_dataset')
x_train_dir = os.path.join(train_dataset_path, 'train')
y_train_dir = os.path.join(train_dataset_path, 'train_labels')
x_valid_dir = os.path.join(train_dataset_path, 'val')
y_valid_dir = os.path.join(train_dataset_path, 'val_labels')
x_test_dir = os.path.join(train_dataset_path, 'test')
y_test_dir = os.path.join(train_dataset_path, 'test_labels')

# Wipe and recreate every output folder. Parents come before their children
# because rebuild_dir removes the whole tree below the path it is given.
for fresh_dir in (
    save_path,
    cache_path,
    cache_label_path,
    cache_img_path,
    train_dataset_path,
    x_train_dir,
    y_train_dir,
    x_valid_dir,
    y_valid_dir,
    x_test_dir,
    y_test_dir,
):
    rebuild_dir(fresh_dir)
print('rebuilding sucess')

In [None]:
# List both input folders in sorted order so that the i-th image is expected
# to line up with the i-th JSON file.
imgs = sorted(os.path.join(image_path, name) for name in os.listdir(image_path))
jsons = sorted(os.path.join(json_path, name) for name in os.listdir(json_path))

# zip() below stops at the shorter list, so surface a count mismatch explicitly
# instead of silently leaving the extra files unchecked.
if len(imgs) != len(jsons):
    print('file count mismatch - TIF:', len(imgs), ', JSON:', len(jsons))

for img, js in zip(imgs, jsons):
    # Compare file names without directory or extension. basename/splitext work
    # with any path separator and any extension length, unlike split('/') plus
    # fixed-width slicing.
    img_stem = os.path.splitext(os.path.basename(img))[0]
    js_stem = os.path.splitext(os.path.basename(js))[0]
    if img_stem == js_stem:
        print(img,'- OK')
    else:
        print(img,'- ERROR')

In [None]:
# Raise the column display width so long file paths are not truncated.
pd.set_option('display.max_colwidth', 5500)
# One row per (image, annotation) pair, in the sorted order built above.
df = pd.DataFrame(dict(TIF=imgs, JSON=jsons))
df

In [None]:
# Try the conversion on the first row only, with the third argument set to False.
# NOTE(review): Json_to_img is not defined in the visible cells - confirm it is
# defined before this cell when running on a fresh kernel.
Json_to_img(df.at[0, 'TIF'], df.at[0, 'JSON'], False)

In [None]:
# Run the conversion for every (TIF, JSON) pair, passing True and save_path.
for tif_file, json_file in zip(df['TIF'], df['JSON']):
    Json_to_img(tif_file, json_file, True, save_path)

In [None]:
# Collect everything found in save_path, sorted by name.
json_png = sorted(os.path.join(save_path, file_name) for file_name in os.listdir(save_path))
# Attached positionally: assumes the sorted files line up one-to-one with the
# rows of df - TODO confirm against the file names Json_to_img produces.
df['JSON_PNG'] = json_png
df

In [None]:
TILE_SIZE = 512  # side length, in pixels, of each square crop

# Running tile index used for the output file names. It advances for every
# full-size tile, including tiles that are not saved, so saved names can have gaps.
num = 0

for img_path, label_path in zip(df['TIF'], df['JSON_PNG']):
    print(os.path.splitext(img_path)[0])
    img_rgb = cv2.imread(img_path, cv2.IMREAD_COLOR)
    img_label = cv2.imread(label_path, cv2.IMREAD_COLOR)

    # cv2.imread returns None instead of raising when a file cannot be read;
    # fail here with the offending path rather than later on `.shape`.
    if img_rgb is None:
        raise FileNotFoundError(f'could not read image: {img_path}')
    if img_label is None:
        raise FileNotFoundError(f'could not read label image: {label_path}')

    img_height, img_width = img_rgb.shape[:2]

    # Crop TILE_SIZE x TILE_SIZE tiles. The range bounds stop before any tile
    # that would run past the bottom or right edge, so partial tiles are skipped.
    for height in tqdm(range(0, img_height - TILE_SIZE + 1, TILE_SIZE)):
        for width in range(0, img_width - TILE_SIZE + 1, TILE_SIZE):
            img_rgb_crop = img_rgb[height:height+TILE_SIZE, width:width+TILE_SIZE, :]
            img_label_crop = img_label[height:height+TILE_SIZE, width:width+TILE_SIZE, :]
            # Keep only tiles whose label crop has at least one non-zero pixel.
            if np.any(img_label_crop):
                tile_name = '%05d.png' % num
                # Image and label tiles share a file name so they can be paired later.
                cv2.imwrite(os.path.join(cache_img_path, tile_name), img_rgb_crop)
                cv2.imwrite(os.path.join(cache_label_path, tile_name), img_label_crop)
            num += 1

In [None]:
# Report how many files ended up in the label cache and the image cache, in that order.
for cache_dir in (cache_label_path, cache_img_path):
    print(len(os.listdir(cache_dir)))

## Separating training, validation, and test datasets

In [None]:
def permutation_train_test_split(data,label , test_size=0.2, shuffle=True, random_state=1004):
    """Split paired arrays into train / validation / test subsets.

    The validation subset gets the same number of samples as the test subset
    (``int(len(data) * test_size)`` each); the remainder goes to training.

    Parameters
    ----------
    data, label : numpy.ndarray
        Arrays of equal length; element ``i`` of ``label`` belongs to element
        ``i`` of ``data``. Both are indexed with the same permutation.
    test_size : float
        Fraction of samples for the test subset (and again for validation).
        Must lie in ``[0, 0.5]`` so the training subset cannot go negative.
    shuffle : bool
        When True, apply one random permutation to both arrays before slicing.
    random_state : int or None
        Seed for the permutation. A local generator is used, so the global
        NumPy random state is left untouched.

    Returns
    -------
    tuple
        ``(x_train, y_train, x_valid, y_valid, x_test, y_test)``.

    Raises
    ------
    ValueError
        If ``data`` and ``label`` differ in length, or ``test_size`` is
        outside ``[0, 0.5]``.
    """
    data_len = len(data)
    if len(label) != data_len:
        raise ValueError(
            f'data and label must have the same length, got {data_len} and {len(label)}'
        )
    if not 0 <= test_size <= 0.5:
        raise ValueError(f'test_size must be between 0 and 0.5, got {test_size}')

    print(f'전체 데이터수 : {data_len}')
    test_num = int(data_len * test_size)
    valid_num = test_num
    train_num = data_len - test_num - valid_num

    if shuffle:
        # Honor the caller's seed instead of a hard-coded one.
        rng = np.random.RandomState(random_state)
        order = rng.permutation(data_len)
        data = data[order]
        label = label[order]

    # Slice by explicit end positions: a negative-index slice such as
    # data[-test_num:] returns the WHOLE array when test_num is 0.
    valid_end = train_num + valid_num
    x_train, y_train = data[:train_num], label[:train_num]
    x_valid, y_valid = data[train_num:valid_end], label[train_num:valid_end]
    x_test, y_test = data[valid_end:], label[valid_end:]

    return x_train, y_train, x_valid, y_valid, x_test, y_test

# Split the cached tiles into train / validation / test sets (6:2:2).
# glob returns files in arbitrary order, and the same index permutation is
# applied to both arrays, so both listings are sorted to guarantee that
# position i refers to the same tile name in the image and label folders.
X_path=np.array(sorted(glob.glob(cache_img_path+"/*.png")))
Y_path=np.array(sorted(glob.glob(cache_label_path+"/*.png")))
assert [os.path.basename(p) for p in X_path] == [os.path.basename(p) for p in Y_path], \
    'image and label tiles do not match one-to-one by file name'
x_train, y_train, x_valid, y_valid, x_test, y_test=permutation_train_test_split(X_path,Y_path,test_size=0.2,shuffle=True,random_state=1004)

print('훈련 데이터 수 = img : ',len(x_train),', label : ',len(y_train))
print('검증 데이터 수 = img : ',len(x_valid),', label : ',len(y_valid))
print('테스트 데이터 수 = img : ',len(x_test),', label : ',len(y_test))

In [None]:
# Copy every file of each split into its destination folder, in the same
# order as before: train, validation, test, with images before labels.
split_destinations = (
    (x_train, x_train_dir),
    (y_train, y_train_dir),
    (x_valid, x_valid_dir),
    (y_valid, y_valid_dir),
    (x_test, x_test_dir),
    (y_test, y_test_dir),
)
for split_files, destination_dir in split_destinations:
    for file_path in split_files:
        shutil.copy2(file_path, destination_dir)