### 데이터 전처리
- 폴리곤 어노테이션을 사용하여 이미지에서 특징 추출
- JSON 파일에서 breed, lesions, path 추출

### 데이터프레임 생성 및 저장
- img_data (이미지 특징), breed, path, lesions 컬럼이 있는 df
- lesions은 분류 클래스
- csv 파일 저장

### 훈련/검증/테스트
- 모든 카테고리(A1-A6)에 대한 전처리 후, CSV 파일을 합치기
- 훈련 세트와 테스트 세트로 8:2 비율로 분할
- U-Net을 사용하여 학습
- 훈련 세트를 9:1 비율로 나누어 일부를 검증용으로 사용
- 학습 후 테스트 세트로 테스트

In [1]:
import os
import json
import numpy as np
from PIL import Image, ImageDraw
import pandas as pd
import glob

from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split




In [12]:
def load_json(json_path):
    """Read the UTF-8 encoded file at ``json_path`` and return its parsed JSON content."""
    with open(json_path, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

In [13]:
# Root folder whose sub-folders are each treated as one class folder.
base_folder = 'data/training/dog/no/test'
# os.listdir() also returns plain files and gives no ordering guarantee, so
# keep only real sub-directories and sort them to make every run reproducible.
class_folders = sorted(
    entry for entry in os.listdir(base_folder)
    if os.path.isdir(os.path.join(base_folder, entry))
)

In [14]:
# Collect every JPEG in each class folder together with the path of the
# annotation file that is expected to sit next to it (same stem, '.json').
image_paths = []
json_paths = []

for class_folder in class_folders:
    # glob order depends on the filesystem; sort for a reproducible pairing.
    image_files = sorted(glob.glob(os.path.join(base_folder, class_folder, '*.jpg')))
    # Swap only the extension: str.replace('.jpg', ...) would also rewrite a
    # '.jpg' occurring earlier in the path and would miss an upper-case '.JPG'
    # (which case-insensitive filesystems still match with '*.jpg').
    json_files = [os.path.splitext(image_file)[0] + '.json' for image_file in image_files]

    image_paths.extend(image_files)
    json_paths.extend(json_files)

# Normalise Windows path separators to forward slashes.
image_paths = [path.replace('\\', '/') for path in image_paths]
json_paths = [path.replace('\\', '/') for path in json_paths]

In [15]:
def create_mask(image_shape, polygons):
    """Rasterise polygon annotations into a single-channel mask image.

    Args:
        image_shape: Size handed straight to ``Image.new``; PIL expects
            ``(width, height)`` here (the callers in this file pass
            ``image.size``).
        polygons: Iterable of mappings that store their vertices under the
            keys ``x1``/``y1``, ``x2``/``y2``, ... numbered consecutively
            from 1. Reading stops at the first missing index.

    Returns:
        A mode ``'L'`` PIL image that is 0 everywhere except for pixels
        inside or on the outline of a polygon, which are set to 1.
    """
    mask = Image.new('L', image_shape, 0)
    # One drawing context is enough for all polygons.
    draw = ImageDraw.Draw(mask)
    for polygon in polygons:
        # Gather every 'xN' / 'yN' pair into a vertex list.
        xy = []
        i = 1
        while f'x{i}' in polygon and f'y{i}' in polygon:
            xy.append((polygon[f'x{i}'], polygon[f'y{i}']))
            i += 1

        # Pillow rejects coordinate lists with fewer than two points, so an
        # empty or single-vertex annotation is skipped instead of aborting.
        if len(xy) < 2:
            continue

        # Paint the polygon (outline and interior) onto the mask.
        draw.polygon(xy, outline=1, fill=1)
    return mask

In [16]:
# Render one mask PNG next to every annotated image.
for img_path, json_path in zip(image_paths, json_paths):
    json_data = load_json(json_path)
    # Image.open() keeps the file handle open until the image is closed; only
    # the size is needed, so release the handle right away.
    with Image.open(img_path) as image:
        image_shape = image.size  # PIL size is (width, height)

    # Build the mask from the polygon coordinates.
    # Only the first 'labelingInfo' entry is read.
    polygons = json_data['labelingInfo'][0]['polygon']['location']
    mask = create_mask(image_shape, polygons)

    # Derive the mask path from the stem so that only the extension changes,
    # whatever the rest of the path contains.
    mask_image_path = os.path.splitext(img_path)[0] + '_mask.png'
    mask.save(mask_image_path)

In [17]:
# Collect the mask files written by the mask-generation step.
mask_paths = []

for class_folder in class_folders:
    # Match only '*_mask.png' so unrelated PNG files in the folder are not
    # mistaken for masks; sort because glob order is filesystem dependent.
    mask_files = sorted(glob.glob(os.path.join(base_folder, class_folder, '*_mask.png')))
    mask_paths.extend(mask_files)

# Normalise Windows path separators to forward slashes.
mask_paths = [path.replace('\\', '/') for path in mask_paths]

In [None]:
# Convolution block
def conv_block(input_tensor, num_filters):
    """Apply two successive 3x3, same-padded ReLU convolutions.

    Both convolutions use ``num_filters`` filters; the output of the second
    one is returned.
    """
    features = input_tensor
    for _ in range(2):
        features = Conv2D(num_filters, (3, 3), activation='relu', padding='same')(features)
    return features

# Encoder block
def encoder_block(input_tensor, num_filters):
    """Run a convolution block, then 2x2 max-pool its output.

    Returns:
        ``(skip, pooled)``: the convolution output before pooling and the
        pooled tensor.
    """
    skip = conv_block(input_tensor, num_filters)
    downsample = MaxPooling2D((2, 2))
    return skip, downsample(skip)

# Decoder block
def decoder_block(input_tensor, concat_tensor, num_filters):
    """Upsample ``input_tensor`` 2x, join it with ``concat_tensor`` and convolve.

    The two tensors are concatenated along the last axis before the
    convolution block with ``num_filters`` filters is applied.
    """
    upsampled = UpSampling2D((2, 2))(input_tensor)
    merged = concatenate([upsampled, concat_tensor], axis=-1)
    return conv_block(merged, num_filters)

def unet(input_size=(256, 256, 1), num_filters_start=64, num_blocks=4, num_classes=7):
    """Assemble a U-Net style encoder/decoder Keras model.

    Args:
        input_size: Shape passed to ``Input``.
        num_filters_start: Filter count of the first encoder block; it is
            doubled at every deeper level.
        num_blocks: Number of encoder blocks, mirrored by as many decoder
            blocks.
        num_classes: Number of channels of the final 1x1 softmax convolution.

    Returns:
        An uncompiled ``Model`` mapping the input to the softmax output.

    NOTE(review): the skip concatenations presumably require the spatial
    input dimensions to be divisible by ``2 ** num_blocks`` -- confirm.
    """
    inputs = Input(input_size)
    widths = [num_filters_start * (2 ** level) for level in range(num_blocks)]

    # Contracting path: remember each pre-pooling tensor for the skip links.
    skips = []
    current = inputs
    for width in widths:
        skip, current = encoder_block(current, width)
        skips.append(skip)

    # Bottleneck at the deepest resolution.
    current = conv_block(current, num_filters_start * (2 ** num_blocks))

    # Expanding path: walk the stored skips from deepest to shallowest.
    for skip, width in zip(reversed(skips), reversed(widths)):
        current = decoder_block(current, skip, width)

    outputs = Conv2D(num_classes, (1, 1), activation='softmax')(current)

    return Model(inputs=[inputs], outputs=[outputs])

In [None]:
# Build the U-Net with its default arguments and print the layer summary.
model = unet()
model.summary()

In [None]:
# Collect one metadata row per annotated image (and write its mask) so the
# DataFrame ends up with a row for every image, not just the last one.
records = []

for img_path, json_path in zip(image_paths, json_paths):
    json_data = load_json(json_path)
    # Only the size is needed; close the file handle immediately.
    with Image.open(img_path) as image:
        image_shape = image.size  # PIL size is (width, height)

    # Extract breed, Path and label.
    breed = json_data['metaData']['breed']
    path_value = json_data['metaData']['Path']
    labeling = json_data['labelingInfo'][0]['polygon']['label']

    # Build the mask from the polygon coordinates.
    polygons = json_data['labelingInfo'][0]['polygon']['location']
    mask = create_mask(image_shape, polygons)

    # Derive the mask path from the stem. A '.png' substring replace is a
    # no-op on '.jpg' paths, which would make mask.save() overwrite the
    # source image itself.
    mask_image_path = os.path.splitext(img_path)[0] + '_mask.png'
    mask.save(mask_image_path)

    # Append this image's row.
    data = {
        'image_path': img_path,
        'mask_path': mask_image_path,
        'breed': breed,
        'Path': path_value,
        'label': labeling,
    }
    records.append(data)

# Create the DataFrame and store it as CSV. Explicit columns keep the header
# stable even when no image was found.
df = pd.DataFrame(
    records,
    columns=['image_path', 'mask_path', 'breed', 'Path', 'label'],
)
df.to_csv('dataset.csv', index=False)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Geometric augmentation shared by images and masks. Both generators must use
# exactly the same random parameters so that, with the same seed, each mask
# receives the same transform as its image.
data_gen_args = dict(rotation_range=10,
                     width_shift_range=0.1,
                     height_shift_range=0.1,
                     shear_range=0.05,
                     zoom_range=0.05,
                     horizontal_flip=True,
                     fill_mode='nearest')

# Only the images are rescaled to [0, 1]. Masks hold integer class indices, so
# they are left unscaled and warped with order-0 interpolation to avoid
# blending neighbouring labels into fractional values.
image_datagen = ImageDataGenerator(rescale=1./255, **data_gen_args)
mask_datagen = ImageDataGenerator(interpolation_order=0, **data_gen_args)

# Pair every image with the mask written next to it ('<stem>_mask.png') and
# drop pairs whose mask is missing: each flow silently skips unreadable files
# on its own, which would shift images and masks against each other.
pairs_df = pd.DataFrame({
    'image_path': image_paths,
    'mask_path': [os.path.splitext(path)[0] + '_mask.png' for path in image_paths],
})
pairs_df = pairs_df[pairs_df['mask_path'].map(os.path.isfile)].reset_index(drop=True)
if pairs_df.empty:
    raise FileNotFoundError('no image/mask pairs found; generate the masks first')

# Identical seed + identical settings => identical shuffling and transforms.
flow_args = dict(target_size=(256, 256),
                 color_mode='grayscale',
                 class_mode=None,
                 batch_size=32,
                 seed=42)

image_flow = image_datagen.flow_from_dataframe(
    pairs_df, x_col='image_path', **flow_args)
mask_flow = mask_datagen.flow_from_dataframe(
    pairs_df, x_col='mask_path', interpolation='nearest', **flow_args)

# Yields (image_batch, mask_batch) tuples indefinitely.
train_generator = zip(image_flow, mask_flow)

# Train the model.
# The model has to be compiled before fitting. The sparse loss takes the
# integer class-index masks directly against the per-pixel softmax output.
# NOTE(review): create_mask() only writes the values 0 and 1 while unet()
# defaults to 7 output classes -- confirm whether masks should carry the
# lesion class index instead.
model.compile(optimizer=Adam(),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Model.fit accepts Python generators; fit_generator is deprecated.
model.fit(
    train_generator,
    steps_per_epoch=2000,
    epochs=50)
