### Transfer Learning Task
- 쌀 이파리 병에 대한 분류

In [79]:
from glob import glob
import os

root = './datasets/rice_leaf_diseases_dataset/original/'

directories = glob(os.path.join(root,'*'))
dir_names = []

# root 경로 안 directory들의 이름 출력
for directory in directories:
    dir_names.append(directory[directory.rindex('\\') + 1:])

print(dir_names)

['Bacterialblight', 'Brownspot', 'Leafsmut']


In [44]:
# 각 폴더 안 이미지 파일의 이름을 '폴더명+숫자(01, 02, ...).png'로 변경
for name in dir_names:
    for i, file_name in enumerate(os.listdir(os.path.join(root, name))):
        # 기존 파일명
        old_file = os.path.join(root + name + '/', file_name)

        # 변경 후 파일명
        new_file = os.path.join(root + name + '/', name + str(i + 1) + '.png')

        os.rename(old_file, new_file)

FileExistsError: [WinError 183] 파일이 이미 있으므로 만들 수 없습니다: './datasets/rice_leaf_diseases_dataset/original/Bacterialblight/Bacterialblight10.png' -> './datasets/rice_leaf_diseases_dataset/original/Bacterialblight/Bacterialblight2.png'

In [99]:
IMAGE_SIZE = 64
BATCH_SIZE = 20

In [81]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# ImageDataGenerator 선언 - RGB 값을 0 ~ 1사이로 스케일링만
image_data_generator = ImageDataGenerator(rescale=1./255)

generator = image_data_generator.flow_from_directory(root,
                                                     target_size=(IMAGE_SIZE, IMAGE_SIZE),
                                                     batch_size=BATCH_SIZE,
                                                     class_mode='categorical')

print(generator.class_indices)

Found 4684 images belonging to 3 classes.
{'Bacterialblight': 0, 'Brownspot': 1, 'Leafsmut': 2}


In [82]:
import pandas as pd

# 경로 및 target 클래스로 데이터프레임 생성
a_df = pd.DataFrame({'file_paths': generator.filepaths, 'targets': generator.classes})
a_df

Unnamed: 0,file_paths,targets
0,./datasets/rice_leaf_diseases_dataset/original...,0
1,./datasets/rice_leaf_diseases_dataset/original...,0
2,./datasets/rice_leaf_diseases_dataset/original...,0
3,./datasets/rice_leaf_diseases_dataset/original...,0
4,./datasets/rice_leaf_diseases_dataset/original...,0
...,...,...
4679,./datasets/rice_leaf_diseases_dataset/original...,2
4680,./datasets/rice_leaf_diseases_dataset/original...,2
4681,./datasets/rice_leaf_diseases_dataset/original...,2
4682,./datasets/rice_leaf_diseases_dataset/original...,2


In [83]:
# file_path의 '\\'를 '/'로 변경
a_df.loc[:, 'file_paths'] = a_df.file_paths.apply(lambda x: x.replace('\\', '/'))
a_df

Unnamed: 0,file_paths,targets
0,./datasets/rice_leaf_diseases_dataset/original...,0
1,./datasets/rice_leaf_diseases_dataset/original...,0
2,./datasets/rice_leaf_diseases_dataset/original...,0
3,./datasets/rice_leaf_diseases_dataset/original...,0
4,./datasets/rice_leaf_diseases_dataset/original...,0
...,...,...
4679,./datasets/rice_leaf_diseases_dataset/original...,2
4680,./datasets/rice_leaf_diseases_dataset/original...,2
4681,./datasets/rice_leaf_diseases_dataset/original...,2
4682,./datasets/rice_leaf_diseases_dataset/original...,2


In [84]:
from sklearn.model_selection import train_test_split

# train, test 데이터 세트 분할
X_train, X_test, y_train, y_test = \
            train_test_split(a_df.file_paths, a_df.targets, stratify=a_df.targets, test_size=0.2, random_state=124)

# train, test의 target 분포 출력
print(y_train.value_counts())
print(y_test.value_counts())

targets
1    1296
0    1283
2    1168
Name: count, dtype: int64
targets
1    324
0    321
2    292
Name: count, dtype: int64


In [85]:
from sklearn.model_selection import train_test_split

# validation 데이터 세트 분할
X_train, X_val, y_train, y_val = \
train_test_split(X_train, y_train, stratify=y_train, test_size=0.2, random_state=124)

# train, validation의 target 분포 출력
print(y_train.value_counts())
print(y_val.value_counts())

targets
1    1037
0    1026
2     934
Name: count, dtype: int64
targets
1    259
0    257
2    234
Name: count, dtype: int64


In [86]:
import shutil

root = './datasets/rice_leaf_diseases_dataset/'

# train 이미지를 'train' 폴더에 복사
for file_path in X_train:
    almond_dir = file_path[len(root + 'original/'): file_path.rindex('/')]
    destination = os.path.join(root, 'train/' + almond_dir)

    # train 폴더가 없으면 생성
    if not os.path.exists(destination):
        os.makedirs(destination)

    shutil.copy2(file_path, destination)

In [87]:
# validation과 test도 똑같이 생성
for file_path in X_val:
    almond_dir = file_path[len(root + 'original/'): file_path.rindex('/')]
    destination = os.path.join(root, 'validation/' + almond_dir)

    # train 폴더가 없으면 생성
    if not os.path.exists(destination):
        os.makedirs(destination)

    shutil.copy2(file_path, destination)

In [88]:
for file_path in X_test:
    almond_dir = file_path[len(root + 'original/'): file_path.rindex('/')]
    destination = os.path.join(root, 'test/' + almond_dir)

    # train 폴더가 없으면 생성
    if not os.path.exists(destination):
        os.makedirs(destination)

    shutil.copy2(file_path, destination)

In [89]:
import albumentations as A
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2

# 이미지 Augmentation 함수
# Compose의 확률을 1로 지정하지 않는 이유는 원본 데이터도 fit하기 위함
def aug_transform(image):
    aug = A.Compose([
        A.ShiftScaleRotate(shift_limit=0, scale_limit=(0.5, 1.5), rotate_limit=90, border_mode=cv2.BORDER_CONSTANT, p=0.5),
        A.RandomResizedCrop(width=224, height=224, scale=(0.1, 0.5), p=1),
        A.OneOf([
            A.ColorJitter(p=0.5),
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5)
        ], p=1)
    ], p=0.8)

    return aug(image=image)['image']

# 위 함수를 ImageDataGenerator를 불러올 때 같이 실행
idg = ImageDataGenerator(preprocessing_function=aug_transform, rescale=1./255)

In [90]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_dir = './datasets/rice_leaf_diseases_dataset/train/'
val_dir = './datasets/rice_leaf_diseases_dataset/validation/'
test_dir = './datasets/rice_leaf_diseases_dataset/test/'

# train 데이터에는 Augmentation 적용
train_generator = idg.flow_from_directory(
    train_dir,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True
)

# validation과 test 데이터는 RGB 값 스케일링만 실행
non_aug_idg = ImageDataGenerator(rescale=1./255)

val_generator = non_aug_idg.flow_from_directory(
    val_dir,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

test_generator = non_aug_idg.flow_from_directory(
    test_dir,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# 각각의 클래스 분류 출력
print(train_generator.class_indices)
print(val_generator.class_indices)
print(test_generator.class_indices)

Found 2997 images belonging to 3 classes.
Found 750 images belonging to 3 classes.
Found 937 images belonging to 3 classes.
{'Bacterialblight': 0, 'Brownspot': 1, 'Leafsmut': 2}
{'Bacterialblight': 0, 'Brownspot': 1, 'Leafsmut': 2}
{'Bacterialblight': 0, 'Brownspot': 1, 'Leafsmut': 2}


### 모델 생성

In [94]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Conv2D, Dropout, Flatten, Activation, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.applications import VGG16

def create_model(verbose=False):
    input_tensor = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
    
    model = VGG16(input_tensor=input_tensor, include_top=False, weights='imagenet')

    # Classifier
    x = model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(50, activation='relu')(x)
    output = Dense(3, activation='softmax')(x)
    
    model = Model(inputs=model.input, outputs=output)
    
    if verbose:
        model.summary()
        
    return model

In [95]:
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy

mcp_cb = ModelCheckpoint(
    filepath="./callback_files/weights.{epoch:03d}-{val_loss:.4f}-{acc:.4f}.weights.h5",
    monitor='val_loss',
    save_best_only=False,
    save_weights_only=True,
    mode='min'
)

rlr_cb = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=2,
    mode='min'
)

ely_cb = EarlyStopping(
    monitor='val_loss',
    patience=4,
    mode='min'
)

# 모델 생성 후 컴파일링
model = create_model(verbose=True)
model.compile(optimizer=Adam(), loss=CategoricalCrossentropy(), metrics=['acc'])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m87910968/87910968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 1us/step


In [97]:
import gc

# Garbage Collector: 불필요한 object를 메모리에서 해제시킨다
# 쾌적하게 fit 가능 
gc.collect()

1232

In [98]:
# validation_split: train 데이터 중 일정 비율을 validation 데이터로 자동 분리해준다
# validation 데이터 따로 분리할 필요 없음
history = model.fit(train_generator, 
                    batch_size=BATCH_SIZE, 
                    epochs=10, 
                    validation_data=(val_generator), 
                    callbacks=[mcp_cb, rlr_cb, ely_cb])

Epoch 1/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8s/step - acc: 0.3533 - loss: 1.1683

  self._warn_if_super_not_called()


[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m442s[0m 9s/step - acc: 0.3537 - loss: 1.1668 - val_acc: 0.3520 - val_loss: 177.0448 - learning_rate: 0.0010
Epoch 2/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m445s[0m 9s/step - acc: 0.4414 - loss: 1.0212 - val_acc: 0.4387 - val_loss: 7.4669 - learning_rate: 0.0010
Epoch 3/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m471s[0m 10s/step - acc: 0.4569 - loss: 0.9965 - val_acc: 0.6373 - val_loss: 1.9145 - learning_rate: 0.0010
Epoch 4/10
[1m 6/47[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m6:26[0m 9s/step - acc: 0.5000 - loss: 1.0169

KeyboardInterrupt: 

In [None]:
model.evaluate(test_flow)

In [None]:
import matplotlib.pyplot as plt

def show_history(history):
    plt.figure(figsize=(6, 6))
    plt.yticks(np.arange(0, 1, 0.05))
    plt.plot(history.history['acc'], label='train')
    plt.plot(history.history['val_acc'], label='validation')
    plt.legend()
    
show_history(history)