In [1]:
import os
import numpy as np
import pandas as pd

# Collect (and echo) the full path of every file found under the Kaggle input directory.
path_list = []

for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Build the path once and reuse it for both the printout and the list.
        file_path = os.path.join(dirname, filename)
        print(file_path)
        path_list.append(file_path)

In [2]:
path_list[4:9]

In [3]:
# import train.csv

train_df = pd.read_csv('../input/plant-pathology-2020-fgvc7/train.csv')

In [4]:
train_df.head()

In [5]:
train_df.shape

In [6]:
# import test.csv

test_df = pd.read_csv('../input/plant-pathology-2020-fgvc7/test.csv')

In [7]:
test_df.head()

In [8]:
test_df.shape

In [9]:
# Add a 'sum' column to train_df.
# The healthy, multiple_diseases, rust and scab columns are laid out as a one-hot encoding,
# so check whether any row sums to more than 1 or to 0.

train_df['sum'] = train_df['healthy'] + train_df['multiple_diseases'] + train_df['rust'] + train_df['scab']

# Value counts of the 'sum' column
train_df['sum'].value_counts()

In [10]:
# Show rows whose target columns are not a valid one-hot vector
# (more than one class set, or no class set at all).
train_df[(train_df['sum'] > 1) | (train_df['sum'] == 0)]

In [11]:
# Add the absolute path of each image to the DataFrame.
# The column display width is raised to 100 characters before the frame is shown.
pd.set_option('max_colwidth', 100)

IMAGE_DIR = '/kaggle/input/plant-pathology-2020-fgvc7/images'
# Path is built as <IMAGE_DIR>/<image_id>.jpg
train_df['path'] = IMAGE_DIR + '/' + train_df['image_id'] + '.jpg'

train_df.head()

In [12]:
# 개별 컬럼 별 0/1 값을 구분하여 클래스 라벨로 생성
# 이미지의 label을 DataFrame에 추가

def get_label(x):
    """Return the class name for one row of one-hot target columns.

    The columns are checked in the order healthy, multiple_diseases, scab, rust
    and the first one equal to 1 wins. The string 'None' is returned when no
    column is set.
    """
    for column in ('healthy', 'multiple_diseases', 'scab', 'rust'):
        if x[column] == 1:
            return column
    return 'None'

# Label every row by applying get_label row-wise (axis=1).
train_df['label'] = train_df.apply(get_label, axis=1)

train_df.head()

In [13]:
# Check the number of training images overall and per label.
print('train df shape: ', train_df.shape)
print()
# The printed heading below reads "count per label".
print('label 별 건수')
train_df['label'].value_counts()

In [14]:
# 원본 이미지 시각화 function

# 녹병균(rust), 박테리아성 질환(scab), 복합 질병(multiple_diseases), 건강(healthy)
# 이미지 size는 (1365, 2048)

import seaborn as sns
import matplotlib.pyplot as plt
import cv2
%matplotlib inline


def show_grid_images(image_path_list, augmentor=None, ncols=4, title=None):
    '''
    Draw up to `ncols` images from `image_path_list` side by side in a single row.

    :parameters:

    image_path_list: list of image file paths; only the first `ncols` entries are drawn
    augmentor: optional callable used as augmentor(image=image)['image'] (albumentations style);
               None shows the images unchanged
    ncols: number of subplot columns in the row
    title: title set on every subplot and printed together with each image's shape

    :raises FileNotFoundError: when cv2.imread cannot load one of the paths
    '''
    # squeeze=False keeps `axs` two-dimensional, so ncols=1 is indexed like any other width.
    figure, axs = plt.subplots(
        figsize=(22, 4),
        nrows=1,
        ncols=ncols,
        squeeze=False
    )
    row_axes = axs[0]

    # zip stops at the shorter input, so a list with fewer than `ncols` paths cannot raise IndexError.
    for ax, image_path in zip(row_axes, image_path_list):
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread reports a missing/unreadable file by returning None instead of raising.
            raise FileNotFoundError(f"could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if augmentor is not None:
            image = augmentor(image=image)['image']
        ax.imshow(image)
        ax.set_title(title)
        print(f"{title}: {image.shape}")

    # Hide any axes that did not receive an image.
    for ax in row_axes[len(image_path_list):]:
        ax.axis('off')

In [15]:
# label 별 image path list 
# First six image paths of every label, gathered in one pass over the label names.
_paths_by_label = {
    label: train_df.loc[train_df['label'] == label, 'path'].head(6).tolist()
    for label in ('rust', 'scab', 'healthy', 'multiple_diseases')
}
rust_image_list = _paths_by_label['rust']
scab_image_list = _paths_by_label['scab']
healthy_image_list = _paths_by_label['healthy']
multiple_image_list = _paths_by_label['multiple_diseases']

In [16]:
# label 별 image visualize
show_grid_images(rust_image_list, ncols=6, title='rust')

In [17]:
show_grid_images(scab_image_list, ncols=6, title='scab')

In [18]:
show_grid_images(healthy_image_list, ncols=6, title='healthy')

In [19]:
show_grid_images(multiple_image_list, ncols=6, title='multiple_diseases')

In [20]:
# Image Augmentation

import albumentations as A


# Augmentation pipeline used for the training data; every transform carries its own probability p.
augmentor_01 = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    # NOTE(review): albumentations documents a tuple scale_limit as an offset from 1, so
    # (0.7, 0.9) would zoom in by 1.7x-1.9x rather than shrink to 70-90% — confirm intent.
    A.ShiftScaleRotate(scale_limit=(0.7, 0.9), p=0.5, rotate_limit=30),
    A.RandomBrightnessContrast(brightness_limit=(-0.2, 0.2), contrast_limit=(-0.2, 0.2), p=0.5),
    A.Blur(p=0.2)
])

In [21]:
# Apply Augmentation(augmentor_01) to image visualize

# rust
show_grid_images(rust_image_list, augmentor=None, ncols=6, title='original rust')
show_grid_images(rust_image_list, augmentor=augmentor_01, ncols=6, title='augmented rust')

In [22]:
# Apply Augmentation(augmentor_01) to image visualize

# scab
show_grid_images(scab_image_list, augmentor=None, ncols=6, title='original scab')
show_grid_images(scab_image_list, augmentor=augmentor_01, ncols=6, title='augmented scab')

In [49]:
# Sequence 기반의 Dataset 생성

from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import Sequence
import sklearn
import cv2


class Plant_Dataset(Sequence):
    '''
    Keras Sequence that loads images with OpenCV and serves them batch by batch.

        :parameters
        image_filenames: paths of the image files to load (any array-like; stored as a NumPy array)
        labels: labels aligned with image_filenames, or None (e.g. for test data)
        image_size: (height, width) every image is resized to
        batch_size: number of samples returned by each __getitem__(index) call
        augmentor: albumentations-style callable used as augmentor(image=...)['image'], or None
        shuffle: whether to reshuffle the data at the end of every epoch
        pre_func: optional function applied to each resized image (e.g. a Keras preprocess_input)
    '''
    def __init__(self, image_filenames, labels, image_size=(224, 224),
                batch_size=64, augmentor=None, shuffle=False, pre_func=None):
        # Let the Keras base class initialise itself; newer Keras versions expect this call.
        super().__init__()

        # Store array-likes as NumPy arrays so slicing and positional indexing behave the
        # same for lists, pandas Series and ndarrays.
        self.image_filenames = np.asarray(image_filenames)
        self.labels = None if labels is None else np.asarray(labels)
        if self.labels is not None and len(self.labels) != len(self.image_filenames):
            raise ValueError(
                f"image_filenames ({len(self.image_filenames)}) and labels "
                f"({len(self.labels)}) must have the same length"
            )

        self.image_size = image_size
        self.batch_size = batch_size
        self.augmentor = augmentor
        self.pre_func = pre_func
        # No shuffle happens at construction time; with shuffle=True the data is
        # reshuffled by on_epoch_end().
        self.shuffle = shuffle

    def __len__(self):
        '''Number of batches per epoch: dataset size divided by batch_size, rounded up.'''
        return int(np.ceil(len(self.image_filenames) / self.batch_size))

    def __getitem__(self, index):
        '''
        :parameters
        index: which batch to return (0-based)

        :return: (image_batch, label_batch); image_batch is float32 with shape
                 (n, height, width, 3), label_batch is None when the dataset has no labels
        :raises FileNotFoundError: when cv2.imread cannot load one of the files
        '''
        start = index * self.batch_size
        stop = start + self.batch_size
        image_name_batch = self.image_filenames[start:stop]
        label_batch = None if self.labels is None else self.labels[start:stop]

        height, width = self.image_size[0], self.image_size[1]
        image_batch = np.zeros((len(image_name_batch), height, width, 3), dtype='float32')

        # Per image: load with OpenCV -> optional augmentation -> resize -> optional preprocessing.
        for image_index, image_path in enumerate(image_name_batch):
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread reports a missing/unreadable file by returning None instead of raising.
                raise FileNotFoundError(f"could not read image: {image_path}")
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            if self.augmentor is not None:
                image = self.augmentor(image=image)['image']

            # NumPy image arrays are (height, width, channels) while cv2.resize takes its
            # target size as (width, height), hence the swapped order here.
            image = cv2.resize(image, (width, height))

            if self.pre_func is not None:
                image = self.pre_func(image)

            image_batch[image_index] = image

        return image_batch, label_batch

    def on_epoch_end(self):
        '''Reshuffle file names (and labels, kept in matching order) when shuffle is enabled.'''
        if not self.shuffle:
            return

        if self.labels is None:
            # Nothing to keep in sync; avoid handing None to sklearn.utils.shuffle.
            self.image_filenames = sklearn.utils.shuffle(self.image_filenames)
        else:
            self.image_filenames, self.labels = sklearn.utils.shuffle(self.image_filenames, self.labels)

In [50]:
# 학습 데이터용 DataFrame에서 train/validation image path와 Label을 추출하고 이를 Dataset으로 생성

# 이미 학습용 DataFrame에 'healthy', 'multiple_diseases', 'rust', 'scab' 순으로 one-hot encoding 되어있음
# kaggle에서 test data의 예측 결과를 'healthy', 'multiple_diseases', 'rust', 'scab' 순서로 제출을 요구함
# 따라서, 이를 다시 별도로 one-hot encoding 해서는 안됨.
# Augmentation은 앞에서 생성한 augmentor_01 적용
# pre_func는 xception용 Preprocessing 함수를 적용

In [51]:
sample_df = pd.read_csv('/kaggle/input/plant-pathology-2020-fgvc7/sample_submission.csv')
sample_df.head()

In [52]:
train_df.columns.tolist()

In [53]:
train_df['path'].values

In [54]:
# train/validation set split
from sklearn.model_selection import train_test_split


def get_train_valid(train_df, valid_size=0.2, random_state=2021):
    """Split image paths and one-hot labels into training and validation parts.

    The 'healthy', 'multiple_diseases', 'rust' and 'scab' columns are used as the
    label matrix as they are (no further encoding). The shapes of the four
    resulting arrays are printed.

    :return: tr_path, val_path, tr_label, val_label
    """
    label_columns = ['healthy', 'multiple_diseases', 'rust', 'scab']

    all_paths = train_df['path'].values
    all_labels = train_df[label_columns].values

    split = train_test_split(
        all_paths,
        all_labels,
        test_size=valid_size,
        random_state=random_state
    )
    tr_path, val_path, tr_label, val_label = split

    print("tr_path shape: ", tr_path.shape)
    print("tr_label shape: ", tr_label.shape)
    print("val_path shape: ", val_path.shape)
    print("val_label shape: ", val_label.shape)

    return tr_path, val_path, tr_label, val_label

In [55]:
from tensorflow.keras.applications.xception import preprocess_input as xcp_preprocess_input
from tensorflow.keras.applications.efficientnet import preprocess_input as eff_preprocess_input


# Datasets are built with a 320 x 512 (height x width) image size.
IMAGE_SIZE = (320, 512)
BATCH_SIZE = 64

# 80/20 train/validation split with a fixed seed
tr_path, val_path, tr_label, val_label = get_train_valid(train_df, valid_size=0.2, random_state=2021)

In [56]:
print(Plant_Dataset.__doc__)

In [97]:
# Training dataset: augmented with augmentor_01 and reshuffled at the end of every epoch.
tr_ds = Plant_Dataset(
    tr_path,
    tr_label,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    augmentor=augmentor_01,
    shuffle=True,
    pre_func=xcp_preprocess_input
)

# Validation dataset: no augmentation and no shuffling; same preprocessing as training.
val_ds = Plant_Dataset(
    val_path,
    val_label,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    augmentor=None,
    shuffle=False,
    pre_func=xcp_preprocess_input
)

In [98]:
print(dir(tr_ds))

In [99]:
for k, v in tr_ds.__dict__.items():
    print(f"{k}")
    print(f"{v}")
    print()

In [100]:
print(dir(val_ds))

In [101]:
for k, v in val_ds.__dict__.items():
    print(f"{k}")
    print(f"{v}")
    print()

In [102]:
# Fetch the first batch by index. Unlike a hasattr(..., '__iter__') guard, this always
# defines the four names the next cell prints, whatever the Sequence base class provides.
tr_image_batch, tr_label_batch = tr_ds[0]
val_image_batch, val_label_batch = val_ds[0]

In [103]:
print("train_image_batch shape: ", tr_image_batch.shape)
print("val_image_batch shape: ", val_image_batch.shape)
print("tr_label_batch shape: ", tr_label_batch.shape)
print("val_label_batch shape: ", val_label_batch.shape)

In [113]:
# Model Create

# resnet50v2
# xception
# efficientnetb0-b7

from tensorflow.keras.models import Sequential , Model
from tensorflow.keras.layers import Input, Dense , Conv2D , Dropout , Flatten , Activation, MaxPooling2D , GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam , RMSprop 
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import ReduceLROnPlateau , EarlyStopping , ModelCheckpoint , LearningRateScheduler

from tensorflow.keras.applications import Xception, ResNet50V2, EfficientNetB0, EfficientNetB1, EfficientNetB2, EfficientNetB3
from tensorflow.keras.applications import EfficientNetB4, EfficientNetB5, EfficientNetB6, EfficientNetB7
import tensorflow as tf

def create_model(model_type='efficientnetb0', in_shape=(224, 224, 3), n_classes=4):
    """Build an ImageNet-pretrained backbone with a small classification head.

    :param model_type: one of 'resnet50v2', 'xception' or 'efficientnetb0' .. 'efficientnetb7'
    :param in_shape: input shape passed to the Keras Input layer
    :param n_classes: number of units in the final softmax layer
    :return: Keras Model mapping the input tensor to class probabilities
    :raises ValueError: if model_type is not one of the supported names
    """
    # Supported model_type -> class name inside tf.keras.applications.
    backbone_names = {'resnet50v2': 'ResNet50V2', 'xception': 'Xception'}
    backbone_names.update({f'efficientnetb{i}': f'EfficientNetB{i}' for i in range(8)})

    # Fail early with a clear message instead of an UnboundLocalError further down.
    if model_type not in backbone_names:
        raise ValueError(
            f"unknown model_type {model_type!r}; expected one of {sorted(backbone_names)}"
        )

    input_tensor = Input(shape=in_shape)
    backbone_cls = getattr(tf.keras.applications, backbone_names[model_type])
    base_model = backbone_cls(include_top=False, weights='imagenet', input_tensor=input_tensor)

    # Head: global average pooling -> Dense(1024, relu) -> Dropout(0.5) -> softmax
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.5)(x)
    preds = Dense(units=n_classes, activation='softmax')(x)
    model = Model(inputs=input_tensor, outputs=preds)

    return model

In [104]:
# Learning Rate Scheduler에 적용할 함수 선언

def _ramp_sustain_decay(epoch, lr_start, lr_max, lr_rampup_epochs=2,
                        lr_sustain_epochs=1, lr_step_decay=0.75):
    """Learning rate for `epoch` under a ramp-up / sustain / step-decay schedule.

    - epoch < lr_rampup_epochs: linear ramp from lr_start towards lr_max
    - next lr_sustain_epochs epochs: constant lr_max
    - afterwards: lr_max multiplied by lr_step_decay once every 2 epochs
    """
    if epoch < lr_rampup_epochs:
        return (lr_max - lr_start) / lr_rampup_epochs * epoch + lr_start
    if epoch < lr_rampup_epochs + lr_sustain_epochs:
        return lr_max
    # Integer division by 2 makes the decay a staircase that drops every second epoch.
    return lr_max * lr_step_decay**((epoch - lr_rampup_epochs - lr_sustain_epochs)//2)


def lrfn_01(epoch):
    """Schedule ramping from 1e-5 to 1e-4 (2 ramp-up epochs, 1 sustain epoch, 0.75 step decay)."""
    return _ramp_sustain_decay(epoch, lr_start=1e-5, lr_max=1e-4)


def lrfn_02(epoch):
    """Schedule ramping from 1e-6 to 2e-5 (2 ramp-up epochs, 1 sustain epoch, 0.75 step decay)."""
    return _ramp_sustain_decay(epoch, lr_start=1e-6, lr_max=2e-5)
    

In [106]:
# Callbacks - LearningRateScheduler
import tensorflow as tf


# One scheduler callback per schedule function: lr01_cb follows lrfn_01, lr02_cb follows lrfn_02.
lr01_cb = tf.keras.callbacks.LearningRateScheduler(lrfn_01, verbose=1)
lr02_cb = tf.keras.callbacks.LearningRateScheduler(lrfn_02, verbose=2)

In [107]:
# Callbacks - ReduceLROnPlateau

# Watches val_loss (lower is better): factor 0.2 is applied to the learning rate
# after 3 epochs (patience) without improvement.
rlr_cb = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    mode='min',
    verbose=1
)

In [108]:
# Callbacks - EarlyStopping

# Watches val_loss (lower is better): training stops after 10 epochs (patience)
# without improvement.
ely_cb = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    mode='min',
    verbose=1
)

## Xception

In [112]:
# Config를 이용하여 학습 수행

# - Model: Xception
# - Image Size: (320, 512)
# - Batch Size: 32
# - Initial Value of LR: 0.0001
# - LR Scheduler: Ramp up and Step decay
# - epochs = 10
# - not fine tuning
# - augmentor -> augmentor_01

from tensorflow.keras.applications.efficientnet import preprocess_input as eff_preprocess_input
from tensorflow.keras.applications.xception import preprocess_input as xcp_preprocess_input


class Config:
    """Training settings for the Xception run, held as plain class attributes."""
    # Model and input pipeline
    model_type = 'xception'
    pre_func = xcp_preprocess_input
    augmentor = augmentor_01
    image_size = (320, 512)

    # Optimisation
    batch_size = 32
    initial_lr = 0.0001

    # Single-stage training (is_fine_tuning False) runs n_epochs with first_callbacks
    is_fine_tuning = False
    n_epochs = 10

    # Two-stage fine tuning (is_fine_tuning True)
    first_epochs = 15
    first_callbacks = [lr01_cb, ely_cb]
    second_epochs = 15
    second_callbacks = [lr02_cb, ely_cb]

    # Print the first-batch shapes before training
    debug = True

## EfficientNetB3

In [134]:
# Config를 이용하여 학습 수행

# - Model: EfficientNetB3
# - Image Size: (320, 512)
# - Batch Size: 16
# - Initial Value of LR: 0.0001
# - LR Scheduler: Ramp up and Step decay
# - epochs = 10
# - not fine tuning
# - augmentor -> augmentor_01

class Config:
    """Training settings for the EfficientNetB3 run, held as plain class attributes."""
    # Model and input pipeline
    model_type = 'efficientnetb3'
    pre_func = eff_preprocess_input
    augmentor = augmentor_01
    image_size = (320, 512)

    # Optimisation
    batch_size = 16 # OOM Issue
    initial_lr = 0.0001

    # Single-stage training (is_fine_tuning False) runs n_epochs with first_callbacks
    is_fine_tuning = False
    n_epochs = 10

    # Two-stage fine tuning (is_fine_tuning True)
    first_epochs = 15
    first_callbacks = [lr01_cb, ely_cb]
    second_epochs = 15
    second_callbacks = [lr02_cb, ely_cb]

    # Print the first-batch shapes before training
    debug = True

## EfficientNet B5

In [142]:
# Config를 이용하여 학습 수행

# - Model: EfficientNetB5
# - Image Size: (456, 456)
# - Batch Size: 8
# - Initial Value of LR: 0.0001
# - LR Scheduler: Ramp up and Step decay
# - epochs = 10
# - not fine tuning
# - augmentor -> augmentor_01

class Config:
    """Training settings for the EfficientNetB5 run, held as plain class attributes."""
    # This section is the EfficientNetB5 experiment; the value was left at
    # 'efficientnetb3' from the previous cell, which silently trained B3 again.
    model_type = 'efficientnetb5'
    image_size = (456, 456)
    batch_size = 8 # OOM Issue
    # Single-stage training (is_fine_tuning False) runs n_epochs with first_callbacks
    n_epochs = 10
    is_fine_tuning = False
    # Two-stage fine tuning (is_fine_tuning True)
    first_epochs = 15
    second_epochs = 15
    first_callbacks = [lr01_cb, ely_cb]
    second_callbacks = [lr02_cb, ely_cb]
    augmentor = augmentor_01
    pre_func = eff_preprocess_input
    initial_lr = 0.0001
    # Print the first-batch shapes before training
    debug = True

## EfficientNet B7

In [149]:
# Config를 이용하여 학습 수행

# - Model: EfficientNetB7
# - Image Size: (456, 456)
# - Batch Size: 4
# - Initial Value of LR: 0.0001
# - LR Scheduler: Ramp up and Step decay
# - epochs = 15
# - not fine tuning
# - augmentor -> augmentor_01

class Config:
    """Training settings for the EfficientNetB7 run, held as plain class attributes."""
    # This section is the EfficientNetB7 experiment (the trained model is stored as
    # effb7_model and submitted as submit_eff_b7.csv); the value was left at
    # 'efficientnetb3' from an earlier cell, which silently trained B3 instead.
    model_type = 'efficientnetb7'
    image_size = (456, 456)
    batch_size = 4 # OOM Issue
    # Single-stage training (is_fine_tuning False) runs n_epochs with first_callbacks
    n_epochs = 15
    is_fine_tuning = False
    # Two-stage fine tuning (is_fine_tuning True)
    first_epochs = 15
    second_epochs = 15
    first_callbacks = [lr01_cb, ely_cb]
    second_callbacks = [lr02_cb, ely_cb]
    augmentor = augmentor_01
    pre_func = eff_preprocess_input
    initial_lr = 0.0001
    # Print the first-batch shapes before training
    debug = True

In [150]:
# Model Train
from tensorflow.keras.metrics import AUC


def _compile_for_training(model, learning_rate):
    """Compile `model` with Adam, categorical cross-entropy and an AUC metric."""
    model.compile(
        # `learning_rate` is the current argument name; `lr` is a deprecated alias.
        optimizer=Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        # A fixed metric name keeps the history keys stable across repeated compiles.
        metrics=[AUC(name='auc')]
    )


def train_model(train_df, config=Config):
    """Split the data, build the datasets and the model, and train it.

    :param train_df: DataFrame with a 'path' column and the four one-hot label columns
    :param config: settings object providing model_type, image_size, batch_size, n_epochs,
        is_fine_tuning, first_epochs, second_epochs, first_callbacks, second_callbacks,
        augmentor, pre_func, initial_lr and debug. The default is bound when this function
        is defined, so pass `config` explicitly (or re-run this cell) after redefining Config.
    :return: (model, history) where history belongs to the last fit() call
    """
    # Train & Validation Split
    tr_path, val_path, tr_label, val_label = \
    get_train_valid(train_df, valid_size=0.2, random_state=2021)

    # Create Dataset based on Sequence
    tr_ds = Plant_Dataset(
        tr_path,
        tr_label,
        image_size=config.image_size,
        batch_size=config.batch_size,
        augmentor=config.augmentor,
        shuffle=True,
        pre_func=config.pre_func
    )

    val_ds = Plant_Dataset(
        val_path,
        val_label,
        image_size=config.image_size,
        batch_size=config.batch_size,
        augmentor=None,
        shuffle=False,
        pre_func=config.pre_func
    )

    print()
    # Debugging for prepared dataset based on sequence
    if config.debug:
        print('####################Debugging for prepared dataset based on sequence####################')
        # Index the first batch directly so the check does not depend on the base class
        # exposing __iter__.
        print("train image batch shape: ", tr_ds[0][0].shape)
        print("validation image batch shape: ", val_ds[0][0].shape)

    print()
    # Create Model
    # - Optimizer: Adam
    print(f'####################{config.model_type}####################')
    model = create_model(
        model_type=config.model_type,
        in_shape=(config.image_size[0], config.image_size[1], 3),
        n_classes=4
    )
    print('####################Optimizer: Adam####################')

    # Arguments shared by every fit() call below.
    fit_kwargs = dict(
        steps_per_epoch=int(np.ceil(tr_path.shape[0]/config.batch_size)),
        validation_data=val_ds,
        validation_steps=int(np.ceil(val_path.shape[0]/config.batch_size)),
        verbose=1
    )

    print()
    # Fine Tuning
    if config.is_fine_tuning:
        print('####################Fine Tuning Train Start!####################')

        # Stage 1: freeze everything except the 4-layer classification head.
        for layer in model.layers[:-4]:
            layer.trainable = False # Feature Extractor Layer=False
        # Keras reads `trainable` at compile time, so compile AFTER changing the flags.
        _compile_for_training(model, config.initial_lr)

        print('####################First Fine Tuning - Training Classification Layer####################')
        history = model.fit(
            tr_ds,
            epochs=config.first_epochs,
            callbacks=config.first_callbacks,
            **fit_kwargs
        )

        # Stage 2: unfreeze all layers; for EfficientNet backbones the BatchNormalization
        # layers are left as they are (frozen ones stay frozen).
        is_efficientnet = 'efficientnet' in config.model_type
        for layer in model.layers:
            if is_efficientnet and isinstance(layer, BatchNormalization):
                continue
            layer.trainable = True
        # Recompile so the new trainable flags take effect.
        _compile_for_training(model, config.initial_lr)

        print('####################Second Fine Tuning - Training All Layers####################')
        history = model.fit(
            tr_ds,
            epochs=config.second_epochs,
            callbacks=config.second_callbacks,
            **fit_kwargs
        )

    # Not Fine Tuning
    else:
        _compile_for_training(model, config.initial_lr)
        print('####################Training Start - Not Fine Tuning####################')
        history = model.fit(
            tr_ds,
            epochs=config.n_epochs,
            callbacks=config.first_callbacks,
            **fit_kwargs
        )

    return model, history

In [151]:
# xcp_model_02, history = train_model(train_df, config=Config)
effb7_model, history = train_model(train_df, config=Config)

In [152]:
# Create Test Dataset

IMAGE_DIR = '/kaggle/input/plant-pathology-2020-fgvc7/images'

test_df['path'] = IMAGE_DIR + "/" + test_df['image_id'] + '.jpg'

test_df.head()

In [153]:
test_df.shape

In [156]:
# Make submit file

def make_submit_df(test_df, model, config=Config):
    """Predict on the test images and return a submission-format DataFrame.

    :param test_df: DataFrame with 'image_id' and 'path' columns
    :param model: trained model exposing predict()
    :param config: settings object providing image_size, batch_size and pre_func. The
        default is bound when this function is defined, so pass `config` explicitly
        after redefining Config.
    :return: DataFrame with columns image_id, healthy, multiple_diseases, rust, scab
    :raises ValueError: if the number of predictions differs from the number of test rows
    """
    class_columns = ['healthy', 'multiple_diseases', 'rust', 'scab']
    test_path = test_df['path'].values

    # Test Dataset: Labels are None
    test_ds = Plant_Dataset(
        image_filenames=test_path,
        labels=None,
        image_size=config.image_size,
        batch_size=config.batch_size,
        augmentor=None,
        shuffle=False,
        pre_func=config.pre_func
    )

    # Predict
    preds = model.predict(test_ds)
    if len(preds) != len(test_df):
        raise ValueError(
            f"got {len(preds)} predictions for {len(test_df)} test rows"
        )

    # Make submit dataframe format.
    # concat(axis=1) aligns rows by index label, so give both sides the same fresh 0..n-1
    # index; otherwise a test_df with a non-default index would produce misaligned/NaN rows.
    image_ids = test_df['image_id'].reset_index(drop=True)
    preds_df = pd.DataFrame(preds, columns=class_columns)

    submit_df = pd.concat([image_ids, preds_df], axis=1)

    return submit_df

In [157]:
submit_df = make_submit_df(test_df, effb7_model, config=Config)

submit_df.to_csv('submit_eff_b7.csv', index=False)