In [0]:
from google.colab import drive
import os
drive.mount('/content/drive')
os.chdir("/content/drive/My Drive/Colab Notebooks/image_data")
!ls

In [0]:
!pip install keras-lookahead keras-rectified-adam

# 3rd_Cupoy ML 100

In [0]:
import cv2
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from multiprocessing import Pool
from google.colab.patches import cv2_imshow

from keras.applications.xception import Xception
from keras.applications.mobilenet_v2 import MobileNetV2

from keras.layers import GlobalAveragePooling2D, Dense, BatchNormalization, Dropout, Conv2D, MaxPooling2D, Input
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from keras.regularizers import l1_l2
from keras.models import Model, load_model
from keras import backend as K

from keras_lookahead import Lookahead
from keras_radam import RAdam

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [0]:
l1_rate = 0.001
l2_rate = 0.001
img_width = 224
img_height = 224

In [0]:
def get_sample_size(method='train'):
    return sum([len(os.listdir(os.path.join(method, class_))) for class_ in os.listdir(method)])

### train

In [0]:
K.clear_session()

base_model = MobileNetV2(weights='imagenet', include_top=False, alpha=.75)

x = Conv2D(128, (3, 3), padding='same', activation='relu', kernel_regularizer=l1_l2(l1=l1_rate, l2=l2_rate))(base_model.output)
x = MaxPooling2D((2, 2))(x)
x = BatchNormalization()(x)
x = Dropout(0.4)(x)

x = Conv2D(128, (3, 3), padding='same', activation='relu', kernel_regularizer=l1_l2(l1=l1_rate, l2=l2_rate))(base_model.output)
x = MaxPooling2D((2, 2))(x)
x = BatchNormalization()(x)
x = Dropout(0.4)(x)

x = GlobalAveragePooling2D()(x)

x = Dense(256, activation='relu', kernel_regularizer=l1_l2(l1=l1_rate, l2=l2_rate))(x)
x = BatchNormalization()(x)
x = Dropout(0.4)(x)

predictions = Dense(5, activation='softmax')(x)

model = Model(base_model.input, predictions)
for layers in base_model.layers:
    layers.trainable = False

model.compile(loss='categorical_crossentropy',
              optimizer=Lookahead(RAdam()),
              metrics=['accuracy'])

In [0]:
model = load_model('mobile_net_v2_checkpoint.h5', custom_objects={'RAdam': RAdam, 'Lookahead': Lookahead})

In [0]:
model.summary()

In [0]:
BATCH_SIZE = 256
EPOCHS = 100

early_stopping = EarlyStopping(monitor='val_loss', patience=5, min_delta=.00003)
mobile_net_v2_checkpoint_2 = ModelCheckpoint('mobile_net_v2_checkpoint.h5', monitor='val_loss', save_best_only=True)
data_generator = ImageDataGenerator(rotation_range=50, 
                                    width_shift_range=0.4, 
                                    height_shift_range=0.4, 
                                    shear_range=0.2,
                                    zoom_range=0.2,
                                    channel_shift_range=10,
                                    horizontal_flip=True, 
                                    vertical_flip=True, 
                                    fill_mode='reflect',
                                    rescale=1/255.)

train_batches = data_generator.flow_from_directory('train', target_size=(img_width, img_height), batch_size=BATCH_SIZE)
val_batches = data_generator.flow_from_directory('val', target_size=(img_width, img_height), batch_size=BATCH_SIZE)

history = model.fit_generator(train_batches, 
                              validation_data=val_batches, 
                              steps_per_epoch=get_sample_size('train') // BATCH_SIZE, 
                              epochs=EPOCHS, 
                              callbacks=[early_stopping, mobile_net_v2_checkpoint_2])

### evaluate

In [0]:
train_loss = history.history["loss"]
valid_loss = history.history["val_loss"]
train_acc = history.history["acc"]
valid_acc = history.history["val_acc"]

plt.plot(range(len(train_loss)), train_loss, label="train loss")
plt.plot(range(len(valid_loss)), valid_loss, label="valid loss")
plt.legend()
plt.title("Loss")
plt.show()

plt.plot(range(len(train_acc)), train_acc, label="train accuracy")
plt.plot(range(len(valid_acc)), valid_acc, label="valid accuracy")
plt.legend()
plt.title("Accuracy")
plt.show()

In [0]:
model.evaluate_generator(val_batches)

[3.209246989658901, 0.7285714285714285]

### predict

In [0]:
def img_load(path, width, height):
    return cv2.resize(cv2.imread(path), (width, height))
    
def get_test_img(width=299, height=299, seed=0):
    t = time.time()
    pool = Pool(processes=10)
    
    ids = []
    result = []
    list_dir = os.listdir('test')
    x = np.zeros(shape=[len(list_dir), width, height, 3], dtype='float16')

    for dir_ in list_dir:
        ids.append(dir_[:-4])
        result.append(pool.apply_async(img_load, args=(os.path.join('test', dir_), width, height)))

    pool.close()
    pool.join()

    for i, img in enumerate(result):
        x[i] = img.get()
    
    print('Success load %s\nTime:%ds' % ('test', time.time() - t))
    return ids, x

In [0]:
ids, test = get_test_img(width=img_width, height=img_height)
pred = np.argmax(model.predict(test / 255), axis=1)

submission = pd.DataFrame({'id': ids, 'flower_class': pred})
submission.to_csv('submission.csv', index = False)