In [1]:
##### DNN module

from keras.utils import to_categorical
from keras import backend as K
from keras.models import Model
from keras.applications import ResNet50
from keras.optimizers import Adagrad, RMSprop, Adam
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


In [2]:
##### Common moldule

import numpy as np
import os
import glob
import matplotlib.pyplot as plt
import time
from PIL import Image
from scipy.interpolate import RegularGridInterpolator
import pandas as pd

In [3]:
##### Path

train_db = 'database/combined_train_mx_240.npy'
label_db = 'database/combined_label_mx_240.npy'

flower_li = list(map(os.path.basename, glob.glob('database/image_data/train/*')))
flower_mapping = {flower_li[i]: i for i in range(5)}

print(flower_mapping)

{'daisy': 0, 'dandelion': 1, 'rose': 2, 'sunflower': 3, 'tulip': 4}


In [4]:
##### Load data

flower_data   = np.load(train_db)#.astype('uint8')
flower_target = np.load(label_db)#.astype('uint8')

x_train, x_test, y_train, y_test = train_test_split(flower_data, flower_target, test_size=0.2, random_state=9527)

In [5]:
##### Dimenssion check

(np.shape(x_train), np.shape(x_test), np.shape(y_train), np.shape(y_test))

((2258, 240, 240, 3), (565, 240, 240, 3), (2258,), (565,))

In [6]:
##### Normalize / Categorize

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train = x_train / 255.
x_test = x_test / 255.
y_train = to_categorical(y_train, 5)
y_test = to_categorical(y_test, 5)

In [7]:
##### Parameter

image_size = (240, 240)
num_class = 5
batch_size = 20
num_epoches = 5
#freeze_layer = 2

In [8]:
net = ResNet50(include_top=False,
               weights='imagenet',
               input_tensor=None,
               input_shape=(image_size[0], image_size[1], 3))



In [9]:
x = net.output
x = Flatten()(x)
x = Dropout(0.5)(x)
output_layer = Dense(num_class, activation='softmax', name='softmax')(x)

In [10]:
net_final = Model(inputs=net.input, outputs=output_layer)

In [11]:
net_final.compile(optimizer=Adam(lr=1e-5),
                  loss='categorical_crossentropy', metrics=['accuracy'])

In [12]:
print(net_final.summary())

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 240, 240, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 246, 246, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 120, 120, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 120, 120, 64) 256         conv1[0][0]                      
____________________________________________________________________________________________

In [13]:
augment_generator = ImageDataGenerator(rotation_range=40,
                                       width_shift_range=0.2,
                                       height_shift_range=0.2,
                                       rescale=1./255,
                                       shear_range=0.2,
                                       zoom_range=0.2,
                                       horizontal_flip=True,
                                       fill_mode='nearest'
                                      )

In [14]:
history = net_final.fit_generator(augment_generator.flow(x_train, y_train, batch_size=batch_size),
                    steps_per_epoch=int(len(x_train)/batch_size), # 一個 epochs 要執行幾次 update，通常是資料量除以 batch size
                    epochs=num_epoches,
                    verbose=1,
                    validation_data=(x_test, y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [15]:
score = net_final.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.6714184874981906
Test accuracy: 0.7840707898139954


In [16]:
net_final.save('model/STD_datagen_1230_05.h5')

In [17]:
##### Test on unknown sample

In [18]:
def resampleRGI3d(input_mx, resize_to, dtype='float32'):
    # Ref: 10 3-A-7
    # Ver. 2.1
    # input_mx : numpy array, the original target matrix
    # resize_to: list or tuple with 3 int inside
    a, b, c = np.shape(input_mx)
    p, q, r = resize_to
    z_grid = np.linspace(0, p - 1, a)
    y_grid = np.linspace(0, q - 1, b)
    x_grid = np.linspace(0, r - 1, c)
    RGI = RegularGridInterpolator((z_grid, y_grid, x_grid), input_mx)
    z_grid_t2 = np.arange(p)
    y_grid_t2 = np.arange(q)
    x_grid_t2 = np.arange(r)
    meshgrid_para = np.meshgrid(z_grid_t2, y_grid_t2, x_grid_t2)
    RGI_mesh_mx = RGI((meshgrid_para[0], meshgrid_para[1], meshgrid_para[2]))
    RGI_mx = np.transpose(RGI_mesh_mx, axes=[1, 0, 2]).astype(dtype)
    return RGI_mx

def happy_time(start,stop):
    process_time = round(stop - start)
    ss = process_time % 60
    mm = process_time // 60 % 60
    hh = process_time // 3600
    duration = "Process time == {}s == {}H {}m {}s".format(process_time,hh,mm,ss)
    return duration

In [19]:
path_unknown = glob.glob('database/image_data/test/*')
path_unknown[:5]

['database/image_data/test\\0028624c49b3e0610ff9f1d111f5d532.jpg',
 'database/image_data/test\\002c30700185b7971369258b438070d5.jpg',
 'database/image_data/test\\00852f4f666acecd0c0d140365b42efd.jpg',
 'database/image_data/test\\00c08828fce04e360c732cac01edad9e.jpg',
 'database/image_data/test\\00d366e7877b6a78b104b57d67b60e6b.jpg']

In [24]:
#path_train_0 = glob.glob(os.path.join(path_train, list(flower_mapping.keys())[i], '*'))
#pic_store_dict = None
#pic_store_dict = {}

id_li = []
flower_class = []
pred_li = []
loop_start = time.time()

for j in range(len(path_unknown)):
    serial = os.path.basename(path_unknown[j])[:-4]
    temp_pic = np.asarray(Image.open(path_unknown[j]))
    temp_shape = np.shape(temp_pic)
    if temp_shape[0] > 640:
        temp_pic = temp_pic[:640]
        temp_shape = np.shape(temp_pic)
    if temp_shape[1] > 640:
        temp_pic = temp_pic[:, :640]
        temp_shape = np.shape(temp_pic)
    if temp_shape[0] > 240:
        temp_pic = resampleRGI3d(temp_pic, (240, int(temp_shape[1]*240/temp_shape[0]), 3))
        temp_shape = np.shape(temp_pic)
    if temp_shape[1] > 240:
        temp_pic = resampleRGI3d(temp_pic, (int(temp_shape[0]*240/temp_shape[1]), 240, 3))
        temp_shape = np.shape(temp_pic)
    temp_pic = np.pad(temp_pic, ((0, 240 - temp_shape[0]), (0, 240 - temp_shape[1]), (0, 0)), 'constant', constant_values=0)
    temp_pic = np.expand_dims(temp_pic, axis = 0)
    pred = net_final.predict(temp_pic)[0]
    pred_li.append(pred)
    id_li.append(serial)
    flower_class.append(list(pred).index(max(pred)))
    if (j+1) % 200 == 0:
        print(j+1, 'files done.', happy_time(loop_start, time.time()))
    #pic_store_dict[serial] = temp_pic
#stack_a = np.stack([pic_store_dict[x] for x in list(pic_store_dict.keys())], axis=0)
#total_dict[sub_folder_li[i]] = stack_a

200 files done. Process time == 13s == 0H 0m 13s
400 files done. Process time == 28s == 0H 0m 28s
600 files done. Process time == 42s == 0H 0m 42s
800 files done. Process time == 56s == 0H 0m 56s
1000 files done. Process time == 71s == 0H 1m 11s
1200 files done. Process time == 84s == 0H 1m 24s
1400 files done. Process time == 97s == 0H 1m 37s
1600 files done. Process time == 111s == 0H 1m 51s
1800 files done. Process time == 124s == 0H 2m 4s
2000 files done. Process time == 138s == 0H 2m 18s


In [25]:
pred_li

[array([0., 0., 0., 0., 1.], dtype=float32),
 array([0., 0., 0., 0., 1.], dtype=float32),
 array([0., 0., 0., 0., 1.], dtype=float32),
 array([0., 0., 0., 0., 1.], dtype=float32),
 array([0., 0., 0., 0., 1.], dtype=float32),
 array([0.000000e+00, 0.000000e+00, 0.000000e+00, 1.000000e+00,
        3.364495e-11], dtype=float32),
 array([0., 0., 0., 0., 1.], dtype=float32),
 array([0.000000e+00, 0.000000e+00, 0.000000e+00, 6.998077e-10,
        1.000000e+00], dtype=float32),
 array([0., 0., 0., 0., 1.], dtype=float32),
 array([0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 4.5695256e-22,
        1.0000000e+00], dtype=float32),
 array([0., 0., 0., 0., 1.], dtype=float32),
 array([0., 0., 0., 0., 1.], dtype=float32),
 array([0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 6.3424837e-31,
        1.0000000e+00], dtype=float32),
 array([0., 0., 0., 0., 1.], dtype=float32),
 array([0., 0., 0., 1., 0.], dtype=float32),
 array([0., 0., 0., 1., 0.], dtype=float32),
 array([0., 0., 0., 0., 1.], dtype=flo

In [21]:
pred_result_df = pd.DataFrame(columns=['id', 'flower_class'])
pred_result_df['id'] = id_li
pred_result_df['flower_class'] = flower_class
pred_result_df

Unnamed: 0,id,flower_class
0,0028624c49b3e0610ff9f1d111f5d532,4
1,002c30700185b7971369258b438070d5,4
2,00852f4f666acecd0c0d140365b42efd,4
3,00c08828fce04e360c732cac01edad9e,4
4,00d366e7877b6a78b104b57d67b60e6b,4
...,...,...
1995,ff7eac29b6d7a33fbd8009677c3e9c58,3
1996,ffbc32a7b67dfe72b8d35d4b1b35fd6c,4
1997,ffea1f275c05accb0a6bfd1203620c7e,3
1998,ffeb2a1cf53464b6af937ab8af0c2946,4


In [22]:
pred_result_df.to_csv('model/pred_result_STD_datagen_1230_05_datagen_RESNET.csv', index=False)

In [23]:
#####
#####
#####