## Data Reading

In [None]:
import cv2
import os
import keras
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from glob import glob
from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split

製作label

In [None]:
# Root folder of the training set; it holds one sub-folder per flower class.
path = '/content/drive/MyDrive/ColabWorkBench/MLMarathon/Final_Exam/train'
names = os.listdir(path) # e.g. names = ['daisy', 'dandelion', 'tulip', 'rose', 'sunflower']
# Class name -> integer id used as the training target.
class_map = {'daisy':0, 'dandelion':1, 'rose':2, 'sunflower':3, 'tulip':4}

# Count the .jpg files found in each class folder.
num_img = {}
for name in names:
  tmp_path = os.path.join(path, name)
  img_list = glob(tmp_path+'/*.jpg')
  num_img[name] = len(img_list)
  print(f'{len(img_list)} images of {name}')

# Build one label per image, class by class, following the order of `names`.
label = []
for i in names:
  label += [i]*num_img[i]

label = pd.DataFrame(label)

# Convert the class names to their integer ids for model training.
label[0] = label[0].map(class_map)

# The labels are deliberately NOT sorted by class id: the images are loaded by
# walking the class folders in os.listdir order, so the labels must keep that
# same order. Sorting would shift the labels of every class whose folder
# position differs from its id and pair images with the wrong class.
label = np.array(label)


# Convert to one-hot encoding (one column per entry of class_map).
label_onehot = keras.utils.to_categorical(label, num_classes=len(class_map))
print('label: ',label_onehot[0:4,:])

讀入圖片並resize成128 x 128 ( w x h )

In [None]:
# Root folder of the training set; it holds one sub-folder per flower class.
path = '/content/drive/MyDrive/ColabWorkBench/MLMarathon/Final_Exam/train'
names = os.listdir(path) # e.g. names = ['daisy', 'dandelion', 'tulip', 'rose', 'sunflower']

# Collect the .jpg paths of every class folder.
# `data` maps class name -> list of image paths; `img_paths` is the flat list
# of all paths, grouped by class in the order of `names`.
data = {}
img_paths = []
for name in names:
  tmp_path = os.path.join(path, name)
  img_list = glob(tmp_path+'/*.jpg')
  data[name] = img_list
  img_paths.extend(img_list)

data_count = len(img_paths)
print(f'total images: {data_count}')

In [None]:


# Helper that reads an image from disk and resizes it.
def read_img(img_path, target_size=(128, 128)):
  """Load an image file as an RGB array resized to `target_size`.

  Args:
    img_path: Path of the image file to read.
    target_size: Output size handed to cv2.resize, as (width, height).

  Returns:
    The resized image array with its channels converted from BGR to RGB.

  Raises:
    OSError: If cv2.imread could not read `img_path`.
  """
  img = cv2.imread(img_path)
  if img is None:
    # cv2.imread reports a missing/unreadable file by returning None rather
    # than raising; fail here with the offending path instead of letting
    # cvtColor raise an opaque assertion error.
    raise OSError(f'cv2.imread could not read image: {img_path}')
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  img = cv2.resize(img, target_size)
  return img


# Pre-allocate the array holding every training image (128 x 128, 3 channels).
# float32 stores the 0-255 pixel values exactly and needs half the memory of
# NumPy's float64 default.
X = np.zeros((data_count, 128, 128, 3), dtype=np.float32)

# Read the images in the order of img_paths.
# The loop variable is named img_path so it does not overwrite the global
# `path` that holds the dataset folder.
for i, img_path in tqdm(enumerate(img_paths), total=len(img_paths)):
  X[i] = read_img(img_path)

In [None]:
# Hold out 15% of the images (and their one-hot labels) for validation.
x_train, x_test, y_train, y_test = train_test_split(
    X, label_onehot, test_size=0.15)

# Report the shape of each split.
for split_name, split_array in (('x_train', x_train),
                                ('x_test', x_test),
                                ('y_train', y_train),
                                ('y_test', y_test)):
  print(f'{split_name} shape: {split_array.shape}')

## Data Generator & Augmentation

In [None]:
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Random augmentation settings applied to the training images.
# No `rescale` argument is given, so the generator does not rescale pixel values.
augmentation_options = {
    # geometric transforms
    'rotation_range': 45,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.5,
    'zoom_range': 0.8,
    # flips
    'horizontal_flip': True,
    'vertical_flip': True,
    # colour jitter
    'channel_shift_range': 80,
}
datagen = ImageDataGenerator(**augmentation_options)

## Model Building

In [None]:
from keras.layers import Input, Dense, Flatten, BatchNormalization, Dropout
from keras.models import Model
from keras.callbacks import ReduceLROnPlateau
from keras import regularizers

In [None]:
# Fit the generator's data-dependent statistics on the training images.
# NOTE(review): per the Keras docs, fit() is only required when
# featurewise_center, featurewise_std_normalization or zca_whitening is
# enabled; none of these is passed to ImageDataGenerator in this file, so this
# call looks unnecessary — confirm before removing.
datagen.fit(x_train)

In [None]:
# keras.backend.clear_session()

In [None]:
# Initial learning rate for the optimizer and L2 penalty for the dense layers.
LR = 1e-3
pen = 1e-5

# Callback that multiplies the learning rate by 0.75 when val_accuracy has not
# improved for 5 epochs, never going below 1e-7.
reduce_lr = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.75,
    patience=5,
    min_lr=1e-7,
    verbose=1,
)


In [None]:
# Transfer-learning classifier: ImageNet-pretrained Xception backbone followed
# by a regularised dense head with a 5-way softmax output.
inputs = Input(shape=(128, 128, 3))

# The images are fed to the model as raw 0-255 pixel values, but the ImageNet
# weights expect inputs passed through xception.preprocess_input (scaled to
# [-1, 1]). Doing the scaling inside the model guarantees that training,
# validation and prediction all receive identical preprocessing.
x = keras.layers.Lambda(keras.applications.xception.preprocess_input)(inputs)

base_model = keras.applications.Xception(include_top=False, weights='imagenet', input_shape=(128, 128, 3), pooling='max')
# pooling='max' already reduces the backbone output to a 2D (batch, features)
# tensor, so no Flatten layer is needed before the dense head.
x = base_model(x)

# Dense head: each stage is Dense -> BatchNorm -> Dropout, with the dropout
# rate decreasing towards the output.
x = Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(pen))(x)
x = BatchNormalization()(x)
x = Dropout(0.7)(x)
x = Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(pen))(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.3)(x)

# One softmax probability per flower class.
predictions = Dense(5, activation='softmax')(x)
model = Model(inputs, predictions)


## Model Training

In [None]:
# Number of augmented images per training step; used both by the generator
# and to derive the number of steps per epoch.
BATCH_SIZE = 256

# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases reject.
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(learning_rate=LR),
              metrics=['accuracy'])

# Train on augmented batches and validate on the untouched hold-out split.
# fit()'s `shuffle` argument is ignored for generator input, so it is not
# passed; datagen.flow shuffles the training data by default.
logs = model.fit(datagen.flow(x_train, y_train, batch_size=BATCH_SIZE),
          steps_per_epoch=x_train.shape[0]//BATCH_SIZE,
          epochs=80,
          verbose=1,
          callbacks=[reduce_lr],
          validation_data=(x_test, y_test),
          )

In [None]:
# Per-epoch metrics recorded by model.fit.
history = logs.history

# Plot training vs. validation accuracy over the epochs.
for metric_key, legend_text in (('accuracy', 'acc'),
                                ('val_accuracy', 'val_accuracy')):
  plt.plot(history[metric_key], label=legend_text)
plt.legend()
plt.title('Accuracy')



In [None]:
# Plot training vs. validation loss over the epochs.
for metric_key in ('loss', 'val_loss'):
  plt.plot(history[metric_key], label=metric_key)
plt.legend()
plt.title('Loss')

## Model Prediction

In [None]:
# Folder holding the unlabeled test images.
path = '/content/drive/MyDrive/ColabWorkBench/MLMarathon/Final_Exam/test'


# Collect the path of every test image.

img_list = glob(path+'/*.jpg')

total_test_img_count = len(img_list)
print(f'total test images: {total_test_img_count}')


def image_id(img_path):
  """Return the file name of `img_path` without its directory and extension.

  Only the final extension of the file name is removed, so a '.jpg' that
  appears in a directory name or in the middle of the file name is left alone.
  """
  return os.path.splitext(os.path.basename(img_path))[0]


# Store the id of each test image, in the same order as img_list.
id_list = [image_id(p) for p in img_list]
id_list[0:5]


In [None]:
# Preview the first five test image paths.
img_list[0:5]

In [None]:
# Read the test-set images into one pre-allocated array.
# float32 stores the 0-255 pixel values exactly and needs half the memory of
# NumPy's float64 default.
X_test = np.zeros((total_test_img_count, 128, 128, 3), dtype=np.float32)
# The loop variable is named img_path so it does not overwrite the global
# `path` that holds the test folder.
for i, img_path in tqdm(enumerate(img_list), total=len(img_list)):
  X_test[i] = read_img(img_path) # read & resize to target pixels

In [None]:
# Show the shape of the loaded test image array.
X_test.shape

In [None]:
# Run the trained model on the test images; `pred` holds the model output
# for each image.

pred = model.predict(X_test)

In [None]:
# Show the shape of the prediction array.
pred.shape

In [None]:
# Display the raw prediction array.
pred

In [None]:
# For every test image, take the index of the highest-scoring class.
d = pred.argmax(axis=1)
d.shape

In [None]:
# Print the predicted class indices of the first five test images.
print(d[:5])

In [None]:
# Assemble the submission table: one row per test image, holding its id and
# predicted class index.
submission_columns = {
    'id': id_list,
    'flower_class': d,
}
ans = pd.DataFrame(submission_columns)
ans

In [None]:
# Write the submission table to CSV; index=False omits the DataFrame index column.
ans.to_csv('/content/drive/MyDrive/ColabWorkBench/MLMarathon/Final_Exam/0309_out2.csv', index=False)