## 機器學習百日馬拉松期末考 - 花朵辨識 (model1) ##

期末考帶大家挑戰的是花朵辨識，選定的範圍內共有五種不同種類的花 : 雛菊(daisy)、蒲公英(dandellion)、玫瑰(rose)、向日葵(sunflower)、鬱金香(tulip)，請以同學使用訓練資料當中的照片，並應用在深度學習階段所學到的內容，來辨識照片中的是哪種花。 本測驗的目的，在於讓同學練習並熟悉影像辨識的做法，實際操作後半部課程的內容。尤其是一般 CNN模型與 Pre-training model 的差距，也希望同學能透過這次測驗體驗到。

特徵說明
圖形辨識的特徵就是圖檔本身，因此訓練特徵就是圖片本身，不另做說明。而作答的 id 就是檔名，同學可以詳閱 "Data" 分頁的說明以及 sample_submission.csv 的內容。 比較不同的是預測的輸出值，請同學特別注意 : 以數字 0 / 1 / 2 / 3 / 4 輸出你的要提交預測類別，而不是以花朵名稱輸出 ( 建議以 Python Dictionary 轉換，或輸出時直接是類別碼，例如 : flower_mapping = {'daisy':0, 'dandelion':1, 'rose':2, 'sunflower':3, 'tulip':4} )

### 訓練資料 ###

In [1]:
import keras
import os
import glob
import numpy as np
from keras.preprocessing.image import  img_to_array, load_img
from PIL import Image
from keras.callbacks import EarlyStopping
# Disable GPU
os.environ["CUDA_VISIBLE_DEVICES"] = ""

Using TensorFlow backend.


In [2]:
dict_labels = {"daisy":0, "dandelion":1, "rose": 2, "sunflower":3, "tulip": 4}
size = (128, 128)
nbofdata=700
base_path = r'ml100-03-final/image_data/train/'
layers_of_folders=0
folder_list=[]    
labels=['daisy','dandelion', 'rose', 'sunflower', 'tulip']

if base_path :
    folder_layers=[]
    files = os.scandir(base_path)
    #  Get the 1st layer of folder
    first_folder = []
    first_folder_kind = []
    for entry in files:
        if entry.is_dir():
            first_folder.append(entry.path)
            first_folder_kind.append(entry.name)
    folder_layers.append(first_folder_kind)
    folder_list.append(first_folder)

In [3]:
folder_list

[['ml100-03-final/image_data/train/daisy',
  'ml100-03-final/image_data/train/rose',
  'ml100-03-final/image_data/train/tulip',
  'ml100-03-final/image_data/train/dandelion',
  'ml100-03-final/image_data/train/sunflower']]

In [4]:
datanumber=nbofdata
blob=[]
blob_nparray=[]
image_data=[]
conc = 0
labels_dict={}
for entry1 in folder_list[layers_of_folders - 1]:
    blob = []
    cellname = os.path.basename(os.path.dirname(entry1))  # extract cell name
    # print(cellname)
    concnames = os.path.basename(entry1)  # extract concentration
    # print(concnames)
    if concnames in labels:
        labels_dict[conc] = concnames
        fnamelist = glob.glob(os.path.join(entry1, '*.jpg'))
        for filename in fnamelist[0:datanumber]:
            im = Image.open(filename)
            if im is not None:
                if im.mode=='RGB':
                    im=im.resize(size,Image.BILINEAR)
                    imarray = np.array(im)
                    blob.append(imarray)
        ind = np.reshape(np.arange(1, len(blob) + 1), (-1, 1))
        blob_nparray = np.reshape(np.asarray(blob), (len(blob), blob[1].size))
        blob_nparray = np.hstack((blob_nparray, ind, conc * np.ones((len(blob), 1))))
        image_data.append(np.asarray(blob_nparray, dtype=np.float32))
        print(concnames+'  finished!')
        conc += 1

daisy  finished!
rose  finished!
tulip  finished!
dandelion  finished!
sunflower  finished!


In [5]:
image_data
np.shape(image_data[1][2])

(49154,)

In [6]:
for j in range(len(labels)):
    a=image_data[j][:]
# Prepare data
    LengthT = a.shape[0]
    a_index = a[...,-2:-1]
    a_label = a[...,-1:] #['Nega' for x in range(lengthN*4)] #Nega_data[...,-1:]
    a = a[...,:-2]
    
    # Normalize image by subtracting mean image
    a -= np.reshape(np.mean(a, axis=1), (-1,1))
    # Reshape images
    a = np.reshape(a, (a.shape[0],128,128,3))

    # Add channel dimension to fit in Conv2D
    a = a.reshape(-1,128,128,3)
    np.random.shuffle(a)
    a_train_upto = round(a.shape[0] * 8 / 10)
    a_test_upto = a.shape[0]
    if j is 0:
        train_data = a[:a_train_upto]
        test_data = a[a_train_upto:a_test_upto]
        train_label = a_label[:a_train_upto]
        test_label = a_label[a_train_upto:a_test_upto]
        
    else:
        train_data = np.concatenate((train_data, 
                                     a[:a_train_upto]), axis=0)
        
        test_data = np.concatenate((test_data, 
                                    a[a_train_upto:a_test_upto]), axis=0)
        
        train_label = np.concatenate((train_label, 
                                     a_label[:a_train_upto]), axis=0)
        
        
        test_label = np.concatenate((test_label, 
                                    a_label[a_train_upto:a_test_upto]), axis=0)
        
test_label = keras.utils.to_categorical(test_label, num_classes=len(labels))
train_label = keras.utils.to_categorical(train_label, num_classes=len(labels))

In [7]:
test_data.shape
test_data

array([[[[ 1.2082792e+02,  1.2382792e+02,  1.4182791e+02],
         [ 1.3482791e+02,  1.3682791e+02,  1.5082791e+02],
         [ 1.0482792e+02,  1.0582792e+02,  1.1082792e+02],
         ...,
         [-6.2172081e+01, -4.7172081e+01, -7.5172081e+01],
         [-5.4172081e+01, -3.1172081e+01, -7.2172081e+01],
         [-3.9172081e+01, -1.6172081e+01, -6.5172081e+01]],

        [[ 1.4782791e+02,  1.4582791e+02,  1.5182791e+02],
         [ 1.5882791e+02,  1.5782791e+02,  1.6282791e+02],
         [ 1.5682791e+02,  1.5682791e+02,  1.6582791e+02],
         ...,
         [-4.1172081e+01, -2.5172081e+01, -5.9172081e+01],
         [-4.3172081e+01, -2.0172081e+01, -6.4172081e+01],
         [-3.8172081e+01, -1.6172081e+01, -6.5172081e+01]],

        [[ 1.5782791e+02,  1.5382791e+02,  1.6382791e+02],
         [ 1.6382791e+02,  1.6282791e+02,  1.6982791e+02],
         [ 1.5282791e+02,  1.5082791e+02,  1.6482791e+02],
         ...,
         [-4.4172081e+01, -2.5172081e+01, -6.1172081e+01],
         [

In [8]:
import random
temp = list(zip(train_data, train_label))

random.shuffle(temp)

train_data,train_label = zip(*temp)

train_data=np.asarray(train_data)
train_label=np.asarray(train_label)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import SGD,RMSprop,Adam
# Generate model
model = Sequential()
model.add(Conv2D(64, (3, 3), activation='relu', input_shape=(128,128,3),padding='same',name='block1_conv2_1'))
model.add(Conv2D(64, (3, 3), activation='relu',padding='same',name='block1_conv2_2'))
model.add(MaxPooling2D(pool_size=(2, 2),name='block1_MaxPooling'))
model.add(Dropout(0.25))
model.add(Conv2D(128, (3, 3), activation='relu',padding='same',name='block2_conv2_1'))
model.add(Conv2D(128, (3, 3), activation='relu',padding='same',name='block2_conv2_2'))
model.add(MaxPooling2D(pool_size=(2, 2),name='block2_MaxPooling'))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu',name='final_output_1'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu',name='final_output_2'))
model.add(Dropout(0.5))
model.add(Dense(5, activation='sigmoid',name='class_output'))
model.summary()
optimizer = Adam(lr=1e-2)
objective = 'binary_crossentropy'
model.compile(loss=objective, optimizer=optimizer, metrics=['accuracy'])
EStop = EarlyStopping(monitor='val_accuracy', min_delta=0, 
                      patience=10, verbose=1, mode='auto')
history = model.fit(train_data, train_label, batch_size=64, epochs=30,shuffle=True, validation_split=0.2,callbacks=[EStop])
model.save('flower_model.h5') 
predictions=model.predict(test_data)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
block1_conv2_1 (Conv2D)      (None, 128, 128, 64)      1792      
_________________________________________________________________
block1_conv2_2 (Conv2D)      (None, 128, 128, 64)      36928     
_________________________________________________________________
block1_MaxPooling (MaxPoolin (None, 64, 64, 64)        0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 64, 64, 64)        0         
_________________________________________________________________
block2_conv2_1 (Conv2D)      (None, 64, 64, 128)       73856     
_________________________________________________________________
block2_conv2_2 (Conv2D)      (None, 64, 64, 128)       147584    
_________________________________________________________________
block2_MaxPooling (MaxPoolin (None, 32, 32, 128)      

In [10]:
for i in range(10):
    for j in range(5):
        print(predictions[i][j])

0.27163804
0.11994648
0.18918073
0.113135636
0.0064148605
0.5642266
0.0061747134
0.006629348
0.508207
0.005930245
0.8418596
0.002156943
0.007041037
0.29427707
0.0027873814
0.43616688
0.041024834
0.059387863
0.21953177
0.5188435
0.15124562
0.0036665797
0.044458568
0.39263138
0.29798138
0.89831805
0.00466007
0.010843813
0.14543337
0.010159254
0.8325744
0.0038253367
0.007269591
0.21754852
0.020522028
0.7977431
0.025208145
0.05869165
0.0320836
0.023082048
0.44554293
0.0058656633
0.014021188
0.8050128
0.006624222
0.84674776
0.026755571
0.0025267303
0.046795517
0.012161225


### 驗證資料 ###

In [22]:
dict_labels = {"test":0}
size = (128, 128)
nbofdata=2000
base_path = r'ml100-03-final/image_data/test/'
layers_of_folders=0
folder_list=[]    
labels=['test']

if base_path :
    folder_layers=[]
    files = os.scandir(base_path)
    #  Get the 1st layer of folder
    first_folder = []
    first_folder_kind = []
    for entry in files:
        if entry.is_dir():
            first_folder.append(entry.path)
            first_folder_kind.append(entry.name)
    folder_layers.append(first_folder_kind)
    folder_list.append(first_folder)

In [23]:
datanumber=nbofdata
blob=[]
blob_nparray=[]
image_data=[]
conc = 0
fc = 0
labels_dict={}
fn = {}
for entry1 in folder_list[layers_of_folders - 1]:
    blob = []
    cellname = os.path.basename(os.path.dirname(entry1))  # extract cell name
    # print(cellname)
    concnames = os.path.basename(entry1)  # extract concentration
    # print(concnames)
    if concnames in labels:
        labels_dict[conc] = concnames
        fnamelist = glob.glob(os.path.join(entry1, '*.jpg'))
        for filename in fnamelist[0:datanumber]:
            im = Image.open(filename)
            if im is not None:
                if im.mode=='RGB':
                    im=im.resize(size,Image.BILINEAR)
                    imarray = np.array(im)
                    blob.append(imarray)
                    fn[fc] = filename
                    fc += 1
        ind = np.reshape(np.arange(1, len(blob) + 1), (-1, 1))
        blob_nparray = np.reshape(np.asarray(blob), (len(blob), blob[1].size))
        blob_nparray = np.hstack((blob_nparray, ind, conc * np.ones((len(blob), 1))))
        image_data.append(np.asarray(blob_nparray, dtype=np.float32))
        print(ind)
        print(blob_nparray)
        print(concnames+'  finished!')
        conc += 1

[[   1]
 [   2]
 [   3]
 ...
 [1998]
 [1999]
 [2000]]
[[9.600e+01 1.000e+02 8.900e+01 ... 1.000e+02 1.000e+00 0.000e+00]
 [5.500e+01 4.500e+01 4.400e+01 ... 1.670e+02 2.000e+00 0.000e+00]
 [1.700e+02 1.620e+02 1.400e+02 ... 8.800e+01 3.000e+00 0.000e+00]
 ...
 [3.900e+01 8.900e+01 7.700e+01 ... 1.260e+02 1.998e+03 0.000e+00]
 [8.800e+01 8.700e+01 5.700e+01 ... 4.500e+01 1.999e+03 0.000e+00]
 [5.100e+01 5.600e+01 6.800e+01 ... 1.420e+02 2.000e+03 0.000e+00]]
test  finished!


In [24]:
np.shape(fn)
fn[0]

'ml100-03-final/image_data/test/test/e86789078f3731bdc3d1e740825f29b3.jpg'

In [26]:
s = 'ml100-03-final/image_data/test/test/b38d1fef59f487bf8e702c5eab79880d.jpg'
p = s.split('/')
p = p[4].split('.jpg')
p[0]

sp = {}
for i in range(2000):
    sp[i] = fn[i].split('/')
    sp[i] = sp[i][4].split('.jpg')

In [27]:
ids = {}
for i in range(2000):
    ids[i] = sp[i][0]

In [28]:
for j in range(len(labels)):
    b=image_data[j][:]
# Prepare data
    LengthT = b.shape[0]

    b_index = b[...,-2:-1]

    b_label = b[...,-1:] 

    b = b[...,:-2]

    # Normalize image by subtracting mean image
    b -= np.reshape(np.mean(b, axis=1), (-1,1))
    # Reshape images
    b = np.reshape(b, (b.shape[0],128,128,3))
    
    # Add channel dimension to fit in Conv2D
    b = b.reshape(-1,128,128,3)
    b_test_upto = b.shape[0]
    if j is 0:
        test_data = b[:b_test_upto]      
    else:     
        test_data = np.concatenate((test_data, 
                                    b[b_train_upto:b_test_upto]), axis=0)

In [29]:
test_data.shape
test_data

array([[[[-1.0887146e-02,  3.9891129e+00, -7.0108871e+00],
         [ 6.9891129e+00,  1.8989113e+01, -5.0108871e+00],
         [-1.0887146e-02,  1.1989113e+01, -1.6010887e+01],
         ...,
         [-1.5010887e+01, -1.4010887e+01, -2.0010887e+01],
         [-1.9010887e+01, -1.8010887e+01, -2.3010887e+01],
         [-2.3010887e+01, -2.2010887e+01, -2.8010887e+01]],

        [[ 4.3989113e+01,  4.7989113e+01,  3.8989113e+01],
         [ 9.9891129e+00,  1.6989113e+01, -8.0108871e+00],
         [-2.4010887e+01, -1.7010887e+01, -4.2010887e+01],
         ...,
         [-1.6010887e+01, -1.4010887e+01, -2.0010887e+01],
         [-2.5010887e+01, -2.4010887e+01, -2.9010887e+01],
         [-2.8010887e+01, -2.7010887e+01, -3.3010887e+01]],

        [[ 4.7989113e+01,  5.1989113e+01,  4.3989113e+01],
         [ 1.0989113e+01,  1.3989113e+01,  3.9891129e+00],
         [-3.3010887e+01, -3.0010887e+01, -4.3010887e+01],
         ...,
         [-1.8010887e+01, -1.6010887e+01, -2.1010887e+01],
         [

In [30]:
predictions2=model.predict(test_data)

In [31]:
for i in range(10):
    for j in range(5):
        print(predictions2[i][j])

0.04909572
0.013113946
0.055634975
0.06339875
0.9230974
0.19162726
0.07953444
0.15324664
0.22683492
0.6789116
0.11249247
0.055561423
0.16546181
0.5317094
0.29542089
0.06979835
0.0031498969
0.002164036
0.9502262
0.0027238429
0.045027047
0.028469235
0.04521975
0.17088869
0.8759639
0.060619175
0.36615753
0.48053402
0.2016134
0.059325665
0.009342641
0.008719653
0.015091211
0.16177347
0.9278759
0.026359081
0.43176892
0.53215563
0.056786954
0.014728487
0.9058945
0.025457412
0.00083592534
0.045111954
0.0011349022
0.59874576
0.0059898794
0.007635206
0.55476135
0.028644353


In [32]:
import matplotlib.pyplot as plt
from keras.preprocessing.image import array_to_img

plt.imshow(array_to_img(test_data[1]))
plt.show()

print(np.max(predictions2[0]))

<Figure size 640x480 with 1 Axes>

0.9230974


In [33]:
cnn_pred = {}
for i in range(2000):
    for j in range(5):
        if predictions2[i][j] == np.max(predictions2[i]):
            cnn_pred[i] = j

In [34]:
import pandas as pd
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

submit = pd.DataFrame({'id': ids, 'flower_class': cnn_pred})
header = ["id", "flower_class"]
submit.to_csv('cnn_predict.csv', columns = header, index=False)