# Load AOI dataset

In [66]:
#read csv
import pandas as pd
AOI_data = pd.read_csv('train.csv')

In [67]:
import cv2
#Observe the number of data of each class
label = []
for i in range(6):
  temp = AOI_data[AOI_data['Label'] == i]
  label.append(temp.reset_index())


In [68]:
train_images = []
train_label = []
test_images = []
test_label = []
for i in range(6):
  #Split the test data (You cannot change the code of this part. We will use these test sets for scoring)
  images_temp = []
  label_temp = [i] * 20
  for j in range(20):
    img = cv2.imread('./train_images/'+label[i]['ID'][j])
    images_temp.append(cv2.resize(img,(224,224), cv2.INTER_AREA))
  test_images += images_temp
  test_label += label_temp

  #Augment data to make the number of training data of each class is the same
  #Write the code

  train_images += images_temp
  train_label += label_temp


In [69]:
#Change list to array
import numpy as np
from sklearn.utils import shuffle
x_train = np.array(train_images)
x_test = np.array(test_images)
y_train = np.array(train_label)
y_test = np.array(test_label)

In [70]:
#Shuffle the dataset
import random
x_train , y_train = shuffle(x_train, y_train, random_state=random.seed())


# Check the shape of data

In [71]:
print('x_train shape: ', x_train.shape)
print('x_test shape: ', x_test.shape)

x_train shape:  (120, 224, 224, 3)
x_test shape:  (120, 224, 224, 3)


# Check the shape of label

In [72]:
print('y_train shape:',y_train.shape)
print('y_test shape:', y_test.shape)

y_train shape: (120,)
y_test shape: (120,)


# Check the content of labels

In [73]:
print(y_train[0:10])

[1 4 4 3 2 1 1 3 0 4]


# Min-max normalization

In [74]:
x_train_norm = x_train.astype('float32')/255
x_test_norm = x_test.astype('float32')/255

In [75]:
print(x_train_norm[0][0][0])

[0.69411767 0.69411767 0.69411767]


# Use One-hot encoding for the label

In [76]:
from tensorflow.keras import utils

# Before transfer....
print(y_train[0])

1


In [77]:
# One-hot encoding...
y_train_onehot = utils.to_categorical(y_train, 6)
y_test_onehot = utils.to_categorical(y_test, 6)
# After transfer...
print(y_train_onehot[0])

[0. 1. 0. 0. 0. 0.]


# Build CNN Model

In [78]:
import tensorflow as tf
import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D

import tensorflow.keras.applications as tensorflow_model

In [79]:
vgg16 = tensorflow_model.VGG16(weights = 'imagenet',include_top=False,input_shape=(224,224,3)) #Please fill in the model parameters.

In [86]:
#Build the model
num_classes = 6
x = vgg16.layers[-1].output
x = Flatten()(x)
x = Dropout(0.4)(x)
x = Dense(num_classes,activation = 'softmax')(x)

# Create your own model 
cnn = keras.models.Model(inputs = vgg16.input, outputs=x) 
cnn.summary()

Model: "model_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_6 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0   

In [81]:
# The setting of training model #
adam = tf.optimizers.Adam(0.0001)
cnn.compile(optimizer = adam,loss = 'categorical_crossentropy', metrics = ['acc'])

In [83]:
# Train the model #
history = cnn.fit(x=x_train_norm,y=y_train_onehot,batch_size=32,epochs=30,validation_split=0.1)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


# Use test set to evalute the model

In [84]:
# evaluate
test_loss,test_val = cnn.evaluate(x_test_norm,y_test_onehot)
print('The loss of testing data:', test_loss)
print('The accuracy of testing data:', test_val)

The loss of testing data: 0.27761512994766235
The accuracy of testing data: 0.8999999761581421


In [85]:
# predict
predict_prop = cnn.predict(x_test_norm)
print('第一筆測試資料的預測機率', predict_prop[0])

第一筆測試資料的預測機率 [0.17887469 0.00274437 0.76536924 0.04305771 0.00866015 0.00129398]
