# 2019 Acer Summer Intern Final Presentation @ CYL

## <font color='dark'>Import</font>

In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, ModelCheckpoint
# My package
from image_processing.img_processing import image_prepossessing
from NN.Resnet50 import ResNet50
%matplotlib inline

Using TensorFlow backend.


## <font color='dark'>Read the Image</font>

先讀進之前整理過的 csv 檔

In [0]:
# Load the per-split index tables written by the earlier cleaning step.
# Each CSV is read with pandas defaults, in the order train, test, val.
csv_paths = (
    './data/train_clean_df.csv',
    './data/test_clean_df.csv',
    './data/val_clean_df.csv',
)
train_df, test_df, val_df = map(pd.read_csv, csv_paths)

train_df.head(5)

Unnamed: 0,file,label
0,../data/train/data_0/10060_20141120_T_2.jpg,0
1,../data/train/data_0/14447_20150211_T_1.jpg,0
2,../data/train/data_0/10876_20131114_T_2.jpg,0
3,../data/train/data_0/13494_20090507_T_1.jpg,0
4,../data/train/data_0/11276_20140609_T_2.jpg,0


**<font color='red'>整理後的相對路徑</font>** 和 **<font color='red'>主程式的相對路徑</font>** 不同，我們先做一下調整

In [0]:
# The cleaned CSVs store paths as '../data/...', which is relative to a
# different directory than this notebook. Drop that leading '../' so the
# paths resolve from here.
# Only the literal prefix is removed, which makes this cell idempotent:
# the previous split('/', 1)[1] stripped one more path component on every
# re-run ('data/train/...' -> 'train/...').
PARENT_PREFIX = '../'
for split_df in (train_df, test_df, val_df):
    split_df['file'] = [
        path[len(PARENT_PREFIX):] if path.startswith(PARENT_PREFIX) else path
        for path in split_df['file']
    ]

train_df.head(5)

Unnamed: 0,file,label
0,data/train/data_0/10060_20141120_T_2.jpg,0
1,data/train/data_0/14447_20150211_T_1.jpg,0
2,data/train/data_0/10876_20131114_T_2.jpg,0
3,data/train/data_0/13494_20090507_T_1.jpg,0
4,data/train/data_0/11276_20140609_T_2.jpg,0


把所有圖片檔讀進來，經過 **<font color='red'>image processing</font>**

發現 train data 的 0 和 1 的比率並不是那麼平均, 因此我們必須 balance 一下，例如將 0 的資料做旋轉、調整亮度對比等

In [0]:
# Count the rows per label to see how balanced the training split is.
label_counts = train_df.groupby('label').size()
print("Training data destribution : \n\n{}".format(label_counts))

Training data destribution : 

label
0     435
1    2164
dtype: int64


In [0]:
# Generator configured only to multiply pixel values by 1/255.
# NOTE(review): `rescale_img` is not referenced again in the visible cells;
# the training cell builds its own generators.
rescale_img = ImageDataGenerator(rescale=1.0 / 255.0)

In [0]:
def rotate_img(image, angle):
    """Rotate an image about its centre, keeping the input's canvas size.

    Parameters
    ----------
    image : numpy.ndarray
        Image array whose first two axes are (rows, cols), i.e.
        (height, width).
    angle : float
        Rotation angle in degrees, forwarded to ``cv2.getRotationMatrix2D``
        with a scale factor of 1.

    Returns
    -------
    numpy.ndarray
        Result of ``cv2.warpAffine`` with the output size set to the input's
        (width, height).
    """
    # NumPy shapes are (rows, cols) = (height, width), while OpenCV takes
    # points and sizes as (x, y) = (width, height). The previous code
    # unpacked the shape as (cols, rows), which only worked for square images.
    rows, cols = image.shape[:2]
    center = (cols / 2, rows / 2)
    M = cv2.getRotationMatrix2D(center, angle, 1)
    return cv2.warpAffine(image, M, (cols, rows))

In [0]:
def ReadImg2array(dataframe, for_train=False, angles=(0, 60, 90, 180, 270), size=(512, 512)):
    """Read, preprocess and stack the images listed in ``dataframe``.

    Every file is read as grayscale, passed through ``image_prepossessing``,
    resized to ``size`` and stored with a trailing channel axis of length 1.
    When ``for_train`` is truthy, each label-0 image is replaced by one
    rotated copy per entry in ``angles`` to oversample that class; all other
    images are stored once.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide a ``'file'`` column (image paths) and a ``'label'``
        column.
    for_train : bool, optional
        Enable the rotation-based oversampling of label-0 images.
    angles : sequence of float, optional
        Rotation angles in degrees, each applied to the same preprocessed
        image.
    size : tuple of int, optional
        Target size handed to ``cv2.resize``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(data, label)``, both converted to ``float32``.

    Raises
    ------
    FileNotFoundError
        If ``cv2.imread`` cannot read one of the listed files.
    """
    data = []
    label = []

    for path, label_num in zip(dataframe['file'], dataframe['label']):
        img = cv2.imread(path, 0)  # flag 0: read in gray scale
        if img is None:
            # cv2.imread signals an unreadable file by returning None; fail
            # here with the offending path instead of deep inside later calls.
            raise FileNotFoundError("Could not read image: {}".format(path))
        img = image_prepossessing(img)
        img = cv2.resize(img, size)

        if for_train and label_num == 0:
            # Rotate the same base image for every angle. Previously `img`
            # was overwritten inside the loop, so the rotations accumulated
            # (0, 60, 150, 330, ... degrees) and compounded the
            # interpolation loss.
            variants = [rotate_img(img, rotate_angle) for rotate_angle in angles]
        else:
            variants = [img]

        for variant in variants:
            # reshape to (rows, cols, channel)
            data.append(variant.reshape(variant.shape[0], variant.shape[1], 1))
            label.append(label_num)

    return np.asarray(data, dtype=np.float32), np.asarray(label, dtype=np.float32)

**<font color="red">轉成 矩陣 (.npy) 存起來</font>**

In [0]:
# Build the training arrays (for_train=True enables the label-0 rotation
# branch in ReadImg2array) and cache both arrays as .npy files.
train_data, train_label = ReadImg2array(dataframe=train_df, for_train=True)
for npy_path, array in (("./data/train_data_1.npy", train_data),
                        ("./data/train_label_1.npy", train_label)):
    np.save(npy_path, array)


In [0]:
# Build the test arrays (no augmentation) and cache both as .npy files.
test_data, test_label = ReadImg2array(dataframe=test_df)
for npy_path, array in (("./data/test_data.npy", test_data),
                        ("./data/test_label.npy", test_label)):
    np.save(npy_path, array)

In [0]:
# Build the validation arrays (no augmentation) and cache both as .npy files.
val_data, val_label = ReadImg2array(dataframe=val_df)
for npy_path, array in (("./data/val_data.npy", val_data),
                        ("./data/val_label.npy", val_label)):
    np.save(npy_path, array)

In [0]:
NUM_CLASSES = 2          # labels are 0 / 1
VAL_SUBSET_SIZE = 400    # number of validation samples to keep
VAL_SUBSET_SEED = 3      # seed for the validation subset draw

# Reload the cached arrays written by the cells above.
train_data = np.load('./data/train_data_1.npy')
train_label = np.load('./data/train_label_1.npy')
test_data = np.load('./data/test_data.npy')
test_label = np.load('./data/test_label.npy')
val_data = np.load('./data/val_data.npy')
val_label = np.load('./data/val_label.npy')

# One-hot encode the labels. num_classes is pinned so every split gets the
# same number of columns even if one of them happens to lack a class
# (to_categorical otherwise infers the width from the largest label present).
train_label = to_categorical(train_label, num_classes=NUM_CLASSES)
test_label = to_categorical(test_label, num_classes=NUM_CLASSES)
val_label = to_categorical(val_label, num_classes=NUM_CLASSES)

data_shape = ((train_data, train_label, 'train'), (test_data, test_label, 'test'), (val_data, val_label, 'validation'))
for data, label, using in data_shape:
    print("The shape of {} data : {}".format(using, data.shape))
    print("The shape of {} label : {}\n".format(using, label.shape))

# The validation set is large, so keep a random subset of VAL_SUBSET_SIZE
# samples. A single permutation indexes both arrays, so images and labels
# stay paired by construction instead of relying on two separately re-seeded
# in-place shuffles producing the same order.
np.random.seed(VAL_SUBSET_SEED)
subset_idx = np.random.permutation(val_data.shape[0])[:VAL_SUBSET_SIZE]
val_data = val_data[subset_idx]
val_label = val_label[subset_idx]

print("The shape of new validation  data : {}".format(val_data.shape))
print("The shape of new validation  label : {}\n".format(val_label.shape))

The shape of train data : (4339, 512, 512, 1)
The shape of train label : (4339, 2)

The shape of test data : (807, 512, 512, 1)
The shape of test label : (807, 2)

The shape of validation data : (795, 512, 512, 1)
The shape of validation label : (795, 2)

The shape of new validation  data : (400, 512, 512, 1)
The shape of new validation  label : (400, 2)



## <font color='dark'>Load the Network</font>

In [0]:
# Instantiate the project's ResNet50 (imported from NN.Resnet50) and print
# its layer summary.
# NOTE(review): the positional arguments presumably are
# (height, width, channels, n_classes) — confirm against NN/Resnet50.py.
input_dims = (512, 512, 1)
ResNet50_model = ResNet50(*input_dims, 2)
ResNet50_model.summary()

W0823 10:45:21.923304 140458690385728 deprecation_wrapper.py:119] From /home/acerintern/anaconda3/envs/tf-gpu/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0823 10:45:22.328562 140458690385728 deprecation_wrapper.py:119] From /home/acerintern/anaconda3/envs/tf-gpu/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0823 10:45:22.391908 140458690385728 deprecation_wrapper.py:119] From /home/acerintern/anaconda3/envs/tf-gpu/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0823 10:45:22.494492 140458690385728 deprecation_wrapper.py:119] From /home/acerintern/anaconda3/envs/tf-gpu/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_defau

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 512, 512, 1)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 256, 256, 64) 3200        input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 256, 256, 64) 256         conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 256, 256, 64) 0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
max_poolin

  model = Model(input=Img, output=x)


In [0]:
# Training configuration.
# NOTE(review): the recorded run of this cell ended in an OOM error; lowering
# BATCH_SIZE is the first knob to try.
BATCH_SIZE = 20
EPOCHS = 100

# Callbacks: stop when validation accuracy stops improving, and keep the
# weights with the best validation loss on disk.
# NOTE(review): newer Keras releases log this metric as 'val_accuracy'
# instead of 'val_acc' — confirm against the installed version.
callbacks_list = [EarlyStopping(monitor='val_acc',
                                patience=8),
                  ModelCheckpoint(filepath='resnet_2nd.h5',
                                  monitor='val_loss',
                                  save_best_only=True)]

# Compile
ResNet50_model.compile(loss='binary_crossentropy',
                       optimizer=Adam(),
                       metrics=['accuracy'])

# Image generators: augmentation for training only; validation and test data
# are just rescaled by 1/255, the same scaling the training data gets.
train_datagen = ImageDataGenerator(rescale=1./255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True)
test_val_datagen = ImageDataGenerator(rescale=1./255)

train_set = train_datagen.flow(train_data, train_label, batch_size=BATCH_SIZE)
# shuffle=False so that ceil(n / BATCH_SIZE) steps visit every sample exactly once.
test_set = test_val_datagen.flow(test_data, test_label, batch_size=BATCH_SIZE, shuffle=False)
val_set = test_val_datagen.flow(val_data, val_label, batch_size=BATCH_SIZE, shuffle=False)

# Step counts must be whole numbers; round up so the final partial batch is
# included (the previous n / 20 produced a float).
train_steps = int(np.ceil(train_data.shape[0] / BATCH_SIZE))
val_steps = int(np.ceil(val_data.shape[0] / BATCH_SIZE))
test_steps = int(np.ceil(test_data.shape[0] / BATCH_SIZE))

# Train the model. The callbacks are now actually passed in; previously
# callbacks_list was built but never used, so neither early stopping nor
# checkpointing took effect.
ResNet50_model.fit_generator(train_set,
                             steps_per_epoch=train_steps,
                             validation_data=val_set,
                             validation_steps=val_steps,
                             epochs=EPOCHS,
                             callbacks=callbacks_list)

# Evaluate through the rescaling generator. Calling evaluate() on the raw
# test_data fed unscaled 0-255 pixels to a model trained on 1/255-scaled
# inputs.
score = ResNet50_model.evaluate_generator(test_set, steps=test_steps)
print('Test loss:', score[0])
print('Test accuracy:', score[1])


W0823 10:47:01.922179 140458690385728 deprecation_wrapper.py:119] From /home/acerintern/anaconda3/envs/tf-gpu/lib/python3.7/site-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0823 10:47:01.928627 140458690385728 deprecation.py:323] From /home/acerintern/anaconda3/envs/tf-gpu/lib/python3.7/site-packages/tensorflow/python/ops/nn_impl.py:180: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/100


ResourceExhaustedError: OOM when allocating tensor with shape[32,128,128,512] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator cpu
	 [[{{node conv2d_21/Relu}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


In [0]:
# Leftover inspection cell: displays the repr of the training iterator.
train_set

<keras_preprocessing.image.numpy_array_iterator.NumpyArrayIterator at 0x7f9d7a1b2ba8>