In [1]:
from tensorflow.keras import applications
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import optimizers, Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, GlobalAveragePooling2D
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
img_width, img_height = 224, 224
train_data_dir = 'training_set'
validation_data_dir = 'test_set'

# 1.MobileNet with large data

In [3]:
mobile = applications.MobileNet(weights = 'imagenet',
                                include_top = False,
                                input_shape = (img_width, 
                                               img_height, 3))
mobile.summary()

Model: "mobilenet_1.00_224"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
conv1 (Conv2D)               (None, 112, 112, 32)      864       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32)      128       
_________________________________________________________________
conv1_relu (ReLU)            (None, 112, 112, 32)      0         
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)      288       
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, 112, 112, 32)      128       
_________________________________________________________________
conv_dw_1_relu (ReLU)        (None, 112, 112, 32

In [4]:
for layer in mobile.layers[:5]:
    layer.trainable = False

In [6]:
x = mobile.output
x = Flatten()(x)
x = Dense(1024, activation = 'relu')(x)
x = Dense(512, activation = 'relu')(x)
predictions = Dense(1, activation = 'sigmoid')(x)

In [7]:
mobile_model = Model(inputs = mobile.input, outputs = predictions)

In [8]:
mobile_model.compile(loss = 'binary_crossentropy',
                     optimizer = 'adam',
                     metrics = ['accuracy'])

In [9]:
train_datagen = ImageDataGenerator(rescale = 1./255,
                                   horizontal_flip = True,
                                   fill_mode = 'nearest',
                                   zoom_range = 0.3,
                                   width_shift_range = 0.3,
                                   height_shift_range = 0.3,
                                   rotation_range = 0.3)

test_datagen = ImageDataGenerator(rescale = 1./255)

In [10]:
train_generator = train_datagen.flow_from_directory(train_data_dir,
                                                    target_size = (img_height, 
                                                                   img_width),
                                                    batch_size = 32,
                                                    class_mode = 'binary')

validation_generator = test_datagen.flow_from_directory(validation_data_dir,
                                                        target_size = (img_height,
                                                                       img_width),
                                                        class_mode = 'binary')

Found 9110 images belonging to 2 classes.
Found 2993 images belonging to 2 classes.


In [11]:
checkpoint = ModelCheckpoint('bike_car_mobilenet.h5',
                             monitor = 'val_loss',
                             save_best_only = True,
                             save_weights_only = False,
                             mode = 'auto',
                             save_freq = 1)

early = EarlyStopping(monitor = 'val_loss',
                      min_delta = 0.001,
                      mode = 'auto')

In [12]:
import time

t1 = time.time()
mobile_his = mobile_model.fit(train_generator,
                              validation_data = validation_generator,
                              batch_size = 32,
                              epochs = 150,
                              callbacks = [checkpoint, early])
t2 = time.time()

print('MobileNet training took: {:.2f} minutes.'.format((t2 - t1)/ 60))



Epoch 1/150








MobileNet training took: 11.71 minutes.


In [13]:
df_mobile = pd.DataFrame(mobile_his.history)
df_mobile

Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,0.717077,0.973655,0.009079,0.996993


- Với lượng dữ liệu lớn và tháo băng một phần mô hình pre-trained, MobileNet cho hiệu suất phân loại tương đối tốt, tuy nhiên kết quả có underfitting nhẹ, tương tự như phương pháp huấn luyện bằng lượng dữ liệu nhỏ.

# 2. InceptionV3 with large data

In [19]:
img_width_new, img_height_new = 299, 299

In [20]:
inception = applications.InceptionV3(weights = 'imagenet',
                                     include_top = False,
                                     input_shape = (img_width_new, img_height_new, 3))
inception.summary()

Model: "inception_v3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 299, 299, 3) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 149, 149, 32) 864         input_2[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 149, 149, 32) 96          conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 149, 149, 32) 0           batch_normalization[0][0]        
_______________________________________________________________________________________

In [21]:
for layer in inception.layers[: 6]:
    layer.trainable = False

In [24]:
x = inception.output
x = Flatten()(x)
x = Dense(1024, activation = 'relu')(x)
x = Dense(512, activation = 'relu')(x)
predictions = Dense(1, activation = 'sigmoid')(x)

In [25]:
inception_model = Model(inputs = inception.input, outputs = predictions)

In [26]:
inception_model.compile(loss = 'binary_crossentropy',
                        optimizer = 'adam',
                        metrics = ['accuracy'])

In [27]:
train_datagen = ImageDataGenerator(rescale = 1./255,
                                   horizontal_flip = True,
                                   fill_mode = 'nearest',
                                   zoom_range = 0.3,
                                   width_shift_range = 0.3,
                                   height_shift_range = 0.3,
                                   rotation_range = 0.3)

test_datagen = ImageDataGenerator(rescale = 1./255)

In [28]:
train_generator = train_datagen.flow_from_directory(train_data_dir,
                                                    target_size = (img_height_new, 
                                                                   img_width_new),
                                                    batch_size = 32,
                                                    class_mode = 'binary')

validation_generator = test_datagen.flow_from_directory(validation_data_dir,
                                                        target_size = (img_height_new,
                                                                       img_width_new),
                                                        class_mode = 'binary')

Found 9110 images belonging to 2 classes.
Found 2993 images belonging to 2 classes.


In [29]:
checkpoint = ModelCheckpoint('bike_car_inception.h5',
                             monitor = 'val_loss',
                             save_best_only = True,
                             save_weights_only = False,
                             mode = 'auto',
                             save_freq = 1)

early = EarlyStopping(monitor = 'val_loss',
                      min_delta = 0.001,
                      mode = 'auto')

In [30]:
t1 = time.time()
inception_his = inception_model.fit(train_generator,
                                    validation_data = validation_generator,
                                    batch_size = 32,
                                    epochs = 150,
                                    callbacks = [checkpoint, early])
t2 = time.time()

print('InceptionV3 training took: {:.2f} minutes.'.format((t2 - t1)/ 60))

Epoch 1/150










InceptionV3 training took: 35.89 minutes.


In [45]:
df_inception = pd.DataFrame(inception_his.history)
df_inception.tail()

Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,0.828854,0.940944,0.024449,0.990645


- InceptionV3 huấn luyện với lượng dữ liệu lớn cho thấy kết quả tương đối tốt, thời gian huấn luyện nhanh hơn mô hình gốc, tuy nhiên có hiện tượng underfitting nhẹ.

# 3. VGG16 with large data

In [31]:
vgg16 = applications.VGG16(weights = 'imagenet',
                           include_top = False,
                           input_shape = (img_width, img_height, 3))
vgg16.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [32]:
for layer in vgg16.layers[:6]:
    layer.trainable = False

In [33]:
x = vgg16.output
x = Flatten()(x)
x = Dense(1024, activation = 'relu')(x)
x = Dense(512, activation = 'relu')(x)
predictions = Dense(1, activation = 'sigmoid')(x)

In [34]:
vgg16_model = Model(inputs = vgg16.input, outputs = predictions)

In [40]:
vgg16_model.compile(loss = 'binary_crossentropy',
                    optimizer = 'adam',
                    metrics = ['accuracy'])

In [41]:
train_datagen = ImageDataGenerator(rescale = 1./255,
                                   horizontal_flip = True,
                                   fill_mode = 'nearest',
                                   zoom_range = 0.3,
                                   width_shift_range = 0.3,
                                   height_shift_range = 0.3,
                                   rotation_range = 30)

test_datagen = ImageDataGenerator(rescale = 1./255)

In [42]:
train_generator = train_datagen.flow_from_directory(train_data_dir,
                                                    target_size = (img_height,
                                                                   img_width),
                                                    batch_size = 32,
                                                    class_mode = 'binary')

validation_generator = test_datagen.flow_from_directory(validation_data_dir,
                                                        target_size = (img_height,
                                                                       img_width),
                                                        class_mode = 'binary')

Found 9110 images belonging to 2 classes.
Found 2993 images belonging to 2 classes.


In [43]:
checkpoint = ModelCheckpoint('bike_car_vgg16.h5',
                                  monitor = 'val_loss',
                                  save_best_only = True,
                                  save_weights_only = False,
                                  mode = 'auto',
                                  save_freq = 1)
early = EarlyStopping(monitor = 'val_loss',
                      min_delta = 0.001,
                      mode = 'auto')

In [44]:
t1 = time.time()
vgg16_his = vgg16_model.fit(train_generator,
                            validation_data = validation_generator,
                            batch_size = 32,
                            epochs = 150,
                            callbacks = [checkpoint, early])
t2 = time.time()
print('VGG16 training took: {:.2f} minutes.'.format((t2 - t1)/ 60))

Epoch 1/150






VGG16 training took: 40.49 minutes.


In [46]:
df_vgg16 = pd.DataFrame(vgg16_his.history)
df_vgg16

Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,0.981383,0.550055,0.696544,0.400936


- VGG 16 trong trường hợp này cho kết quả khá tệ. Độ chính xác thấp hơn hẳn so với trường hợp dùng dữ liệu nhỏ.

# 4. ResNet50 with large data

In [47]:
resnet50 = applications.ResNet50(weights = 'imagenet',
                                 include_top = False,
                                 input_shape = (img_width, img_height, 3))
resnet50.summary()

Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_4[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1_conv[0][0]                 
___________________________________________________________________________________________

In [48]:
for layer in resnet50.layers[:6]:
    layer.trainable = False

In [50]:
x = resnet50.output
x = Flatten()(x)
x = Dense(1024, activation = 'relu')(x)
x = Dense(512, activation = 'relu')(x)
predictions = Dense(1, activation = 'sigmoid')(x)

In [51]:
resnet50_model = Model(inputs = resnet50.input, outputs = predictions)

In [52]:
resnet50_model.compile(loss = 'binary_crossentropy',
                       optimizer = 'adam',
                       metrics = ['accuracy'])

In [53]:
train_datagen = ImageDataGenerator(rescale = 1./255,
                                   horizontal_flip = True,
                                   fill_mode = 'nearest',
                                   zoom_range = 0.3,
                                   width_shift_range = 0.3,
                                   height_shift_range = 0.3,
                                   rotation_range = 30)

test_datagen = ImageDataGenerator(rescale = 1./255)

In [54]:
train_generator = train_datagen.flow_from_directory(train_data_dir,
                                                    target_size = (img_height,
                                                                   img_width),
                                                    batch_size = 32,
                                                    class_mode = 'binary')

validation_generator = test_datagen.flow_from_directory(validation_data_dir,
                                                        target_size = (img_height,
                                                                       img_width),
                                                        class_mode = 'binary')

Found 9110 images belonging to 2 classes.
Found 2993 images belonging to 2 classes.


In [56]:
checkpoint = ModelCheckpoint('bike_car_resnet50.h5',
                                  monitor = 'val_loss',
                                  save_best_only = True,
                                  save_weights_only = False,
                                  mode = 'auto',
                                  save_freq = 1)
early = EarlyStopping(monitor = 'val_loss',
                      min_delta = 0.001,
                      mode = 'auto')

In [57]:
t1 = time.time()
resnet50_his = resnet50_model.fit(train_generator,
                            validation_data = validation_generator,
                            batch_size = 32,
                            epochs = 150,
                            callbacks = [checkpoint, early])
t2 = time.time()
print('ResNet50 training took: {:.2f} minutes.'.format((t2 - t1)/ 60))

Epoch 1/150








ResNet50 training took: 34.78 minutes.


In [59]:
df_resnet = pd.DataFrame(resnet50_his.history)
df_resnet

Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,1.041055,0.873875,0.669865,0.668226


- ResNet50 cho kết quả thấp tương tự như khi sử dụng dữ liệu nhỏ cho pre-trained model.