# Chapter 5: Transfer Learning
## Ex1: Pre-trained models
* Tạo 1 bộ dữ liệu Dog/Cat lớn (dataset ở chapter4)
* Chọn 1 pre-trained model - VGG16 - để xây dựng model dự đoán Dog/Cat
* Lưu model vừa xây dựng xong.

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Attach Google Drive to the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
# Switch the working directory to the chapter folder on the mounted Drive so the
# relative dataset paths used later in the notebook resolve from here.
%cd /content/drive/My Drive/LDS8_K275_ONLINE_NGUYENTHIKIMHOANG/Week_3/Chapter5

/content/drive/My Drive/LDS8_K275_ONLINE_NGUYENTHIKIMHOANG/Week_3/Chapter5


In [None]:
from tensorflow.keras import applications
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import optimizers
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from tensorflow.keras import backend as k
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping

## Large dataset ~ 10.000 images: 2 classes

### Strategy:  2

VGG16: image(224,224)

In [None]:
# Square input resolution fed to the network.
img_width = img_height = 224

# Dataset folders, relative to the current working directory.
train_data_dir = 'datasetfull/training_set'
validation_data_dir = 'datasetfull/test_set'

In [None]:
# http://www.tensorflow.org/api_docs/python/tf/keras/applications/VGG16
# Load the VGG16 convolutional base with ImageNet weights.
# include_top = False drops the 3 fully-connected layers at the top of the
# network so a custom classifier can be attached instead.
# input_shape follows the Keras (height, width, channels) order, matching the
# (img_height, img_width) target_size used by the data generators.
model = applications.VGG16(weights = 'imagenet',
                           include_top = False,
                           input_shape = (img_height, img_width, 3))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the VGG16 base.
model.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

## New dataset is large and similar to original dataset

In [None]:
# Freeze the first 6 entries of model.layers (the input layer, block1 and the
# two block2 convolutions in the summary above); every later layer stays trainable.
for position, vgg_layer in enumerate(model.layers):
  if position >= 6:
    break
  vgg_layer.trainable = False

In [None]:
# Custom classifier head (ANN) stacked on the VGG16 output:
# flatten -> 1024 relu -> dropout 0.5 -> 1024 relu -> 2-way softmax.
head_layers = [
    Flatten(),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    Dense(1024, activation='relu'),
    Dense(2, activation='softmax'),
]

# Thread the tensor through each head layer in order; after the loop
# `predictions` is the softmax output tensor.
predictions = model.output
for head_layer in head_layers:
    predictions = head_layer(predictions)

In [None]:
# Assemble the end-to-end network: VGG16 input tensor in, custom softmax head out.
model_final = Model(model.input, predictions)

In [None]:
# Compile the model for fine-tuning.
# Most VGG16 layers are trainable here, so a small learning rate is used to
# avoid wiping out the pre-trained weights. With the default 'adam' optimizer
# the recorded run stayed at loss ~0.693 / accuracy ~0.5 (chance level).
fine_tune_optimizer = optimizers.SGD(learning_rate = 1e-4, momentum = 0.9)
model_final.compile(loss = 'categorical_crossentropy',
                    optimizer = fine_tune_optimizer,
                    metrics = ['accuracy'])

In [None]:
# Initiate the train and test generators with data augmentation.
# The ImageNet VGG16 weights expect inputs prepared by
# vgg16.preprocess_input rather than a plain 1/255 rescale, so the same
# preprocessing function is applied to both the training and validation images.
# NOTE: anything that later feeds images to the saved model must apply the
# same preprocess_input step.
vgg16_preprocess = applications.vgg16.preprocess_input

# Training images are randomly flipped, zoomed, shifted and rotated.
train_datagen = ImageDataGenerator(preprocessing_function = vgg16_preprocess,
                                   horizontal_flip = True,
                                   fill_mode = 'nearest',
                                   zoom_range = 0.3,
                                   width_shift_range = 0.3,
                                   height_shift_range=0.3,
                                   rotation_range = 30)

# Validation images are only preprocessed, never augmented.
test_datagen = ImageDataGenerator(preprocessing_function = vgg16_preprocess)

In [None]:
# Stream batches of 32 images from the class sub-folders, resized to the
# network input size, with one-hot ('categorical') labels.
train_generator = train_datagen.flow_from_directory(train_data_dir,
                                                    target_size = (img_height,
                                                                   img_width),
                                                    batch_size = 32,
                                                    class_mode = 'categorical')

# Validation batches use an explicit batch size and a fixed (unshuffled) order
# so predictions can be lined up with validation_generator.classes.
validation_generator = test_datagen.flow_from_directory(validation_data_dir,
                                                        target_size = (img_height,
                                                                       img_width),
                                                        batch_size = 32,
                                                        shuffle = False,
                                                        class_mode = 'categorical')

Found 7805 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Stop when val_loss has not improved for 5 epochs and roll the model back to
# its best weights, so a later model_final.save() does not persist the weights
# of the last (worse) epoch.
early_stopping = EarlyStopping(monitor='val_loss',
                               patience=5,
                               restore_best_weights=True)

# Keep only the best model seen so far (lowest val_loss) on disk.
best_checkpoint = ModelCheckpoint('Checkpoint_vgg16.h5',
                                  monitor='val_loss',
                                  save_best_only=True)

callbacks = [early_stopping, best_checkpoint]

In [None]:
import datetime

In [None]:
# Wall-clock timestamp taken right before training starts.
t0 = datetime.datetime.now()
print(f'{t0}')

2022-06-01 06:54:33.031745


In [None]:
# Train the model.
# batch_size is intentionally not passed: the generators already yield batches
# (32 images each), and Keras documents that batch_size must not be specified
# for generator inputs.
# Training runs for up to 100 epochs; the callbacks decide when to stop early
# and which checkpoint to keep.
history = model_final.fit(train_generator,
                epochs = 100,
                validation_data = validation_generator,
                verbose = 1,
                callbacks = callbacks)

Epoch 1/100
Epoch 2/100
 10/244 [>.............................] - ETA: 2:43:49 - loss: 0.6926 - accuracy: 0.5156

In [None]:
# Timestamp after training; the difference from t0 is the total run time.
t1 = datetime.datetime.now()
elapsed = t1 - t0
print('Thoi gian chay mo hinh: ', elapsed)

In [None]:
import pandas as pd

In [None]:
# Tabulate the per-epoch metrics recorded by fit() and show the last five epochs.
history_df = pd.DataFrame(data = history.history)
history_df.tail(5)

Unnamed: 0,loss,accuracy,val_loss,val_accuracy
7,0.69284,0.513901,0.693348,0.5
8,0.692926,0.513901,0.693414,0.5
9,0.692931,0.50993,0.693891,0.5
10,0.692976,0.513901,0.693906,0.5
11,0.692848,0.513901,0.693528,0.5


In [None]:
from tensorflow.keras.models import load_model
# Write the trained model to the HDF5 file 'dog_cat_vgg16_big.h5'
# in the current working directory, then confirm on stdout.
model_final.save(filepath = 'dog_cat_vgg16_big.h5')
print('Save!!!')

Save!!!
