# VGG16 Model for Project 3 
### By Tianle Zhu

VGG16 is one of the earliest successes of Convolutional Neural Networks in image recognition tasks. It increases network depth to improve model performance. The network generalizes extraordinarily well to image recognition, classification, and localization tasks. The VGG16 pre-trained weights used here were trained on ImageNet data. 

Advantage: The VGG-16 architecture is simple and uniform, which makes it quite appealing. 

Disadvantage: The main downside is that it is a pretty large network in terms of the number of parameters that have to be trained. 

For the VGG16 model, we fine-tune two hyperparameters: the learning rate and the number of epochs. Because our dataset is small, our VGG16 model didn't perform well. To improve the VGG16 model, our next step will be to apply data augmentation to increase the amount of training data.  

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
import time
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.externals import joblib
from sklearn.ensemble import GradientBoostingClassifier 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, Input
from keras.layers import Conv2D, Dense, Flatten, Dropout, Activation
from tensorflow.keras.layers import BatchNormalization, Reshape, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.applications import VGG16
import tensorflow as tf
from tensorflow import keras
from keras import Model
import os
from shutil import copyfile, move
from tqdm import tqdm
import h5py
from sklearn import metrics
#from tensorflow.keras import optimizers
from keras import optimizers

Using TensorFlow backend.


In [2]:
# Number of images per batch; passed as batch_size to the
# flow_from_directory calls that build the train/validation generators.
batch_size = 16

In [6]:
# Training images: multiply pixel values by 1/255 and apply random
# horizontal/vertical flips as light augmentation.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    horizontal_flip=True,
    vertical_flip=True)

# One sub-directory per class is expected below this folder; images are
# resized to 64x64 and labelled 0/1 (class_mode='binary').
train_data_dir = "../../data/train_set/data/train"
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    shuffle=True,
    target_size=(64, 64),
    batch_size=batch_size,
    class_mode='binary')

# Validation images are only rescaled, never augmented.
validation_datagen = ImageDataGenerator(rescale=1. / 255)
validation_data_dir = "../../data/train_set/data/validation"
# shuffle=False is required here: flow_from_directory shuffles by default,
# while validation_generator.classes is always in directory order.  The
# predictions are later compared against .classes to compute the ROC/AUC,
# so the batches must be yielded in that same fixed order.
validation_generator = validation_datagen.flow_from_directory(
    validation_data_dir,
    shuffle=False,
    target_size=(64, 64),
    batch_size=batch_size,
    class_mode='binary')

# Network input: 64x64 images with 3 colour channels (matches target_size).
input_shape = (64,64,3)
num_classes = 2

Found 2384 images belonging to 2 classes.
Found 616 images belonging to 2 classes.


In [None]:
# Convolutional base: VGG16 with ImageNet weights and without its original
# classifier head.  Every base layer is frozen so only the new head trains.
vgg = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
for layer in vgg.layers:
    layer.trainable = False

# New classifier head stacked on the frozen base.
x = vgg.output
x = Flatten()(x)
# One 128-unit hidden layer; adding more fully connected layers is possible
# but increases the execution time.
x = Dense(128, activation='relu')(x)
# Single sigmoid unit as the output layer.
x = Dense(1, activation='sigmoid')(x)

model = Model(inputs=vgg.input, outputs=x)

# Learning rates noted for tuning: 0.01, 0.001 (this run uses 0.05).
# Alternative kept for reference: Adam = keras.optimizers.Adam(lr = 0.001)
Adam = optimizers.Adam(lr=0.05)
model.compile(
    optimizer=Adam,
    loss='binary_crossentropy',
    metrics=['accuracy'])

In [None]:
# Train the model on the generator output and record the wall-clock time.
start_time_train = time.time()
history = model.fit_generator(train_generator,
                   # Number of BATCHES drawn per epoch (not images): 38 steps
                   # cover 38 * batch_size images.  Use len(train_generator)
                   # to go through the whole training set once per epoch.
                   steps_per_epoch = 38,
                   epochs = 10,  # change this for better results
                   # Keras expects a dict {class_index: weight}; the former
                   # list [85, 1] is not a supported form.  The indices follow
                   # train_generator.class_indices -- NOTE(review): confirm
                   # that index 0 is the class meant to carry weight 85.
                   class_weight = {0: 85, 1: 1},
                   validation_data = validation_generator,
                   # Batches of validation data evaluated after each epoch.
                   validation_steps = 10)
elapsed_time_train = time.time() - start_time_train

In [None]:
# Final-epoch accuracies.  Keras stores the metric as 'acc' in older releases
# and as 'accuracy' in newer ones, so use whichever key this run produced
# instead of failing with a KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
train_acc = history.history[acc_key][-1]
validation_acc = history.history['val_' + acc_key][-1]

In [None]:
# Reset the validation generator before it is used for prediction
# (presumably so iteration restarts from its first batch -- confirm
# against the Keras iterator documentation).
validation_generator.reset()

In [None]:
# Predict on the validation set and record the wall-clock time.
# len(validation_generator) is the number of batches needed to see every
# image once; it replaces the hard-coded, non-integer 616/16 (= 38.5).
start_time_test = time.time()
pred = model.predict_generator(validation_generator,
                               verbose = 1,
                               steps = len(validation_generator))
elapsed_time_test = time.time() - start_time_test

In [None]:
# Loss and metric values over the validation set.  len(validation_generator)
# gives the integer number of batches for one full pass, replacing the
# hard-coded, non-integer 616/16 (= 38.5).
model.evaluate_generator(validation_generator, steps = len(validation_generator))

In [None]:
# Calculate AUC
fpr, tpr, thresholds = metrics.roc_curve(validation_generator.classes, pred)
AUC_vaule = metrics.auc(fpr, tpr)
AUC_vaule

In [None]:
# Print the timing and quality figures collected by the earlier cells,
# one "caption value" line per figure.
print("For the VGG16 model peformance")
summary_rows = (
    ("Training time:", elapsed_time_train),
    ("Testing time:", elapsed_time_test),
    ("Training Accuracy:", train_acc),
    ("Validation Accuracy:", validation_acc),
    ("AUC for VGG16 moldel:", AUC_vaule),
)
for caption, figure in summary_rows:
    print(caption, figure)

In [None]:
# testing_datagen = ImageDataGenerator(rescale=1. / 255)
# testing_data_dir = "../data/train_set/data/test"

# testing_generator = testing_datagen.flow_from_directory(
#     testing_data_dir,
#     target_size=(64, 64),
#     batch_size=batch_size,
#     class_mode='binary')