# Summary

- [Self-implementation](#Self-implementation)
- [Comparison with Keras Implementation](#Comparison-with-Keras-Implementation)
- [References](#References)

In [1]:
import numpy as np
from keras.layers import Conv2D, MaxPool2D, Dense, Flatten
from keras.models import Sequential
from keras.preprocessing import image
from keras.utils.data_utils import get_file
from keras.applications.vgg19 import VGG19, preprocess_input, decode_predictions

# Shape of one input image as passed to the first Conv2D layer (channels last).
INPUT_SIZE = (224, 224, 3)

# Download URL of the pretrained VGG19 weights file used by the self-implementation.
WEIGHTS_PATH = (
    "https://github.com/fchollet/deep-learning-models/releases/download/v0.1/"
    "vgg19_weights_tf_dim_ordering_tf_kernels.h5"
)

Using TensorFlow backend.


# Self-implementation 

<img src="images/vgg16_paper.png" width="600">

<img src="images/vgg16_paper_details.png" width="600">

<img src="images/vgg16.png" width="800">

> _"On a system equipped with four NVIDIA Titan Black GPUs, training a single net took 2–3 weeks depending on the architecture."_

In [2]:
# Convolutional part of the network, described as data: one entry per block,
# given as (filters per conv layer, number of conv layers in the block).
# Every conv layer is 3x3, 'same'-padded, ReLU-activated; every block ends
# with a 2x2 max-pool of stride 2.
conv_blocks = [(64, 2), (128, 2), (256, 4), (512, 4), (512, 4)]

model = Sequential()

for block_no, (n_filters, n_convs) in enumerate(conv_blocks, start=1):
    for conv_no in range(1, n_convs + 1):
        # Only the very first layer of the model declares the input shape.
        first_layer_kwargs = (
            {'input_shape': INPUT_SIZE} if (block_no, conv_no) == (1, 1) else {}
        )
        model.add(Conv2D(n_filters, (3, 3), activation='relu', padding='same',
                         **first_layer_kwargs))
    model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))

# top: flatten the feature maps, then two 4096-unit ReLU layers and a
# 1000-way softmax output.
model.add(Flatten())
for units, activation in [(4096, 'relu'), (4096, 'relu'), (1000, 'softmax')]:
    model.add(Dense(units, activation=activation))

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 224, 224, 64)      1792      
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 224, 224, 64)      36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 112, 112, 64)      0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 112, 112, 128)     73856     
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 112, 112, 128)     147584    
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 56, 56, 128)       0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 56, 56, 256)       295168    
__________

In [3]:
# Fetch the pretrained weights file from WEIGHTS_PATH into the 'models' cache
# subdirectory (file_hash is the expected hash of the downloaded file), then
# load those weights into the hand-built model.
weights_path = get_file(
    fname='vgg19_weights_tf_dim_ordering_tf_kernels.h5',
    origin=WEIGHTS_PATH,
    cache_subdir='models',
    file_hash='cbe5617147190e668d6c5d5026f83318',
)
model.load_weights(weights_path)

In [4]:
# Load the sample image resized to the first two entries of INPUT_SIZE,
# convert it to an array and prepend a batch axis of length 1.
img = image.load_img('data/cat.jpeg', target_size=INPUT_SIZE[:2])
x = image.img_to_array(img)[np.newaxis, ...]

# Run the VGG19 preprocessing and the forward pass, then print the three
# highest-scoring decoded classes for the single image in the batch.
preds = model.predict(preprocess_input(x))
top_predictions = decode_predictions(preds, top=3)[0]
print('Predicted:', top_predictions)

Predicted: [('n02123597', 'Siamese_cat', 0.99825221), ('n02124075', 'Egyptian_cat', 0.0015937418), ('n02127052', 'lynx', 0.00011591119)]


# Comparison with Keras Implementation 

In [5]:
# Instantiate the VGG19 model shipped with Keras, using its 'imagenet' weights,
# and print its layer summary for comparison with the self-implementation.
# NOTE: this rebinds `model`; from here on the name refers to the Keras model,
# not the hand-built Sequential network defined earlier in the notebook.
model = VGG19(weights='imagenet')
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [6]:
# Repeat the prediction on the same sample image, this time with whatever
# `model` currently refers to (the Keras VGG19 once the previous cell has run).
img = image.load_img('data/cat.jpeg', target_size=INPUT_SIZE[:2])
x = image.img_to_array(img)[np.newaxis, ...]

# Run the VGG19 preprocessing and the forward pass, then print the three
# highest-scoring decoded classes for the single image in the batch.
preds = model.predict(preprocess_input(x))
top_predictions = decode_predictions(preds, top=3)[0]
print('Predicted:', top_predictions)

Predicted: [('n02123597', 'Siamese_cat', 0.99825221), ('n02124075', 'Egyptian_cat', 0.0015937418), ('n02127052', 'lynx', 0.00011591119)]


# References 

- [Original Paper](https://arxiv.org/pdf/1409.1556.pdf)
- [Keras Implementation](https://github.com/keras-team/keras/blob/master/keras/applications/vgg19.py)