In [1]:
import tensorflow as tf
import numpy as np
from scipy.misc import imread, imresize

# class names for the ImageNet dataset
from imagenet_classes import class_names

import matplotlib.pyplot as plt
from pprint import pprint

%matplotlib inline

In [2]:
# Directory for the TensorBoard event files (used for graph visualization).
logs_path = 'tmp/logs'

# Start from a clean directory: delete whatever a previous run left there,
# then (re)create it.
if tf.gfile.Exists(logs_path):
    tf.gfile.DeleteRecursively(logs_path)
tf.gfile.MakeDirs(logs_path)

In [3]:
# Geometry of the network input: image width, image height, color channels.
IMG_W, IMG_H, CHANNELS = 224, 224, 3

## Load weights (numpy arrays)

In [4]:
VGG19_weights_file = "vgg19.npy"
# The file holds a pickled Python dict wrapped in an object array, which is why
# `.item()` is needed to get the dict back. NumPy >= 1.16.3 refuses to unpickle
# object arrays unless allow_pickle=True is passed explicitly.
# NOTE(review): unpickling can execute arbitrary code -- only load a weights
# file that comes from a trusted source.
VGG19_weights = np.load(VGG19_weights_file, encoding='latin1', allow_pickle=True).item()
print(type(VGG19_weights))

<class 'dict'>


In [5]:
# NOTE(review): this cell repeats the log-directory setup already performed in
# an earlier cell of this notebook.
# Directory for the TensorBoard event files.
logs_path = 'tmp/logs'

# Remove an existing log directory (if any) and recreate it empty.
if tf.gfile.Exists(logs_path):
    tf.gfile.DeleteRecursively(logs_path)
tf.gfile.MakeDirs(logs_path)

In [6]:
# NOTE(review): same constants as the ones defined in an earlier cell.
# Input images are square: width and height share the same value.
IMG_W = IMG_H = 224
# Number of color channels.
CHANNELS = 3

In [7]:
# Names of the layers for which the weights dictionary holds parameters.
VGG19_weights.keys()

dict_keys(['conv3_3', 'fc6', 'conv3_1', 'conv4_3', 'conv5_4', 'conv5_2', 'conv1_2', 'conv4_1', 'conv3_4', 'conv2_1', 'fc7', 'conv2_2', 'conv4_2', 'conv3_2', 'fc8', 'conv1_1', 'conv5_1', 'conv4_4', 'conv5_3'])

In [8]:
# Shapes of the two arrays stored for layer 'conv5_1' (entry 0, then entry 1),
# printed one per line.
print(VGG19_weights['conv5_1'][0].shape,
      VGG19_weights['conv5_1'][1].shape,
      sep='\n')

(3, 3, 512, 512)
(512,)


In [9]:
def _get_weights(layer_name, weights):
    """
    Load weights with name 'layer_name'
    weights[layer_name][0] : weights (conv kernel or matrix)
    weights[layer_name][1] : bias vector
    """
    W = weights[layer_name][0]
    b = weights[layer_name][1]
    return W, b

In [13]:
# Fetch the parameters of layer 'fc6' and show the shape of the weight matrix
# followed by the shape of the bias vector.
W, b = _get_weights('fc6', VGG19_weights)
print(W.shape, b.shape, sep='\n')

(25088, 4096)
(4096,)


## VGG Construction 

- store tensors in a python dictionary 
    - after each "conv + ReLU"
    - after each "pooling" (max pooling)
    - after each "dense layer + ReLU"
    - after softmax layer

In [None]:
# Maps a layer name to the tensor produced by that layer; filled cell by cell below.
model = dict()

In [None]:
# TensorFlow session used to run the graph in the following cells.
sess = tf.InteractiveSession()

In [None]:
# Input: a batch holding a single image.
# It is created as a variable (through tf.get_variable) so that the image itself
# can be optimized later on (see next notebooks).
model['input'] = tf.get_variable(
    name="input",
    shape=(1, IMG_W, IMG_H, CHANNELS),
    dtype='float32')

### Preprocessing: RGB -> BGR and mean subtraction

In [None]:
def _preprocess(prev_layer):
    """
    Preprocess an RGB image batch for VGG.

    The channels (axis 3) are reordered from RGB to BGR and the per-channel
    ImageNet mean is subtracted from each of them.

    Args:
        prev_layer: tensor whose axis 3 holds the three channels in R, G, B order.

    Returns:
        Tensor with the mean-centered channels in B, G, R order along axis 3.
    """
    # Per-channel means, listed in BGR order.
    mean_b, mean_g, mean_r = 103.939, 116.779, 123.68

    channel_r, channel_g, channel_b = tf.split(value=prev_layer, num_or_size_splits=3, axis=3)
    centered_bgr = [channel_b - mean_b, channel_g - mean_g, channel_r - mean_r]
    return tf.concat(values=centered_bgr, axis=3)

In [None]:
# Preprocessing step applied on top of the input tensor.
model["preprocess"] = _preprocess(model['input'])

### Variable sharing 

In more complex models for style transfer we will use many VGG networks (as encoder, for style and content losses). 

So we can use `tf.get_variable()` and `tf.variable_scope()` for variable sharing. 

See TensorFlow tutorial about variable sharing for more information : 
    - https://www.tensorflow.org/programmers_guide/variable_scope

In [None]:
def _conv2d_relu(prev_layer, layer_name, weights):
    """
    Build a Conv2D + ReLU layer from the VGG parameters stored at 'layer_name'.

    The kernel and the bias are created with tf.get_variable inside a variable
    scope named after the layer (which makes them shareable), are initialized
    from the stored arrays and are not trainable.

    Args:
        prev_layer: input tensor of the layer.
        layer_name: key of the layer in `weights`; also used as scope name and
            as name of the ReLU op.
        weights: dictionary of layer parameters (see _get_weights).

    Returns:
        The tensor produced by the ReLU activation.
    """
    kernel_np, bias_np = _get_weights(layer_name, weights)

    with tf.variable_scope(layer_name):
        kernel = tf.get_variable(
            'W',
            shape=tuple(kernel_np.shape),
            dtype=kernel_np.dtype,
            initializer=tf.constant_initializer(kernel_np),
            trainable=False)
        bias = tf.get_variable(
            'b',
            shape=tuple(bias_np.shape),
            dtype=bias_np.dtype,
            initializer=tf.constant_initializer(bias_np),
            trainable=False)

        # Stride 1 with 'SAME' padding.
        convolved = tf.nn.conv2d(prev_layer, filter=kernel, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(convolved, bias)
        return tf.nn.relu(pre_activation, name=layer_name)

In [None]:
def _pool(prev_layer, layer_name):
    """
    Build a max-pooling layer (2x2 window, stride 2, 'SAME' padding).

    Args:
        prev_layer: input tensor of the layer.
        layer_name: name scope under which the op is created.

    Returns:
        The pooled tensor.
    """
    # The same spec serves as window size and as stride.
    window = [1, 2, 2, 1]
    with tf.name_scope(layer_name):
        pooled = tf.nn.max_pool(prev_layer, ksize=window, strides=window, padding='SAME')
    return pooled

In [None]:
# Convolutional part of VGG19: five stages, each one a run of conv + ReLU layers
# followed by a max pooling. Each entry is (stage number, number of conv layers).
vgg19_conv_stages = [(1, 2), (2, 2), (3, 4), (4, 4), (5, 4)]

# Tensor feeding the next layer; the first conv layer reads the preprocessed input.
previous = model['preprocess']
for stage, conv_count in vgg19_conv_stages:
    for position in range(1, conv_count + 1):
        conv_name = 'conv{}_{}'.format(stage, position)
        model[conv_name] = _conv2d_relu(previous, conv_name, VGG19_weights)
        previous = model[conv_name]
    pool_name = 'pool_{}'.format(stage)
    model[pool_name] = _pool(previous, pool_name)
    previous = model[pool_name]

In [None]:
def _flatten(prev_layer):
    """
    Flatten every dimension but the first one into a single dimension.

    Args:
        prev_layer: input tensor; all dimensions after the first are merged.

    Returns:
        A 2-D tensor of shape [-1, product of the merged dimensions].
    """
    with tf.name_scope('flatten'):
        trailing_dims = prev_layer.get_shape()[1:]
        flat_size = int(np.prod(trailing_dims))
        flattened = tf.reshape(prev_layer, [-1, flat_size])
    return flattened

In [None]:
# Flatten the output of the last pooling layer.
model['flatten'] = _flatten(model['pool_5'])

In [None]:
def _fc_relu(prev_layer, layer_name, weights):
    """
    Build a Dense/Fully Connected + ReLU layer from the VGG parameters stored
    at 'layer_name'.

    The matrix and the bias are created with tf.get_variable inside a variable
    scope named after the layer, are initialized from the stored arrays and are
    not trainable.

    Args:
        prev_layer: 2-D input tensor (it is multiplied by the weight matrix).
        layer_name: key of the layer in `weights`; also used as scope name.
        weights: dictionary of layer parameters (see _get_weights).

    Returns:
        relu(prev_layer x W + b).
    """
    matrix_np, bias_np = _get_weights(layer_name, weights)

    with tf.variable_scope(layer_name):
        matrix = tf.get_variable(
            'W',
            shape=tuple(matrix_np.shape),
            dtype=matrix_np.dtype,
            initializer=tf.constant_initializer(matrix_np),
            trainable=False)
        bias = tf.get_variable(
            'b',
            shape=tuple(bias_np.shape),
            dtype=bias_np.dtype,
            initializer=tf.constant_initializer(bias_np),
            trainable=False)

        affine = tf.nn.bias_add(tf.matmul(prev_layer, matrix), bias)
        return tf.nn.relu(affine)

def _fc_linear(prev_layer, layer_name, weights):
    """
    Build a Dense/Fully Connected layer without activation from the VGG
    parameters stored at 'layer_name'.

    The matrix and the bias are created with tf.get_variable inside a variable
    scope named after the layer, are initialized from the stored arrays and are
    not trainable.

    Args:
        prev_layer: 2-D input tensor (it is multiplied by the weight matrix).
        layer_name: key of the layer in `weights`; also used as scope name.
        weights: dictionary of layer parameters (see _get_weights).

    Returns:
        prev_layer x W + b.
    """
    matrix_np, bias_np = _get_weights(layer_name, weights)

    with tf.variable_scope(layer_name):
        matrix = tf.get_variable(
            'W',
            shape=tuple(matrix_np.shape),
            dtype=matrix_np.dtype,
            initializer=tf.constant_initializer(matrix_np),
            trainable=False)
        bias = tf.get_variable(
            'b',
            shape=tuple(bias_np.shape),
            dtype=bias_np.dtype,
            initializer=tf.constant_initializer(bias_np),
            trainable=False)

        product = tf.matmul(prev_layer, matrix)
        return tf.nn.bias_add(product, bias)

In [None]:
# Fully connected head: fc6 and fc7 are followed by a ReLU, fc8 is kept linear
# (the softmax is applied to it in a later cell).
model['fc6'] = _fc_relu(model['flatten'], 'fc6', VGG19_weights)
model['fc7'] = _fc_relu(model['fc6'], 'fc7', VGG19_weights)
model['fc8'] = _fc_linear(model['fc7'], 'fc8', VGG19_weights)

In [None]:
def _prob(prev_layer, layer_name):
    """
    Apply a softmax to the input tensor.

    Args:
        prev_layer: input tensor (the scores to normalize).
        layer_name: name scope under which the op is created.

    Returns:
        The softmax of `prev_layer`.
    """
    with tf.name_scope(layer_name):
        probabilities = tf.nn.softmax(prev_layer)
    return probabilities

In [None]:
# Class probabilities: softmax over the output of the last fully connected layer.
model['prob'] = _prob(model['fc8'], 'prob')

In [None]:
# Variable initialization: assign the trained values (conv kernels, matrices
# and bias vectors) to the variables of the graph.
sess.run(tf.global_variables_initializer())

In [None]:
# Add the graph to the TensorBoard visualization.
writer = tf.summary.FileWriter(logs_path, sess.graph)
# FileWriter buffers events and writes them asynchronously; flush so that the
# graph is on disk right away instead of after the periodic background flush.
writer.flush()

## Image classification

In [None]:
# Read the test image and resize it to the network input size.
img1 = imresize(imread('../images/golden_retriever.jpg'), (IMG_W, IMG_H))
# Add a leading batch dimension of size 1.
img1 = img1.reshape((1, IMG_W, IMG_H, CHANNELS))
print(img1.dtype)
# Display the resized image.
plt.imshow(img1[0])

In [None]:
# Assign the image to the input variable; the value returned by running the
# assign op is not needed, hence the throwaway name.
_ = sess.run(model['input'].assign(img1))

In [None]:
# Run the network and keep the probabilities of the first (only) batch entry.
prob = sess.run(model['prob'])[0]
# Indices of the five highest probabilities, highest first.
preds = (np.argsort(prob)[::-1])[0:5]
# Print the class name next to its probability.
for p in preds:
    print(class_names[p], prob[p])

In [None]:
# Variable-sharing example: create a variable 'W' inside the variable scope
# "VGG" and keep a handle on the scope object itself.
with tf.variable_scope("VGG") as vgg_scope :
    W = tf.get_variable('W',shape=(200,200))
    print(type(vgg_scope))

In [None]:
# Check what kind of object the scope handle is: a tf.VariableScope instance
# (first line) or a plain string (second line).
print(isinstance(vgg_scope, tf.VariableScope),
      isinstance(vgg_scope, str),
      sep='\n')

In [None]:
# Re-enter the captured scope with reuse=True and ask for "W" again: this
# fetches the existing variable (the next cell asserts it is the same object).
with tf.variable_scope(vgg_scope, reuse=True):
    W1 = tf.get_variable("W")

In [None]:
# Both names must refer to the very same variable object.
assert W is W1

In [None]:
# Close the current session before discarding its graph: this releases the
# resources it holds, and resetting the default graph while a session is still
# active is documented by TensorFlow as undefined behavior.
sess.close()
# Start again from an empty default graph.
tf.reset_default_graph()

In [None]:
# New session, created after the default graph was reset in the previous cell.
sess = tf.InteractiveSession()

In [None]:
# Two small 2x2 inputs, each created under its own name scope.
with tf.name_scope("input1"):
    input1 = tf.Variable([[10,8],[8,4]], dtype='float32', name='input')

with tf.name_scope("input2"):
    input2 = tf.Variable([[4,4],[4,4]], dtype='float32',name='input')

# First branch: creates the 2x2 variable "B" (filled with 0.5) inside the
# variable scope "foo" and multiplies the first input by it. The names of the
# variable and of the scope are printed for inspection.
# NOTE(review): variables created with tf.get_variable are expected to be named
# after the variable scope only (not the enclosing name scope) -- check the output.
with tf.name_scope("VGG1"): 
    with tf.variable_scope("foo") as foo_scope:
        B = tf.get_variable("B",shape=(2,2), initializer=tf.constant_initializer(0.5))
        print(B.name)
        print(foo_scope.name)
        out1 = tf.matmul(a=input1, b=B, name='out1')
        
# Second branch: re-enters the captured scope with reuse=True to fetch the
# existing "B" and multiplies the second input by it.
with tf.name_scope("VGG2"):
    with tf.variable_scope(foo_scope, reuse=True):
        B1 = tf.get_variable("B")
        print(B1.name)
        out2 = tf.matmul(a=input2, b=B1, name='out2')

# Both branches must share the very same variable object.
assert B1 is B

# Combine the outputs of the two branches with a matrix product.
with tf.name_scope("merge"): 
    final_out = tf.matmul(a=out1, b=out2, name='out')

In [None]:
# Initialize every variable of the current graph.
sess.run(tf.global_variables_initializer())

In [None]:
# Write the graph of the current session to the TensorBoard log directory.
writer = tf.summary.FileWriter(logs_path, sess.graph)
# FileWriter buffers events and writes them asynchronously; flush so that the
# graph is on disk right away instead of after the periodic background flush.
writer.flush()