In [1]:
import numpy as np
import h5py

In [2]:
# Load the image/label arrays into memory. Both files are opened explicitly
# read-only so a wrong path raises instead of creating an empty HDF5 file
# (older h5py versions default to a read/write-or-create mode).
with h5py.File('Datasets/dogsvscats/train.hdf5', 'r') as f:
    # Only the first 2000 training examples are read.
    X_train = f['X'][:2000]
    y_train = f['y'][:2000]
with h5py.File('Datasets/dogsvscats/test.hdf5', 'r') as f:
    # The test set is read in full.
    X_test = f['X'][:]
    y_test = f['y'][:]

In [3]:
# Sanity check: shape of the loaded training image array.
X_train.shape

(2000, 150, 150, 3)

In [4]:
# Sanity check: shape of the loaded test image array.
X_test.shape

(1250, 150, 150, 3)

In [5]:
# Sanity check: the raw training labels are a flat vector, one entry per image.
y_train.shape

(2000,)

In [6]:
# Sanity check: the raw test labels are a flat vector, one entry per image.
y_test.shape

(1250,)

In [7]:
from sklearn.preprocessing import OneHotEncoder

In [8]:
# One-hot encoder configured to return dense arrays instead of sparse matrices.
# NOTE(review): the `sparse` keyword is tied to the scikit-learn version this
# notebook was run with; newer releases appear to rename it — confirm before upgrading.
encoder = OneHotEncoder(sparse=False)

In [9]:
# Learn the label categories from the training labels only; the flat label
# vector is reshaped into a single-column 2-D array before fitting.
encoder.fit(y_train.reshape(-1, 1))

OneHotEncoder(categorical_features='all', dtype=<class 'numpy.float64'>,
       handle_unknown='error', n_values='auto', sparse=False)

In [10]:
# Replace the flat label vectors with their one-hot encodings, using the
# encoder fitted on the training labels for both sets.
# NOTE: y_train / y_test are rebound in place, so this cell is not idempotent —
# re-running it without reloading the data would feed already-encoded labels
# back into the encoder.
y_train = encoder.transform(y_train.reshape(-1, 1))
y_test = encoder.transform(y_test.reshape(-1, 1))

In [11]:
# Sanity check: after encoding, the training labels have one column per class.
y_train.shape

(2000, 2)

In [12]:
def get_next_batch(X, y, size, drop_last=True):
    """Yield shuffled mini-batches ``(X_batch, y_batch)`` for one pass over the data.

    A fresh random permutation of the row indices is drawn (from NumPy's global
    random state) when iteration starts, and the same indices are used for
    ``X`` and ``y`` so examples stay aligned with their labels.

    Args:
        X: array indexed along axis 0 by example.
        y: array indexed along axis 0 by example, aligned with ``X``.
        size: number of examples per batch; must be a positive integer.
        drop_last: when True (the default, matching the historical behaviour)
            only the ``m // size`` full batches are produced and the trailing
            ``m % size`` examples are skipped for this pass. When False a
            final, smaller batch containing the remainder is yielded as well.

    Yields:
        Tuples ``(X[idx], y[idx])`` where ``idx`` is a slice of the permutation.

    Raises:
        ValueError: if ``size`` is not positive (raised on first iteration,
            since this is a generator).
    """
    if size <= 0:
        # Previously size == 0 surfaced as a ZeroDivisionError and negative
        # sizes silently produced no batches at all.
        raise ValueError(f"size must be a positive integer, got {size!r}")

    m = X.shape[0]
    index = np.random.permutation(np.arange(m))

    # With drop_last the iteration stops at the last multiple of `size`;
    # otherwise it runs to the end and the final slice is simply shorter.
    stop = (m // size) * size if drop_last else m
    for start in range(0, stop, size):
        batch_i = index[start:start + size]
        yield X[batch_i], y[batch_i]

### Training

In [13]:
import tensorflow as tf

In [14]:
def conv_layer(X, channels, strides=[1, 1, 1, 1], kernel_initializer=None, reuse=False, name="conv"):
    """Apply a 3x3 SAME-padded convolution, add a bias and pass the result through ReLU.

    Variables live in the variable scope ``"Var_" + name`` under the names
    ``'W'`` (kernel) and ``'B'`` (bias), so a later call with the same ``name``
    and ``reuse=True`` fetches the existing variables instead of creating new ones.

    Args:
        X: input tensor; its static size along axis 3 is used as the number of
            input channels of the kernel.
        channels: number of output channels.
        strides: strides forwarded to ``tf.nn.conv2d``.
        kernel_initializer: optional zero-argument factory returning the kernel
            initializer; when omitted a normal initializer with stddev 0.01 is used.
        reuse: forwarded to ``tf.variable_scope``; when truthy the existing
            ``'W'`` / ``'B'`` are looked up rather than created.
        name: suffix of the variable scope.

    Returns:
        The ReLU activations of the layer.
    """
    scope_name = "Var_" + name
    with tf.variable_scope(scope_name, reuse=reuse):

        weight_init = kernel_initializer() if kernel_initializer else tf.random_normal_initializer(stddev=0.01)

        in_channels = X.shape[3].value
        kernel_shape = [3, 3, in_channels, channels]

        if not reuse:
            # First use of this scope: create the kernel and a bias initialised to 0.1.
            W = tf.get_variable(name='W', shape=kernel_shape, initializer=weight_init)
            b = tf.get_variable(name='B', initializer=tf.constant(0.1, shape=[channels]))
        else:
            # Shared copy of the layer: fetch the variables created earlier.
            W = tf.get_variable('W')
            b = tf.get_variable('B')

        act = tf.nn.relu(tf.nn.conv2d(X, W, strides=strides, padding="SAME") + b)

        # Histogram summaries for inspecting the layer in TensorBoard.
        for tag, tensor in (("weights", W), ("biases", b), ("activations", act)):
            tf.summary.histogram(tag, tensor)

    return act

In [15]:
def fc_layer(X, units, activation=None, kernel_initializer=None, regularizer=None, reuse=False, name='FC'):
    """Fully connected layer computing ``activation(X @ W + b)``.

    Variables live in the variable scope ``"Var_" + name`` under the names
    ``'W'`` and ``'B'``; with ``reuse=True`` the existing pair is fetched
    instead of being created.

    Args:
        X: 2-D input tensor; its static size along axis 1 sets the number of
            rows of ``W`` when the variables are created.
        units: number of output units.
        activation: optional callable applied to the affine output; when
            omitted the raw affine output is returned.
        kernel_initializer: optional zero-argument factory returning the weight
            initializer; when omitted a normal initializer with stddev 0.01 is used.
        regularizer: optional regularizer passed to ``tf.get_variable`` for
            ``W``. It is only passed when the variable is created (``reuse`` falsy).
        reuse: forwarded to ``tf.variable_scope``.
        name: suffix of the variable scope.

    Returns:
        The layer output tensor.
    """
    with tf.variable_scope("Var_" + name, reuse=reuse):

        weight_init = kernel_initializer() if kernel_initializer else tf.random_normal_initializer(stddev=0.01)

        if not reuse:
            # First use of this scope: create weights (with optional regularizer)
            # and a bias initialised to 0.1.
            n_inputs = X.shape[1].value
            W = tf.get_variable(name='W', shape=[n_inputs, units], initializer=weight_init, regularizer=regularizer)
            b = tf.get_variable(name='B', initializer=tf.constant(0.1, shape=[units]))
        else:
            # Shared copy of the layer: fetch the variables created earlier.
            W = tf.get_variable('W')
            b = tf.get_variable('B')

        Z = tf.add(tf.matmul(X, W), b)
        A = activation(Z) if activation else Z

        # Histogram summaries for inspecting the layer in TensorBoard.
        for tag, tensor in (("weights", W), ("biases", b), ("activations", A)):
            tf.summary.histogram(tag, tensor)

    return A

In [16]:
def conv_model(X, y, regularization_rate=0.0, drop_rate=0.0, reuse=False, is_training=True, name="Conv_model"):
    """VGG-16-style network: five convolutional stages, two 4096-unit FC layers and a logits layer.

    Args:
        X: input image tensor handed to the first convolutional layer.
        y: label tensor/placeholder; only ``y.shape[1]`` is read, to size the output layer.
        regularization_rate: scale of the L2 regularizer passed to the fully connected layers.
        drop_rate: dropout rate applied after each 4096-unit fully connected layer.
        reuse: forwarded to every layer so that a second call can share the
            variables created by a first call.
        is_training: forwarded to ``tf.layers.dropout`` as ``training``.
        name: name scope wrapping the ops of this copy of the model.

    Returns:
        The output of the last fully connected layer, which has no activation (logits).
    """
    # (output channels, layer names) per stage; every stage ends with a 2x2, stride-2 max-pool.
    stages = (
        (64, ("conv_a1", "conv_a2")),
        (128, ("conv_b1", "conv_b2")),
        (256, ("conv_c1", "conv_c2", "conv_c3")),
        (512, ("conv_d1", "conv_d2", "conv_d3")),
        (512, ("conv_e1", "conv_e2", "conv_e3")),
    )

    with tf.name_scope(name):
        regularizer = tf.contrib.layers.l2_regularizer(scale=regularization_rate)
        kernel_initializer = tf.contrib.layers.xavier_initializer
        activation = tf.nn.relu

        for channels, layer_names in stages:
            for layer_name in layer_names:
                X = conv_layer(X, channels=channels, kernel_initializer=kernel_initializer,
                               reuse=reuse, name=layer_name)
            X = tf.nn.max_pool(X, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

        # Flatten everything except the batch axis before the dense layers.
        flat_dim = np.prod(X.shape.as_list()[1:])
        X = tf.reshape(X, [-1, flat_dim])

        for fc_name in ("fc_1", "fc_2"):
            X = fc_layer(X, 4096, activation, kernel_initializer, regularizer, reuse=reuse, name=fc_name)
            X = tf.layers.dropout(X, rate=drop_rate, training=is_training)

        # The original VGG predicts 1000 classes; here the output width follows the label tensor.
        output = fc_layer(X, y.shape[1].value, kernel_initializer=kernel_initializer,
                          regularizer=regularizer, reuse=reuse, name="last_fc")
        tf.summary.histogram('logits', output)
    return output

In [17]:
def mnist_model(X_train, y_train, learning_rate, epochs, batch_size,
                l2_rate, drop_rate, logdir, X_test, y_test, print_acc=False):
    """Build the graph, train it with Adam, and report/log/checkpoint every 10th epoch.

    Despite the name, the input placeholder is fixed to 150x150x3 images. Two
    copies of ``conv_model`` are built on the same input: a training copy
    (dropout active, creates the variables) and an evaluation copy that reuses
    those variables with dropout disabled.

    Args:
        X_train: training images, fed to a ``[None, 150, 150, 3]`` float32 placeholder.
        y_train: 2-D training labels; the second dimension sets the number of
            output classes and rows are compared to predictions via ``argmax``.
        learning_rate: learning rate of the Adam optimizer.
        epochs: number of passes over the training data.
        batch_size: mini-batch size used for training.
        l2_rate: scale of the L2 penalty on the fully connected weights; the
            penalty is added to the training loss.
        drop_rate: dropout rate of the training copy of the model.
        logdir: name used for the summary directory ``./summary/{logdir}`` and
            the checkpoint prefix ``./model/{logdir}``.
        X_test: test images.
        y_test: 2-D test labels.
        print_acc: when True, every 10th epoch one random batch of 32 examples
            from each set is evaluated, the metrics are printed, summaries are
            written and a checkpoint is saved. When False none of that happens.

    Returns:
        None.
    """
    tf.reset_default_graph()
    with tf.Session() as sess:

        n_labels = y_train.shape[1]

        X = tf.placeholder(tf.float32, shape=[None, 150, 150, 3], name="X")
        # Pad 150x150 images to 160x160 so that five 2x2 max-pools give 5x5 maps (160 / 2**5).
        paddings = tf.constant([[0, 0], [5, 5], [5, 5], [0, 0]])
        X_image = tf.pad(X, paddings)
        y = tf.placeholder(tf.float32, shape=[None, n_labels], name="labels")

        train_logits = conv_model(X_image, y, regularization_rate=l2_rate, drop_rate=drop_rate, reuse=False, is_training=True, name="Conv_Model")
        test_logits = conv_model(X_image, y, reuse=True, is_training=False, name="Conv_Eval_Model")

        with tf.name_scope("loss"):
            data_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
                                       logits=train_logits, labels=y))
            # The regularizers attached to the FC weights only register their
            # penalties in the REGULARIZATION_LOSSES collection; they must be
            # added to the objective explicitly or `l2_rate` has no effect.
            # With no registered penalties (e.g. l2_rate == 0) this term is 0.
            loss = tf.add(data_loss, tf.losses.get_regularization_loss(), name="loss")
            tf.summary.scalar("loss", loss)

        with tf.name_scope("train"):
            train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        with tf.name_scope("test_accuracy"):
            correct_prediction = tf.equal(tf.argmax(test_logits, 1), tf.argmax(y, 1))
            test_accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            tf.summary.scalar("test_accuracy", test_accuracy)

        with tf.name_scope("train_accuracy"):
            correct_prediction = tf.equal(tf.argmax(train_logits, 1), tf.argmax(y, 1))
            train_accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            tf.summary.scalar("train_accuracy", train_accuracy)

        summ = tf.summary.merge_all()

        saver = tf.train.Saver()

        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(f"./summary/{logdir}")
        try:
            writer.add_graph(sess.graph)

            for epoch in range(1, epochs+1):
                for X_batch, y_batch in get_next_batch(X_train, y_train, batch_size):
                    sess.run(train_step, {X: X_batch, y: y_batch})

                if print_acc and epoch%10 == 0:
                    # Metrics are estimated on a single random batch of 32 per set.
                    train_sample = next(get_next_batch(X_train, y_train, 32))
                    test_sample = next(get_next_batch(X_test, y_test, 32))

                    # NOTE(review): `summ` merges every summary in the graph and is
                    # evaluated on the test batch, so the "loss" and "train_accuracy"
                    # curves written to TensorBoard are computed on test data; the
                    # printed train metrics below do use the train batch.
                    test_acc, s = sess.run([test_accuracy, summ],  {X: test_sample[0], y: test_sample[1]})
                    train_l, train_acc = sess.run([loss, train_accuracy], {X: train_sample[0], y: train_sample[1]})
                    print("Epoch:", np.round(epoch, 4),
                          "Train Loss:", np.round(train_l, 4),
                          "Train accuracy:", np.round(train_acc, 4),
                          "Test accuracy:", np.round(test_acc,4))
                    writer.add_summary(s, epoch)
                    saver.save(sess, f"./model/{logdir}")
        finally:
            # Flush pending events and release the event file even if training
            # is interrupted or fails.
            writer.close()


In [None]:
# Train the VGG-16-style model; with print_acc=True metrics are printed every 10th epoch.
mnist_model(
    X_train=X_train,
    y_train=y_train,
    learning_rate=1e-5,
    epochs=300,
    batch_size=32,
    l2_rate=0.0,
    drop_rate=0.5,
    logdir='VGG-16',
    X_test=X_test,
    y_test=y_test,
    print_acc=True,
)

INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
Epoch: 10 Train Loss: 0.7206 Train accuracy: 0.4375 Test accuracy: 0.4688
Epoch: 20 Train Loss: 0.7306 Train accuracy: 0.5 Test accuracy: 0.5312
Epoch: 30 Train Loss: 0.6406 Train accuracy: 0.625 Test accuracy: 0.5


In [None]:
# Total number of trainable parameters: the element count of each trainable
# variable's shape in the current default graph, summed over all of them.
np.sum([np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()])