# Hands-on tutorial on Convnets with TensorFlow
Adapted from assignments 2 and 3 of CS231N: 

http://cs231n.github.io/assignment2/

http://cs231n.github.io/assignment3/

Pre-requisites:

CS231N:
http://cs231n.github.io/neural-networks-3

http://cs231n.github.io/convolutional-networks/




Python 2.7: https://www.codecademy.com/tracks/python

Numpy: https://www.reddit.com/r/Python/comments/1wwwss/100_numpy_exercises/

TensorFlow:
https://github.com/aymericdamien/TensorFlow-Examples
 

Download assigment 2: http://vision.stanford.edu/teaching/cs231n/assignment2.zip


In [1]:


%matplotlib inline

import numpy as np
from sklearn.preprocessing import StandardScaler  
import pandas as pd

from matplotlib import pyplot as plt

from cs231n.data_utils import load_CIFAR_batch


from __future__ import absolute_import
from __future__ import print_function

import tensorflow as tf


# Load the data
Load up part of CIFAR-10 data, so we can use it to train a classifier.

In [None]:

cifar10_batch_1 = 'cs231n/datasets/cifar-10-batches-py/data_batch_1'
X_train, y_train = load_CIFAR_batch(cifar10_batch_1)
    
    

# Visualize some images


In [None]:
img = 255- X_train[np.random.randint(3000)] 

plt.imshow(img)

# Logistic Regression


In [None]:
X_train = X_train.astype(np.float64)



y_train=pd.get_dummies(y_train)


y_train=np.array(y_train)
y_train = y_train.astype(np.float64)


In [None]:
X_train[244]



In [None]:
X_train -= np.mean(X_train)
X_train /= np.std(X_train, axis = 0)

In [None]:

X_valid, y_valid= X_train[3000:4000], y_train[3000:4000] 

X_train, y_train = X_train[:3000], y_train[:3000] #smaller dataset to speed-up experimentation


In [None]:
type(X_train[344,5])

In [None]:
X_train=np.reshape(X_train,(3000,3*32*32))

In [None]:
X_valid=np.reshape(X_valid,(1000,3*32*32))

In [None]:
# Parameters
learning_rate = 0.01


# tf Graph Input
x = tf.placeholder("float", [None, 3*32*32]) # mnist data image of shape 28*28=784
y = tf.placeholder("float", [None, 10]) # 0-9 digits recognition => 10 classes

# Create model

# Set model weights
W = tf.Variable(tf.zeros([3*32*32, 10]))
b = tf.Variable(tf.zeros([10]))

# Construct model
logits = tf.matmul(x, W) + b

# Minimize error using cross entropy
# Cross entropy
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, y))


In [None]:
# Gradient Descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss) 

In [None]:
# Initializing the variables
init = tf.initialize_all_variables()

training_epochs = 25
batch_size = 30
display_step = 1

# Launch the graph
with tf.Session() as sess:
    sess.run(init)

    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = 3000 #int(mnist.train.num_examples/batch_size)
        # Loop over all batches
        for i in range(0,total_batch,batch_size):
            step=min(i+batch_size,len(X_train))
            batch_xs, batch_ys = X_train[i:step], y_train[i:step]
            # Fit training using batch data
            sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys})
            # Compute average loss
            avg_cost += sess.run(loss, feed_dict={x: batch_xs, y: batch_ys})/total_batch
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))

    print("Optimization Finished!")



    # Test model
    correct_prediction = tf.equal(tf.argmax(tf.nn.softmax(logits), 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval({x: X_valid, y: y_valid}))

# Train a ConvNet!

The architecture is conv-relu-pool-Dense-softmax, where the conv layer uses stride-1 "same" convolutions to preserve the input size; the pool layer uses non-overlapping
  2x2 pooling regions.

In [2]:
cifar10_batch_1 = 'cs231n/datasets/cifar-10-batches-py/data_batch_1'
X_train, y_train = load_CIFAR_batch(cifar10_batch_1)
    
y_train=pd.get_dummies(y_train)

y_train=np.array(y_train)

In [3]:
# Don't forget data normalization!

X_train -= np.mean(X_train)
X_train /= np.std(X_train, axis = 0)

In [4]:
train_size=3000

X_valid, y_valid= X_train[train_size:train_size+2000], y_train[train_size:train_size+2000] 

X_train, y_train = X_train[:train_size], y_train[:train_size]    

In [5]:
# number of convolutional filters to use at each layer
nb_filters = 32
# level of pooling to perform at each layer (POOL x POOL)
nb_pool = 2
# level of convolution to perform at each layer (CONV x CONV)
nb_conv =  3
# the CIFAR10 images are RGB
image_dimensions = 3

n_classes = 10

# Parameters
#learning_rate = 0.01
#training_iters = 1000
batch_size = 100
display_step = 1
#suggested parameters:
# nb_epoch=30, batch_size=10, validation_split=0.33333
# lr=0.0001, decay=1e-6, momentum=0.9


In [6]:

x = tf.placeholder("float", shape=[None, 32,32,3])
y = tf.placeholder("float", shape=[None, 10])

keep_prob = tf.placeholder(tf.types.float32) #dropout parameter

##Your code here##


In [51]:
# Create model
def conv2d(img, w, b):
    return tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(img, w, strides=[1, 1, 1, 1], 
                                                  padding='SAME'),b))

def max_pool(img, k):
    return tf.nn.max_pool(img, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')

def conv_net(_X, _weights, _biases, _dropout):
#def conv_net(_X, _weights, _biases):
    
#    _X = tf.reshape(_X, shape=[-1, 28, 28, 1])
    # Convolution Layer
    conv1 = conv2d(_X, _weights['wc1'], _biases['bc1'])
    # Max Pooling (down-sampling)
    conv1 = max_pool(conv1, k=2)
    # Apply Dropout
    conv1 = tf.nn.dropout(conv1, _dropout)

    # Fully connected layer
    # Reshape conv1 output to fit dense layer input
    dense1 = tf.reshape(conv1, [-1, _weights['wd1'].get_shape().as_list()[0]])
  #  dense1 = tf.reshape(conv1, [-1, 16*16*30])
    # Relu activation
 #   dense1 = tf.nn.relu(tf.add(tf.matmul(dense1, _weights['wd1']), _biases['bd1']))
    # Apply Dropout
    dense1 = tf.nn.dropout(dense1, _dropout) # Apply Dropout

    # Output, class prediction
    out = tf.add(tf.matmul(dense1, _weights['out']), _biases['out'])
    return out

In [52]:
# Store layers weight & bias
weights = {
    # 5x5 conv, 1 input, 20 outputs
    'wc1': tf.Variable(tf.random_normal([3, 3, 3, 30])), 
    # fully connected, 14*14*20 inputs, 512 outputs
    'wd1': tf.Variable(tf.random_normal([16*16*30, 16*16*30])), 
    # 1024 inputs, 10 outputs (class prediction)
    'out': tf.Variable(tf.random_normal([16*16*30, n_classes])) 
}

biases = {
    'bc1': tf.Variable(tf.truncated_normal([30])),
    'bd1': tf.Variable(tf.truncated_normal([16*16*30])),
    'out': tf.Variable(tf.truncated_normal([n_classes]))
}

In [53]:
# Parameters
learning_rate = 0.0001

# Network Parameters
n_classes = 10 # MNIST total classes (0-9 digits)
dropout = 0.5 # Dropout, probability to keep units

In [54]:
# Construct model
pred = conv_net(x, weights, biases, dropout)

In [55]:
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))

optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost) 
#optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

In [56]:
# Evaluate model
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [57]:
# Initializing the variables
init = tf.initialize_all_variables()

In [58]:
# Create a summary to monitor cost function
train_loss=tf.scalar_summary("train loss", cost)
val_loss=tf.scalar_summary("val loss", cost)
acc_summ=tf.scalar_summary("acc", accuracy)

# Merge all summaries to a single operator
#merged_summary_op = tf.merge_all_summaries()

In [60]:
num_epochs=15
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    
    # Set logs writer into folder /tmp/tensorflow_logs
    summary_writer = tf.train.SummaryWriter('/tmp/tensorflow_logs3', graph_def=sess.graph_def)
    
    # Keep training until reach max iterations
    for epoch in range(num_epochs):
      avg_acc=0.
      avg_loss=0.
      total_batch = int(train_size/batch_size)
      for batches in range(total_batch):
        bat=min(batches+(batch_size),train_size)    
        batch_xs, batch_ys = X_train[batches:bat],y_train[batches:bat]
        # Fit training using batch data
        sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys, keep_prob: dropout})
        acc = sess.run(accuracy, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.})
            # Calculate batch loss
        loss = sess.run(cost, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.})
        
        avg_acc= avg_acc*float(batches)/(float(batches)+1) + acc/(float(batches)+1)
        
      
        avg_loss= avg_loss*float(batches)/(float(batches)+1) + loss/(float(batches)+1)
        
        
        
        # Write logs at every iteration
        summary_str = sess.run(train_loss, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.})
        summary_val = sess.run(val_loss, feed_dict={x: X_valid[:200], y: y_valid[:200], keep_prob: 1.})
        
        summary_train_acc = sess.run(acc_summ, feed_dict={x: batch_xs, y: batch_ys,keep_prob: 1.})
        summary_val_acc = sess.run(acc_summ, feed_dict={x: X_valid[:200], y: y_valid[:200],keep_prob: 1.})

        summary_writer.add_summary(summary_str, epoch*total_batch + batches)
        summary_writer.add_summary(summary_val, epoch*total_batch + batches)
        summary_writer.add_summary(summary_train_acc, epoch*total_batch + batches)
        summary_writer.add_summary(summary_val_acc, epoch*total_batch + batches)
                                             
      print("epoch " + str(epoch+1) + ", Train. Loss= " + \
                  "{:.6f}".format(avg_loss) + ", Train. Acc= " + "{:.5f}".format(avg_acc))

    print("Optimization Finished!")
    # Calculate accuracy for 256 mnist test images
    print("Test Acc:", sess.run(accuracy, feed_dict={x: X_valid,y: y_valid, keep_prob: 1.}))

epoch 1, Train. Loss= 878.349164, Train. Acc= 0.09233
epoch 2, Train. Loss= 798.400403, Train. Acc= 0.09633
epoch 3, Train. Loss= 747.689707, Train. Acc= 0.10233
epoch 4, Train. Loss= 691.092399, Train. Acc= 0.10933
epoch 5, Train. Loss= 642.241565, Train. Acc= 0.11533
epoch 6, Train. Loss= 603.782351, Train. Acc= 0.10633
epoch 7, Train. Loss= 569.361747, Train. Acc= 0.13200
epoch 8, Train. Loss= 535.587983, Train. Acc= 0.11533


KeyboardInterrupt: 

Using Tensorboard, modify your code above to plot the loss training/validation curves, and the accuracy  training/validation curves. It should show clear overfitting:



# Data Augmentation


Another way to reduce overfitting is to implement data augmentation. Since we have very little training data, we will use what little training data we have to generate artificial data, and use this artificial data to train our network.


In [95]:
def data_augment(img_batch):
    length=int(tf.Tensor.get_shape(img_batch)[0])
    for k in range(length):
        toss=np.random.randint(0,2)
        if toss==0:
            tf.gather(img_batch,k)= tf.image.flip_left_right(tf.gather(img_batch,k)
    
    return img_batch

SyntaxError: invalid syntax (<ipython-input-95-4447e0de7ec0>, line 8)

In [84]:
c = tf.constant([[[[1.0, 2.0], [3.0, 4.0], [3.0, 4.0]],[[1.0, 2.0], [3.0, 4.0], [3.0, 4.0]]],[[[1.0, 2.0], [3.0, 4.0], [3.0, 4.0]],[[1.0, 2.0], [3.0, 4.0], [3.0, 4.0]]]])

In [92]:
tf.Tensor.get_shape(tf.gather(c, 1))

TensorShape([Dimension(2), Dimension(3), Dimension(2)])

In [90]:
sess=tf.Session()
sess.run(c)

array([[[[ 1.,  2.],
         [ 3.,  4.],
         [ 3.,  4.]],

        [[ 1.,  2.],
         [ 3.,  4.],
         [ 3.,  4.]]],


       [[[ 1.,  2.],
         [ 3.,  4.],
         [ 3.,  4.]],

        [[ 1.,  2.],
         [ 3.,  4.],
         [ 3.,  4.]]]], dtype=float32)

In [98]:
sess.run(tf.image.flip_left_right(tf.gather(c, 1)))

array([[[ 3.,  4.],
        [ 3.,  4.],
        [ 1.,  2.]],

       [[ 3.,  4.],
        [ 3.,  4.],
        [ 1.,  2.]]], dtype=float32)

In [86]:
data_augment(c)

ValueError: Shape TensorShape([Dimension(2), Dimension(2), Dimension(3), Dimension(2)]) must have rank 1

In [85]:
tf.Tensor.get_shape(c)

TensorShape([Dimension(2), Dimension(2), Dimension(3), Dimension(2)])

In [37]:
pred = conv_net(data_augment(x), weights, biases,dropout)

TypeError: __int__ should return int object

Are you satisfied by the improvement? Comment your result:

 (your comment here)