The aim of this exercise is to train you in debugging networks using the good old print function and also tensorboard. To simulate poor training, we will train a multilayer perceptron using the CIFAR data.

1. Use the CIFAR data set reader from the first homework and read the CIFAR-10 files again. 
2. Apply random noise to the image 
3. Convert the image to float and scale to [0.0, 1.0] by dividing the pixel values by the highest pixel value.
4. Convert all labels to onehot encoding
5. Build a 3-layer multilayer perceptron of size [512, 256, 128]. 
6. Create a tensorboard summary for plotting the histogram of the weights of the three layers.
7. Also write the cost / loss at the end of each epoch to tensorboard.
8. Train the network with learning rates of [0.1, 0.01, 0.001]. You will notice that the network will not converge well.
9. Submit the snapshot of the histograms for the three learning rates. Describe your observations.

In [5]:
import glob
import os
import matplotlib.pyplot as plt
import numpy as np
import operator
import functools
import random
import tensorflow as tf
import pandas as pd
import datetime
from itertools import chain
from sklearn.preprocessing import OneHotEncoder

In [6]:
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

In [7]:
noofepochs = 128
batch_size = 16
number_images_batch = 10000
image_shape = (32, 32, 3,)
image_size = functools.reduce(operator.mul, image_shape)
print(image_size)

3072


In [8]:
# Read all data
alldata = np.zeros((5*number_images_batch, image_size), dtype=np.int)
alllabels = []
begin = 0
end = number_images_batch
for filename in glob.glob("data_batch*"):
    print(filename)
    d1 = unpickle(filename)
    labels = d1[b'labels']
    data = d1[b'data']
    print(data.shape)
    alldata[begin:end,:] = data
    alllabels.extend(labels)
    begin = end
    end = end+number_images_batch
alldata = alldata.astype(np.uint8)
print(alldata.shape, type(alllabels))
print(len(alllabels))



data_batch_1
(10000, 3072)
data_batch_2
(10000, 3072)
data_batch_3
(10000, 3072)
data_batch_4
(10000, 3072)
data_batch_5
(10000, 3072)
(50000, 3072) <class 'list'>
50000


In [37]:
learning_rate = 0.1
#learning_rate = 0.01
#learning_rate = 0.001

epochs = 10
batch_size = 512
noofbatches = 10

ninput = 3072
noutput = 10
nhidden1 = 512
nhidden2 = 256
nhidden3 = 128

In [38]:
def randomNoise(data):
    noise = np.random.normal(0, 3, data.size).reshape([-1, 3072])
    data = data + noise
    return data

In [39]:
randomNoise(alldata)

array([[ 60.47020784,  44.45820698,  52.80229117, ..., 138.75301528,
         78.72059734,  69.98884811],
       [150.89699629, 122.18375752, 104.39244165, ..., 136.67728545,
        141.87877197, 140.86316297],
       [252.55119394, 253.18141998, 253.2041787 , ...,  86.21418686,
         79.72564839,  82.01534999],
       ...,
       [ 32.94417252,  42.06115004,  43.58377499, ...,  76.34587291,
         62.47063509,  44.23388592],
       [194.32297041, 186.95180031, 182.0346176 , ..., 172.25381787,
        170.05516358, 167.16492175],
       [226.5123547 , 238.74990238, 231.61185957, ..., 173.83186012,
        165.00423619, 164.71086249]])

In [40]:
print('alldata -> {}'.format(alldata))
data1 = np.array(alldata/255, dtype=np.float32)
print(data1)

alldata -> [[ 59  43  50 ... 140  84  72]
 [154 126 105 ... 139 142 144]
 [255 253 253 ...  83  83  84]
 ...
 [ 35  40  42 ...  77  66  50]
 [189 186 185 ... 169 171 171]
 [229 236 234 ... 173 162 161]]
[[0.23137255 0.16862746 0.19607843 ... 0.54901963 0.32941177 0.28235295]
 [0.6039216  0.49411765 0.4117647  ... 0.54509807 0.5568628  0.5647059 ]
 [1.         0.99215686 0.99215686 ... 0.3254902  0.3254902  0.32941177]
 ...
 [0.13725491 0.15686275 0.16470589 ... 0.3019608  0.25882354 0.19607843]
 [0.7411765  0.7294118  0.7254902  ... 0.6627451  0.67058825 0.67058825]
 [0.8980392  0.9254902  0.91764706 ... 0.6784314  0.63529414 0.6313726 ]]


In [41]:
alllabels = np.array(alllabels)
onehot = np.eye(10)[alllabels]
print(onehot)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]
 ...
 [0. 0. 0. ... 0. 0. 1.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]]


In [44]:
X = tf.placeholder(tf.float32, [None, ninput])
Y = tf.placeholder(tf.float32, [None, noutput])

In [45]:
weights = \
{
        'h1': tf.Variable(tf.random_normal([ninput, nhidden1])),
        'h2': tf.Variable(tf.random_normal([nhidden1, nhidden2])),
        'h3': tf.Variable(tf.random_normal([nhidden2, nhidden3])),
        'out': tf.Variable(tf.random_normal([nhidden3, noutput]))
}

biases = \
{
    'b1': tf.Variable(tf.random_normal([nhidden1])),
    'b2': tf.Variable(tf.random_normal([nhidden2])),
    'b3': tf.Variable(tf.random_normal([nhidden3])),
    'out': tf.Variable(tf.random_normal([noutput]))
}

In [46]:
def multiperceptron(X):
    l1 = tf.nn.sigmoid(tf.add(tf.matmul(X, weights['h1']), biases['b1']))
    l2 = tf.nn.sigmoid(tf.add(tf.matmul(l1, weights['h2']), biases['b2']))
    l3 = tf.nn.sigmoid(tf.add(tf.matmul(l2, weights['h3']), biases['b3']))
    outl = tf.add(tf.matmul(l3, weights['out']), biases['out'])
    return outl
    
model = multiperceptron(X)


In [47]:
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = model, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate)
train_min = optimizer.minimize(loss)

correct_prediction = tf.equal(tf.argmax(model, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))



In [48]:
writer = tf.summary.FileWriter("./logs", graph=tf.get_default_graph())
tf.summary.scalar("loss", loss)

tf.summary.histogram("weight_1",weights['h1'])
tf.summary.histogram("weight_2",weights['h2'])
tf.summary.histogram("weight_3",weights['h3'])
tf.summary.scalar("loss", loss)

init = tf.global_variables_initializer()

In [50]:
def getbatch(xval, yval, batchsize):
    arraylen = len(xval)
    count = 0 
    while count < arraylen/batchsize:
        start = random.randint(0, arraylen-batchsize-1)
        count += 1
        yield (xval[start:start+batchsize], yval[start:start+batchsize])

In [36]:
#0.1
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(epochs):
        for (x, y) in getbatch(alldata, onehot, batch_size):
            sess.run(train_min, feed_dict={X:x, Y:y})
        losscalc, accuracycalc = sess.run([loss, accuracy], feed_dict={X:x, Y:y})
        print("Epoch: %d, Loss: %0.4f, Accuracy: %0.4f"%(epoch, losscalc, accuracycalc))
            
    accuracycalc = sess.run(accuracy, feed_dict={X: alldata, Y: onehot})
    print("Testing accuracy: %0.4f"%(accuracycalc))

Epoch: 0, Loss: 2.3375, Accuracy: 0.0781
Epoch: 1, Loss: 2.3220, Accuracy: 0.1016
Epoch: 2, Loss: 2.3293, Accuracy: 0.1172
Epoch: 3, Loss: 2.3605, Accuracy: 0.1113
Epoch: 4, Loss: 2.3670, Accuracy: 0.0898
Epoch: 5, Loss: 2.3515, Accuracy: 0.0938
Epoch: 6, Loss: 2.3199, Accuracy: 0.1211
Epoch: 7, Loss: 2.3580, Accuracy: 0.0898
Epoch: 8, Loss: 2.3369, Accuracy: 0.1035
Epoch: 9, Loss: 2.3560, Accuracy: 0.1113
Testing accuracy: 0.1000
