In [1]:
%tensorflow_version 1.x
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt


TensorFlow 1.x selected.


In [8]:
def loadData():
    """Load the two-class subset of notMNIST and split it into train/valid/test.

    Reads ``notMNIST.npz`` from the current working directory, keeps only the
    samples labelled 2 (relabelled to 1) or 9 (relabelled to 0), divides the
    image values by 255, shuffles the samples with the fixed NumPy seed 421
    and slices the result: the first 3500 samples for training, the next 100
    for validation and everything after index 3600 for testing.

    Returns:
        Tuple ``(trainData, validData, testData, trainTarget, validTarget,
        testTarget)``. Targets are column vectors of shape ``(n, 1)``.

    Note:
        Reseeds NumPy's global random generator as a side effect.
    """
    POSITIVE_LABEL, NEGATIVE_LABEL = 2, 9
    TRAIN_END, VALID_END = 3500, 3600

    with np.load('notMNIST.npz') as archive:
        images = archive['images']
        labels = archive['labels']

    # Keep only the two classes of interest.
    keep = (labels == POSITIVE_LABEL) | (labels == NEGATIVE_LABEL)
    images = images[keep] / 255.
    targets = labels[keep].reshape(-1, 1)

    # Relabel in place so the label dtype of the archive is preserved.
    targets[targets == POSITIVE_LABEL] = 1
    targets[targets == NEGATIVE_LABEL] = 0

    # Fixed seed: every call yields the same shuffle and therefore the same split.
    np.random.seed(421)
    order = np.arange(len(images))
    np.random.shuffle(order)
    images = images[order]
    targets = targets[order]

    trainData, trainTarget = images[:TRAIN_END], targets[:TRAIN_END]
    validData, validTarget = images[TRAIN_END:VALID_END], targets[TRAIN_END:VALID_END]
    testData, testTarget = images[VALID_END:], targets[VALID_END:]
    return trainData, validData, testData, trainTarget, validTarget, testTarget


In [3]:
def y_hat(z):
  """Return the logistic sigmoid 1 / (1 + exp(-z)), computed element-wise with NumPy."""
  denominator = 1 + np.exp(-z)
  return 1 / denominator
  
def accuracy(W,x,b,y):
  """Return the fraction of samples whose thresholded prediction equals the label.

  A sample is predicted positive when sigmoid(x @ W + b) >= 0.5. The number of
  matches with ``y`` is divided by the size of ``y``'s first dimension.
  Presumably ``y`` has the same (N, 1) shape as the predictions; other shapes
  would broadcast in the comparison -- verify against callers.
  """
  probabilities = y_hat(np.matmul(x, W) + b)
  matches = (probabilities >= 0.5) == y
  sample_count = np.shape(y)[0]
  return np.sum(matches) / sample_count

def CELoss(x,y,W,b,reg):
  """Build the regularized sigmoid cross-entropy loss for a linear model.

  Args:
    x: input tensor multiplied by ``W`` (rows are samples).
    y: labels, passed to ``tf.losses.sigmoid_cross_entropy`` as
      ``multi_class_labels``.
    W: weight tensor.
    b: bias added to ``x @ W``.
    reg: coefficient applied to ``tf.nn.l2_loss(W)``.

  Returns:
    Tensor holding cross-entropy plus ``reg * tf.nn.l2_loss(W)``.
  """
  logits = tf.matmul(x, W) + b
  # tf.losses.sigmoid_cross_entropy applies the sigmoid internally, so it must
  # receive the raw logits; feeding tf.sigmoid(logits) would squash them twice.
  cross_entropy = tf.losses.sigmoid_cross_entropy(multi_class_labels=y, logits=logits)
  weight_penalty = reg * tf.nn.l2_loss(W)
  return cross_entropy + weight_penalty


In [5]:
def buildGraph(beta1=None, beta2=None, epsilon=None,
               minibatch_size=700, alpha=0.001, epochs=700, reg=0):
  """Train the logistic-regression classifier with Adam and record per-epoch metrics.

  Builds a TF1 graph (weights ``W`` drawn from a truncated normal with
  stddev 0.5, bias ``b`` initialised to zero), loads the data with
  ``loadData()``, and runs minibatch training. After each epoch it records the
  loss and accuracy on the training, validation and test sets.

  Args:
    beta1, beta2, epsilon: Adam hyper-parameters. Each one is forwarded to
      ``tf.train.AdamOptimizer`` only when it is not None; otherwise
      TensorFlow's own default is used.
    minibatch_size: number of samples per gradient step.
    alpha: Adam learning rate.
    epochs: number of passes over the (reshuffled) training set.
    reg: L2 regularization coefficient handed to ``CELoss``.

  Returns:
    ``(trainLossArr, validLossArr, testLossArr, trainAccuracy, validAccuracy,
    testAccuracy)``: six lists with one entry per epoch. The training loss of
    an epoch is the loss of its last minibatch; training accuracy is computed
    on the whole training set.

  Raises:
    ValueError: if ``minibatch_size`` is smaller than 1 or larger than the
      training set (no full minibatch could be formed).

  Note:
    Every call adds new ops and variables to the default TensorFlow graph.
  """
  trainData, validData, testData, trainTarget, validTarget, testTarget = loadData()
  # Flatten every image into one row so it matches the 784-wide placeholders.
  trainData = trainData.reshape(trainData.shape[0], -1)
  validData = validData.reshape(validData.shape[0], -1)
  testData = testData.reshape(testData.shape[0], -1)

  N = trainData.shape[0]
  if minibatch_size < 1 or minibatch_size > N:
    raise ValueError(
        "minibatch_size must be between 1 and the number of training samples "
        "(%d), got %r" % (N, minibatch_size))
  # Number of full minibatches per epoch; leftover samples are skipped for
  # that epoch (a different remainder each time because of the reshuffle).
  batchRange = N // minibatch_size

  # Forward only the Adam hyper-parameters the caller actually set.
  adam_kwargs = {
      key: value
      for key, value in (("beta1", beta1), ("beta2", beta2), ("epsilon", epsilon))
      if value is not None
  }

  W = tf.Variable(
      tf.random.truncated_normal(shape=(784, 1), mean=0.0, stddev=0.5, dtype=tf.float32, seed=None),
      name="W")
  b = tf.Variable(tf.zeros(1), name="b")
  x = tf.placeholder(tf.float32, (None, 784), name="x")
  y = tf.placeholder(tf.float32, (None, 1), name="y")

  valid_data = tf.placeholder(tf.float32, shape=(100, 784), name="valid_data")
  valid_target = tf.placeholder(tf.int8, shape=(100, 1), name="valid_target")

  test_data = tf.placeholder(tf.float32, shape=(145, 784), name="test_data")
  test_target = tf.placeholder(tf.int8, shape=(145, 1), name="test_target")

  loss = CELoss(x, y, W, b, reg)
  validLoss = CELoss(valid_data, valid_target, W, b, reg)
  testLoss = CELoss(test_data, test_target, W, b, reg)

  optimizer = tf.train.AdamOptimizer(learning_rate=alpha, **adam_kwargs).minimize(loss)

  trainLossArr = []
  validLossArr = []
  testLossArr = []
  trainAccuracy = []
  validAccuracy = []
  testAccuracy = []

  # The validation/test feeds never change, so build them once.
  eval_feed = {
      valid_data: validData, valid_target: validTarget,
      test_data: testData, test_target: testTarget,
  }

  with tf.Session() as session:
    session.run(tf.global_variables_initializer())

    for epoch in range(epochs):
      # Reshuffle the training set at the start of every epoch.
      newInd = np.arange(len(trainData))
      np.random.shuffle(newInd)
      trainData, trainTarget = trainData[newInd], trainTarget[newInd]

      for j in range(batchRange):
        XBatch = trainData[j*minibatch_size:(j+1)*minibatch_size]
        YBatch = trainTarget[j*minibatch_size:(j+1)*minibatch_size]
        _, train_loss = session.run([optimizer, loss], feed_dict={x: XBatch, y: YBatch})

      # Read the parameters and held-out losses in a separate run, once per
      # epoch, so they reflect the weights after the epoch's final update.
      updated_w, updated_b, valid_loss, test_loss = session.run(
          [W, b, validLoss, testLoss], feed_dict=eval_feed)

      trainLossArr.append(train_loss)
      trainAccuracy.append(accuracy(updated_w, trainData, updated_b, trainTarget))

      validLossArr.append(valid_loss)
      validAccuracy.append(accuracy(updated_w, validData, updated_b, validTarget))

      testLossArr.append(test_loss)
      testAccuracy.append(accuracy(updated_w, testData, updated_b, testTarget))

  return trainLossArr,validLossArr,testLossArr,trainAccuracy,validAccuracy,testAccuracy
  


In [6]:
def plot(figureNum, title,yLabel,trainArray,validArray,testArray):
    """Plot the training and validation curves of one metric and show the figure.

    Args:
        figureNum: matplotlib figure number to draw into.
        title: figure title.
        yLabel: y-axis label, also used to build the legend entries.
        trainArray: per-step training values.
        validArray: per-step validation values.
        testArray: accepted for interface compatibility; not plotted.
    """
    plt.figure(figureNum)
    plt.title(title)
    plt.ylabel(yLabel)
    plt.xlabel('Iterations')
    # Derive the x-axis from each series so histories of any length can be
    # plotted, not only runs with exactly 700 recorded steps.
    plt.plot(range(len(trainArray)), trainArray)
    plt.plot(range(len(validArray)), validArray)
    plt.legend(["Training "+yLabel,"Valid "+yLabel],loc='upper right')
    plt.show()

In [9]:
# Train with the default settings and keep the recorded histories for plotting.
(trainLossArr, validLossArr, testLossArr,
 trainAccuracy, validAccuracy, testAccuracy) = buildGraph()

In [None]:
# Experiments to run:
#   1. Train with each of the following minibatch sizes: 100, 700, 1750.
#   2. Set the minibatch size to 500 and adjust the Adam optimizer parameters one at a time:
#        β1 ∈ {0.95, 0.99}
#        β2 ∈ {0.99, 0.9999}
#        ε ∈ {1e-09, 1e-4}

In [None]:
# Plot the loss and accuracy histories, then report the metrics of the last
# recorded epoch. The labels name batch size 700 to match the plot titles,
# and [-1] selects the final entry whatever the number of epochs.
plot(3, "Training and Validation Loss with α=0.001 batch=700","Loss",trainLossArr,validLossArr,testLossArr)
plot(4, "Training and Validation Accuracy with α=0.001 batch=700","Accuracy",trainAccuracy,validAccuracy,testAccuracy)
print("training accuracy batch 700:")
print(trainAccuracy[-1])
print("valid accuracy batch 700:")
print(validAccuracy[-1])
print("testing accuracy batch 700:")
print(testAccuracy[-1])