<a href="https://colab.research.google.com/github/Chaitanya-Atluri/Todo-list/blob/master/cn_nn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
################################################################################
#
# LOGISTICS
#
#    Your name as in eLearning
#    Your UT Dallas identifier
#

# DESCRIPTION
#
#    MNIST image classification with an xNN written and trained in Python
#
# INSTRUCTIONS
#
#    1. Go to Google Colaboratory: https://colab.research.google.com/notebooks/welcome.ipynb
#    2. File - New Python 3 notebook
#    3. Cut and paste this file into the cell (feel free to divide into multiple cells)
#    4. Runtime - Run all
#
# NOTES
#
#    1. This does not use PyTorch, TensorFlow or any other xNN library
#
#    2. Include a short summary here in nn.py of what you did for the neural
#       network portion of code
#
#    3. Include a short summary here in cnn.py of what you did for the
#       convolutional neural network portion of code
#
#    4. Include a short summary here in extra.py of what you did for the extra
#       portion of code
#
################################################################################

################################################################################
#
# IMPORT
#
################################################################################

#
# you should not need any import beyond the below
# PyTorch, TensorFlow, ... is not allowed
#

import os.path
import urllib.request
import gzip
import math
import numpy             as np
import matplotlib.pyplot as plt
import time



In [9]:

################################################################################
#
# PARAMETERS
#
################################################################################

#
# add other hyper parameters here with some logical organization
#

# data
DATA_NUM_TRAIN         = 60000
DATA_NUM_TEST          = 10000
DATA_CHANNELS          = 1
DATA_ROWS              = 28
DATA_COLS              = 28
DATA_CLASSES           = 10
DATA_URL_TRAIN_DATA    = 'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz'
DATA_URL_TRAIN_LABELS  = 'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz'
DATA_URL_TEST_DATA     = 'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz'
DATA_URL_TEST_LABELS   = 'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz'
DATA_FILE_TRAIN_DATA   = 'train_data.gz'
DATA_FILE_TRAIN_LABELS = 'train_labels.gz'
DATA_FILE_TEST_DATA    = 'test_data.gz'
DATA_FILE_TEST_LABELS  = 'test_labels.gz'
EPOCHS                 = 5

# display
DISPLAY_ROWS   = 8
DISPLAY_COLS   = 4
DISPLAY_COL_IN = 10
DISPLAY_ROW_IN = 25
DISPLAY_NUM    = DISPLAY_ROWS*DISPLAY_COLS




In [10]:
################################################################################
#
# DATA
#
################################################################################

# download
if (os.path.exists(DATA_FILE_TRAIN_DATA)   == False):
    urllib.request.urlretrieve(DATA_URL_TRAIN_DATA,   DATA_FILE_TRAIN_DATA)
if (os.path.exists(DATA_FILE_TRAIN_LABELS) == False):
    urllib.request.urlretrieve(DATA_URL_TRAIN_LABELS, DATA_FILE_TRAIN_LABELS)
if (os.path.exists(DATA_FILE_TEST_DATA)    == False):
    urllib.request.urlretrieve(DATA_URL_TEST_DATA,    DATA_FILE_TEST_DATA)
if (os.path.exists(DATA_FILE_TEST_LABELS)  == False):
    urllib.request.urlretrieve(DATA_URL_TEST_LABELS,  DATA_FILE_TEST_LABELS)

# training data
# unzip the file, skip the header, read the rest into a buffer and format to NCHW
file_train_data   = gzip.open(DATA_FILE_TRAIN_DATA, 'r')
file_train_data.read(16)
buffer_train_data = file_train_data.read(DATA_NUM_TRAIN*DATA_ROWS*DATA_COLS)
train_data        = np.frombuffer(buffer_train_data, dtype=np.uint8).astype(np.float64)
train_data        = train_data.reshape(DATA_NUM_TRAIN, 1, DATA_ROWS, DATA_COLS)

# training labels
# unzip the file, skip the header, read the rest into a buffer and format to a vector
file_train_labels   = gzip.open(DATA_FILE_TRAIN_LABELS, 'r')
file_train_labels.read(8)
buffer_train_labels = file_train_labels.read(DATA_NUM_TRAIN)
train_labels        = np.frombuffer(buffer_train_labels, dtype=np.uint8).astype(np.int64)

# testing data
# unzip the file, skip the header, read the rest into a buffer and format to NCHW
file_test_data   = gzip.open(DATA_FILE_TEST_DATA, 'r')
file_test_data.read(16)
buffer_test_data = file_test_data.read(DATA_NUM_TEST*DATA_ROWS*DATA_COLS)
test_data        = np.frombuffer(buffer_test_data, dtype=np.uint8).astype(np.float64)
test_data        = test_data.reshape(DATA_NUM_TEST, 1, DATA_ROWS, DATA_COLS)

# testing labels
# unzip the file, skip the header, read the rest into a buffer and format to a vector
file_test_labels   = gzip.open(DATA_FILE_TEST_LABELS, 'r')
file_test_labels.read(8)
buffer_test_labels = file_test_labels.read(DATA_NUM_TEST)
test_labels        = np.frombuffer(buffer_test_labels, dtype=np.uint8).astype(np.int64)

np.random.seed(1)

print(train_data[1])

# debug
# print(train_data.shape)   # (60000, 1, 28, 28)
# print(train_labels.shape) # (60000,)
# print(test_data.shape)    # (10000, 1, 28, 28)
# print(test_labels.shape)  # (10000,)


[[[  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
     0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
     0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
     0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
     0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
     0.  51. 159. 253. 159.  50.   0.   0.   0.   0.   0.   0.   0.   0.]
  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    48. 238. 252. 252. 252. 237.   0.   0.   0.   0.   0.   0.   0.   0.]
  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  54.
   227. 253. 252. 239. 233. 252.  57.   6.   

In [11]:
################################################################################
#
# YOUR CODE GOES HERE
#
################################################################################

#
# feel free to split this into some number of classes, functions, ... if it
# helps with code organization; for example, you may want to create a class for
# each of your layers that store parameters, performs initialization and
# includes forward and backward functions
#
# x = np.random.random((2,2,3))
# print(x)
#learning rate
lr= 0.001


# random.seed(1)
#one hot vector for labels
one_hot_labels = np.zeros((train_labels.shape[0], 10))

for i in range(train_labels.shape[0]):
    one_hot_labels[i, train_labels[i]] = 1
one_hot_labels = np.array(one_hot_labels, dtype='float32')

# Relu activation function
def relu(a, derivative=False):
  if derivative:
    return np.greater(a,0).astype(int)
  else:
    return np.maximum(0,a)

def softmax(X):
  """ applies softmax to an input x"""
  exps = np.exp(X - np.max(X))
  return exps / np.sum(exps)


def cross_entropy(X,y):
    """
    X is the output from fully connected layer (num_examples x num_classes)
    y is labels (num_examples x 1)
    	Note that y is not one-hot encoded vector. 
    	It can be computed as y.argmax(axis=1) from one-hot encoded vectors of labels if required.
    """
    m = y.shape[0]
    p = softmax(X)
    # We use multidimensional array indexing to extract 
    # softmax probability of the correct label for each sample.
    # Refer to https://docs.scipy.org/doc/numpy/user/basics.indexing.html#indexing-multi-dimensional-arrays for understanding multidimensional array indexing.
    log_likelihood = -np.log(p[range(m),y])
    loss = np.sum(log_likelihood) / m
    return loss
  

def delta_cross_entropy(X,y):
    """
    X is the output from fully connected layer (num_examples x num_classes)
    y is labels (num_examples x 1)
    	Note that y is not one-hot encoded vector. 
    	It can be computed as y.argmax(axis=1) from one-hot encoded vectors of labels if required.
    """
    m = y.shape[0]
    grad = softmax(X)
    grad[range(m),y] -= 1
    grad = grad/m
    return grad


# normalize
train_data = (train_data/255).astype('float32')
test_data = (test_data/255).astype('float32')


print(train_data[2])


# vectorize input array
train_data = train_data.reshape(train_data.shape[0],1,28*28)
test_data = test_data.reshape(test_data.shape[0],1,28*28)


# Hidden weight layer for input 
w1 = np.random.randn(train_data.shape[2], 1000)*np.sqrt(1./train_data.shape[2])
b1 = np.random.randn(1000)
 
# 2nd hidden layer
w2 = np.random.randn(w1.shape[1], 100)*np.sqrt(1./w1.shape[1])
b2 = np.random.randn(100)

# 3rd hidden layer
w3 = np.random.randn(w2.shape[1], 10)*np.sqrt(1./w2.shape[1])
b3 = np.random.randn(10)



[[[0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.        ]
  [0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.        ]
  [0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.        ]
  [0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
  

In [30]:

# Feed forward network
for epoch in range(1):
  program_starts = time.time()
  loss=0
  for i,val in enumerate(train_data):
    temp_input = np.copy(val)

    # print(temp_input)
    #First layer
    x1 = np.dot(temp_input, w1)
    z1 = np.add(x1,b1)
    # print(z1)
    a1 = relu(z1)
    # print(a1)
    #Second Layer
    x2 = np.dot(a1, w2)
    z2 = np.add(x2,b2)
    a2 = relu(z2)

    #Third layer
    x3 = np.dot(a2, w3)
    z3 = np.add(x3,b3)
    output = softmax(z3)


    #backprop and derivatives
    #Final output layer dealing with softmax function
    dcost_dz3 = output - one_hot_labels[i]
    dz3_dw3 = a2
    dcost_w3 = np.dot(dz3_dw3.T, dcost_dz3)
    dcost_b3 = dcost_dz3
    # print(dcost_w3.shape)
    # print(dcost_b3.shape)

    # Second layer derivatives
    dz3_da2 = w3
    dcost_da2 = np.dot(dcost_dz3 , dz3_da2.T)
    da2_dz2 = relu(z2, derivative=True)
    dz2_dw2 = a1
    dcost_w2 = np.dot(dz2_dw2.T, da2_dz2 * dcost_da2)
    dcost_b2 = dcost_da2 * da2_dz2
    # print(dcost_w2.shape)
    # print(dcost_b2.shape)


     # First layer derivatives
    dcost_dz2 = dcost_da2*da2_dz2
    dz2_da1 = w2
    dcost_da1 = np.dot(dcost_dz2 , dz2_da1.T)
    da1_dz1 = relu(z1, derivative=True)
    dz1_dw1 = temp_input
    # print(dz3_dw3.shape)
    dcost_w1 = np.dot(dz1_dw1.T, da1_dz1 * dcost_da1)   
    dcost_b1 = dcost_da1 * da1_dz1
 

    # Update weights
    # print(b3.shape, dcost_b3.shape)
    w1 -= lr * dcost_w1
    b1 -= lr * dcost_b1.sum(axis=0)
    w2 -= lr * dcost_w2
    b2 -= lr * dcost_b2.sum(axis=0)
    w3 -= lr * dcost_w3
    b3 -= lr * dcost_b3.sum(axis=0)
    loss = np.sum(-one_hot_labels[i] * np.log(output))
    # if(i%200 == 0):
    #   print(output)
    #   print("Cost is:", loss)
  now = time.time()
  print('Epoch: {} Loss: {:.6f} Time: {:.2f} secs'.format(epoch, loss, now-program_starts))
# error_cost.append(loss)
print(w2[0])





# output.shape
# print(w1)
# print(b1)

  # error_out = cross_entropy(output,train_labels[i])
    # print(error_out)
# temp_input

# cycle through the epochs

    # set the learning rate

    # cycle through the training data
        # forward pass
        # loss
        # back prop
        # weight update

    # cycle through the testing data
        # forward pass
        # accuracy
    # per epoch display (epoch, time, training loss, testing accuracy, ...)

# one_hot_labels[:10]
# train_data.shape[3]

Epoch: 0 Loss: 0.046342 Time: 0.01 secs
[-0.05367305  0.00763779  0.07336932  0.01095054 -0.01843116  0.0224264
  0.02115866 -0.01754726 -0.02960933 -0.01438819 -0.05227837  0.03657389
 -0.00663203  0.02059916 -0.00029619  0.02645698 -0.01984938  0.00030431
 -0.04036528 -0.05965132 -0.02643404  0.03395951  0.02703515 -0.09110154
 -0.03882302  0.01958246  0.02899009 -0.0093705   0.0548897   0.03701082
 -0.00028726  0.01786329  0.01015042  0.01053899 -0.03131747  0.00428253
  0.0007846   0.01396954 -0.03964925  0.02828508 -0.00150268  0.06345473
 -0.03230383 -0.01003738  0.06042196 -0.00802667  0.00865151  0.06845329
  0.01711505  0.02690829  0.01664318  0.02247241  0.04448335 -0.02958287
  0.06151644 -0.01344439 -0.03493614 -0.01652933 -0.00968075 -0.01680624
 -0.00679727 -0.02819122 -0.06329686  0.01544511  0.05664501 -0.05152432
 -0.05561453  0.03634719 -0.05044548 -0.01289443 -0.04242103 -0.03286042
 -0.01950868  0.06073322 -0.03789038 -0.01964027  0.01104531 -0.03351736
 -0.00049904

In [None]:

import pickle
output = open('w1.pkl', 'wb')
pickle.dump(w1,output)
output = open('w2.pkl', 'wb')
pickle.dump(w2,output)
output = open('w3.pkl', 'wb')
pickle.dump(w3,output)
output = open('b1.pkl', 'wb')
pickle.dump(b1,output)
output = open('b2.pkl', 'wb')
pickle.dump(b2,output)
output = open('b3.pkl', 'wb')
pickle.dump(b3,output)

In [None]:
from google.colab import files
files.download('w2.pkl')
files.download('w3.pkl')
files.download('b1.pkl')
files.download('b2.pkl')
files.download('b3.pkl')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# print(w3)
################################################################################
#
# DISPLAY
#
################################################################################

#
# more code for you to write
#
test_count =0
for i, val in enumerate(test_data):
  x1 = np.dot(val, w1)
  z1 = np.add(x1,b1)
  a1 = relu(z1)
  # print(a1)

  #Second Layer
  x2 = np.dot(a1, w2)
  z2 = np.add(x2,b2)
  a2 = relu(z2)
  # print(a2)

  #Third layer
  x3 = np.dot(a2, w3)
  z3 = np.add(x3,b3)
  # print(z3)
  output = softmax(z3)
  # print(output, test_labels[i])
  if(np.argmax(output) == test_labels[i]):
    test_count +=1
print(test_count)

print(output.shape)

print(train_labels[10])



8807
(1, 10)
3


In [None]:
# accuracy display
# final value
# plot of accuracy vs epoch

# performance display
# total time
# per layer info (type, input size, output size, parameter size, MACs, ...)

# example display
# replace the xNN predicted label with the label predicted by the network
fig = plt.figure(figsize=(DISPLAY_COL_IN, DISPLAY_ROW_IN))
ax  = []
for i in range(DISPLAY_NUM):
    img = test_data[i, :, :, :].reshape((DATA_ROWS, DATA_COLS))
    ax.append(fig.add_subplot(DISPLAY_ROWS, DISPLAY_COLS, i + 1))
    ax[-1].set_title('True: ' + str(test_labels[i]) + ' xNN: ' + str(test_labels[i]))
    plt.imshow(img, cmap='Greys')
plt.show()

IndexError: ignored

<Figure size 720x1800 with 0 Axes>