# COMS 4995_002 Deep Learning Assignment 1
Due on Monday, Oct 9, 11:59pm

This assignment can be done in groups of at most 3 students. Everyone must submit on Courseworks individually.

Write down the UNIs of your group (if applicable)

Member 1: Name, UNI

Member 2: Name, UNI

Member 3: Name, UNI

In [355]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy.misc
import glob
import sys
# you shouldn't need to make any more imports

In [356]:
class NeuralNetwork(object):
    """
    Abstraction of neural network.
    Stores parameters, activations, cached values. 
    Provides necessary functions for training and prediction. 
    """
    def __init__(self, layer_dimensions, drop_prob=0.0, reg_lambda=0.0):
        """
        Initializes the weights and biases for each layer
        :param layer_dimensions: (list) number of nodes in each layer
        :param drop_prob: drop probability for dropout layers. Only required in part 2 of the assignment
        :param reg_lambda: regularization parameter. Only required in part 2 of the assignment
        """
        np.random.seed(1)
        
        self.parameters = {}
        self.num_layers = len(layer_dimensions)
        self.drop_prob = drop_prob
        self.reg_lambda = reg_lambda
        self.parameters['W'] = []
        self.parameters['b'] = []
        print(layer_dimensions)
        for i in range(self.num_layers-1):
            self.parameters['W'].append(np.random.standard_normal((layer_dimensions[i+1],layer_dimensions[i])))
            self.parameters['b'].append(np.zeros((layer_dimensions[i+1],1)))
        # init parameters
        

    def affineForward(self, A, W, b):
        """
        Forward pass for the affine layer.
        :param A: input matrix, shape (L, S), where L is the number of hidden units in the previous layer and S is
        the number of samples
        :returns: the affine product WA + b, along with the cache required for the backward pass
        """
        #print('b',b.shape)
        #print('W',W.shape)
        return np.add(np.matmul(W,A),b)

    def activationForward(self, A, activation="relu"):
        """
        Common interface to access all activation functions.
        :param A: input to the activation function
        :param prob: activation funciton to apply to A. Just "relu" for this assignment.
        :returns: activation(A)
        """ 
        return getattr(self,activation)(A)

    def relu(self, X):
        return 1/(1+np.exp(-X))
            
    def dropout(self, A, prob):
        """
        :param A: 
        :param prob: drop prob
        :returns: tuple (A, M) 
            WHERE
            A is matrix after applying dropout
            M is dropout mask, used in the backward pass
        """
        M = np.random.choice([0,1],(3,6),p=[prob,1-prob])/(1-prob)
        A = np.multiply(A,M)
        
        return A, M
    
    def forwardPropagation(self, X):
        """
        Runs an input X through the neural network to compute activations
        for all layers. Returns the output computed at the last layer along
        with the cache required for backpropagation.
        :returns: (tuple) AL, cache
            WHERE 
            AL is activation of last layer
            cache is cached values for each layer that
                     are needed in further steps
        """
        cache = {}
        cache['A'] = []
        cache['Z'] = []
        cache['A'].append(X)
        for i in range(self.num_layers-2):
            X = self.affineForward(X, self.parameters['W'][i], self.parameters['b'][i])
            #print('z',X)
            cache['Z'].append(X)
            X = self.relu(X)
            cache['A'].append(X)
            #print(X)
        X = self.affineForward(X, self.parameters['W'][self.num_layers-2], self.parameters['b'][self.num_layers-2])
        cache['Z'].append(X)
        X = self.softmax(X)
        AL = X
        #print(AL[:,0])
        return AL, cache
    
    def softmax(self, X):
        return np.exp(X)/np.sum(np.exp(X),axis=0)
    
    def costFunction(self, AL, Y_mini):
        """
        :param AL: Activation of last layer, shape (num_classes, S)
        :param y: labels, shape (S)
        :param alpha: regularization parameter
        :returns cost, dAL: A scalar denoting cost and the gradient of cost
        """
        self.parameters['#data'] = Y_mini.shape[1]
        # compute loss
        cost = -np.sum(np.multiply(Y_mini, np.log(AL))+np.multiply(1-Y_mini,np.log(1-AL)))/self.parameters['#data']
        if self.reg_lambda > 0:
            # add regularization
           1
        
        # gradient of cost
        dAL = -np.multiply(Y_mini,1/AL)+np.multiply(1-Y_mini,1/(1-AL))
        return cost, dAL
    
    def myonehot(self,y):
        Y=np.zeros((10,y.shape[0]))
        for i in range(y.shape[0]):
            Y[y[i]][i] = 1
        return Y
    def affineBackward(self, dA_prev, cache, layer):
        """
        Backward pass for the affine layer.
        :param dA_prev: gradient from the next layer.
        :param cache: cache returned in affineForward
        :returns dA: gradient on the input to this layer
                 dW: gradient on the weights
                 db: gradient on the bias
        """
        if layer == self.num_layers-2:
            dZ = dA_prev*self.softmax(cache['Z'][layer])*(1-self.softmax(cache['Z'][layer]))
        else:
            dZ = dA_prev*self.relu(cache['Z'][layer])*(1-self.relu(cache['Z'][layer]))
        dA = np.matmul(self.parameters['W'][layer].T, dZ)
        dW = np.matmul(dZ,cache['A'][layer].T)/self.parameters['#data']
        db = (np.sum(dZ,axis = 1)/self.parameters['#data']).reshape(dW.shape[0],1)
       # print('db',db.shape,db)
        return dA, dW, db

    def activationBackward(self, dA, cache, activation="relu"):
        """
        Interface to call backward on activation functions.
        In this case, it's just relu. 
        """
        
        
    def relu_derivative(self, dx, cached_x):

        return dx

    def dropout_backward(self, dA, cache):

        return dA

    def backPropagation(self, dAL, Y, cache):
        """
        Run backpropagation to compute gradients on all paramters in the model
        :param dAL: gradient on the last layer of the network. Returned by the cost function.
        :param Y: labels
        :param cache: cached values during forwardprop
        :returns gradients: dW and db for each weight/bias
        """
        gradients = {}
        
        for i in range(self.num_layers-1)[::-1]:
            dAL, dW, db = self.affineBackward(dAL,cache,i)
            gradients[i] = [dW,db]
            if self.drop_prob > 0:
                1
                #call dropout_backward
            if self.reg_lambda > 0:
                1
            # add gradients from L2 regularization to each dW
        
        return gradients


    def updateParameters(self, gradients, alpha):
        """
        :param gradients: gradients for each weight/bias
        :param alpha: step size for gradient descent 
        """
        for i in range(self.num_layers-1):
            self.parameters['W'][i] = self.parameters['W'][i] - alpha*gradients[i][0]
            self.parameters['b'][i] = self.parameters['b'][i] - alpha*gradients[i][1]
        
    def train(self, X, y, iters=1000, alpha=0.0001, batch_size=1000, print_every=100):
        """
        :param X: input samples, each column is a sample
        :param y: labels for input samples, y.shape[0] must equal X.shape[1]
        :param iters: number of training iterations
        :param alpha: step size for gradient descent
        :param batch_size: number of samples in a minibatch
        :param print_every: no. of iterations to print debug info after
        """
        Y = self.myonehot(y)
        for i in range(0, iters):
            # get minibatch
            X_mini, Y_mini, y_mini = self.get_batch(X,Y,y,batch_size)
            # forward prop
            AL, cache = self.forwardPropagation(X_mini)
            # compute loss
            cost, dAL = self.costFunction(AL, Y_mini)
            # compute gradients
            gradients = self.backPropagation(dAL, y, cache)
            # update weights and biases based on gradient
            self.updateParameters(gradients,alpha)
            if i % print_every == 0:
                # print cost, train and validation set accuracies
                print('cost')
                print(cost)
                print('train accuracy')
                print(self.accu(AL,y_mini))
                print('validation accuracy')
                X_v, Y_v, y_v = self.get_batch(X,Y,y,5000)
                accu_v = self.accu(self.forwardPropagation(X_v)[0],y_v)
                print(accu_v)
                
    def accu(self, AL, y):
        print(AL[:,0])
        return np.sum(np.equal(np.argmax(AL,axis = 0), y))/float(y.shape[0])
        
    def predict(self, X):
        """
        Make predictions for each sample
        """
        AL = self.forwardPropagation(X)[0]
        y_pred = np.argmax(AL,axis = 0)
        return y_pred

    def get_batch(self, X, Y, y, batch_size):
        """
        Return minibatch of samples and labels
        
        :param X, y: samples and corresponding labels
        :parma batch_size: minibatch size
        :returns: (tuple) X_batch, y_batch
        """
        sample = np.random.choice(int(X.shape[1]),size=(batch_size),replace = False)
        X_mini = X[:,sample]
        Y_mini = Y[:,sample]
        y_mini = y[sample]
        return X_mini, Y_mini, y_mini

In [357]:
# Helper functions, DO NOT modify this

def get_img_array(path):
    """
    Given path of image, returns it's numpy array
    """
    return scipy.misc.imread(path)

def get_files(folder):
    """
    Given path to folder, returns list of files in it
    """
    filenames = [file for file in glob.glob(folder+'*/*')]
    filenames.sort()
    return filenames

def get_label(filepath, label2id):
    """
    Files are assumed to be labeled as: /path/to/file/999_frog.png
    Returns label for a filepath
    """
    tokens = filepath.split('/')
    label = tokens[-1].split('_')[1][:-4]
    if label in label2id:
        return label2id[label]
    else:
        sys.exit("Invalid label: " + label)

In [358]:
# Functions to load data, DO NOT change these

def get_labels(folder, label2id):
    """
    Returns vector of labels extracted from filenames of all files in folder
    :param folder: path to data folder
    :param label2id: mapping of text labels to numeric ids. (Eg: automobile -> 0)
    """
    files = get_files(folder)
    y = []
    for f in files:
        y.append(get_label(f,label2id))
    return np.array(y)

def one_hot(y, num_classes=10):
    """
    Converts each label index in y to vector with one_hot encoding
    """
    y_one_hot = np.zeros((y.shape[0], num_classes))
    y_one_hot[y] = 1
    return y_one_hot.T

def get_label_mapping(label_file):
    """
    Returns mappings of label to index and index to label
    The input file has list of labels, each on a separate line.
    """
    with open(label_file, 'r') as f:
        id2label = f.readlines()
        id2label = [l.strip() for l in id2label]
    label2id = {}
    count = 0
    for label in id2label:
        label2id[label] = count
        count += 1
    return id2label, label2id

def get_images(folder):
    """
    returns numpy array of all samples in folder
    each column is a sample resized to 30x30 and flattened
    """
    files = get_files(folder)
    images = []
    count = 0
    
    for f in files:
    #for i in range(5000):
     #   f = files[i]
        count += 1
        if count % 5000 == 0:
            print("Loaded {}/{}".format(count,len(files)))
        img_arr = get_img_array(f)
        img_arr = img_arr.flatten() / 255.0
        images.append(img_arr)
    X = np.column_stack(images)

    return X

def get_train_data(data_root_path):
    """
    Return X and y
    """
    train_data_path = data_root_path + 'train'
    id2label, label2id = get_label_mapping(data_root_path+'labels.txt')
    print(label2id)
    X = get_images(train_data_path)
    y = get_labels(train_data_path, label2id)
    return X, y

def save_predictions(filename, y):
    """
    Dumps y into .npy file
    """
    np.save(filename, y)

In [359]:
# Load the data
data_root_path = './cifar10-hw1/'
X_train, y_train = get_train_data(data_root_path) # this may take a few minutes
X_test = get_images(data_root_path + 'test')
print('Data loading done')

{'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3, 'deer': 4, 'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9}
Loaded 5000/50000
Loaded 10000/50000
Loaded 15000/50000
Loaded 20000/50000
Loaded 25000/50000
Loaded 30000/50000
Loaded 35000/50000
Loaded 40000/50000
Loaded 45000/50000
Loaded 50000/50000
Loaded 5000/10000
Loaded 10000/10000
Data loading done


## Part 1

#### Simple fully-connected deep neural network

In [None]:
layer_dimensions = [X_train.shape[0], 1000, 1500, 400, 10]  # including the input and output layers
NN = NeuralNetwork(layer_dimensions)
NN.train(X_train, y_train, iters=100000, alpha=0.03, batch_size=100, print_every=100)

[3072, 1000, 1500, 400, 10]
cost
24.6280466371
train accuracy
[  1.90405348e-06   3.70645892e-17   7.84739775e-08   2.54972953e-11
   1.70846485e-03   1.51625306e-08   1.08811874e-05   7.77662128e-12
   9.98278656e-01   3.68779317e-10]
0.12
validation accuracy
[  1.99956990e-05   3.96095694e-13   2.69463627e-07   7.37470796e-10
   9.38134017e-01   1.25157531e-06   2.94983092e-08   1.47792662e-09
   6.09277693e-02   9.16665476e-04]
0.1008
cost
7.03621236917
train accuracy
[  5.58018630e-05   3.03660737e-04   8.62004504e-05   4.17459631e-05
   2.34427682e-06   9.44874885e-04   2.73608709e-03   7.92870247e-04
   2.23566923e-04   9.94812848e-01]
0.15
validation accuracy
[  1.99697422e-03   1.57660647e-04   4.54993848e-03   1.09775230e-04
   5.39165617e-07   3.22472681e-03   3.32597613e-02   4.13742328e-02
   1.04852572e-02   9.04841134e-01]
0.1612
cost
5.90197304743
train accuracy
[  2.30682782e-02   6.12078488e-04   6.32226236e-02   9.88399422e-05
   2.74245063e-01   9.80818260e-04   2.01

[ 0.04723724  0.06238438  0.1006734   0.0947857   0.12388943  0.034448
  0.07089957  0.16454633  0.13433437  0.16680158]
0.2548
cost
2.94746237035
train accuracy
[ 0.2016167   0.05632997  0.12846539  0.0951607   0.11905164  0.03062811
  0.04672914  0.12116648  0.15423825  0.04661362]
0.28
validation accuracy
[ 0.23559356  0.10101649  0.07332884  0.05610659  0.07274698  0.02817732
  0.05399653  0.10219706  0.20722561  0.06961103]
0.2428
cost
3.05154202991
train accuracy
[ 0.42219338  0.06957984  0.07001847  0.06398912  0.05547206  0.08142346
  0.04460692  0.07060351  0.05371639  0.06839685]
0.27
validation accuracy
[ 0.02266852  0.06272389  0.14333906  0.14956752  0.17446789  0.09081777
  0.15430529  0.14124756  0.02254374  0.03831877]
0.257


In [None]:
y_predicted = NN.predict(X_test)
save_predictions('ans1-uni', y_predicted)

In [None]:
# test if your numpy file has been saved correctly
loaded_y = np.load('ans1-uni.npy')
print(loaded_y.shape)
loaded_y[:10]

## Part 2: Regularizing the neural network
#### Add dropout and L2 regularization

In [None]:
NN2 = NeuralNetwork(layer_dimensions, drop_prob=0, reg_lambda=0)
NN2.train(X_train, y_train, iters=1000, alpha=0.00001, batch_size=1000, print_every=10)

In [None]:
y_predicted2 = NN2.predict(X)
save_predictions(y_predicted, 'ans2-uni')