# COMS 4995_002 Deep Learning Assignment 1
Due on Thursday, Feb 8, 11:59pm

This assignment can be done in groups of at most 2 students. Everyone must submit on Courseworks individually.

Write down the UNIs of your group (if applicable)

Member 1: Apoorv Purwar, ap3644

Member 2: Shreyas Mundhra, ssm2211

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy.misc
import glob
import sys
# you shouldn't need to make any more imports

In [16]:
class NeuralNetwork(object):
    """
    Abstraction of neural network.
    Stores parameters, activations, cached values. 
    Provides necessary functions for training and prediction. 
    """
    def __init__(self, layer_dimensions, drop_prob=0.0, reg_lambda=0.0):
        """
        Initializes the weights and biases for each layer
        :param layer_dimensions: (list) number of nodes in each layer
        :param drop_prob: drop probability for dropout layers. Only required in part 2 of the assignment
        :param reg_lambda: regularization parameter. Only required in part 2 of the assignment
        """
        np.random.seed(1)
        
        self.num_layers = len(layer_dimensions) #Length of tuple is the number of layers
        self.parameters = {} 
        for i in range(1, self.num_layers):
            curr_layer_dim = layer_dimensions[i]
            prev_layer_dim = layer_dimensions[i-1]
            
#            bias = (np.random.uniform()/10)*np.ones((curr_layer_dim,1)) + 0.01 
#            weight = np.random.normal(0,10*np.random.uniform(),(curr_layer_dim, prev_layer_dim)) 

            epsilon = np.sqrt(2.0 / (curr_layer_dim + prev_layer_dim))
            # Random Initialisation of weights
            weight = np.random.randn(curr_layer_dim, prev_layer_dim) * epsilon
            # Random Initialisation of bias
            bias = np.zeros((curr_layer_dim, 1)) + 0.01 # added to make sure relus fire in the start
            self.parameters[i] = [weight, bias]
        self.drop_prob = 1 - drop_prob
        self.reg_lambda = reg_lambda  
        
    def affineForward(self, A, W, b):
        """
        Forward pass for the affine layer.
        :param A: input matrix, shape (L, S), where L is the number of hidden units in the previous layer and S is
        the number of samples
        :returns: the affine product WA + b, along with the cache required for the backward pass
        """
#         print("affine W shape: ",W.shape)
#         print("affine forward A shape: ",A.shape)
        y = np.matmul(W,A) + b
#        y = W.dot(A) + b
#         print("unique y: ",np.unique(y))
        cache = [A,y,W]
        return y, cache
        

    def activationForward(self, A, activation="relu"):
        """¬
        Common interface to access all activation functions.
        :param A: input to the activation function
        :param prob: activation funciton to apply to A. Just "relu" for this assignment.
        :returns: activation(A)
        """ 
        activation = self.relu(A)  # since no other activation fn exists
        return activation


    def relu(self, X):
        A = np.maximum(0, X)
        return A
            
    def dropout(self, A, prob):
        M = (np.random.rand(*A.shape)<prob)/prob
        A = A*M
        return A, M
           
       # param A: 
       # :param prob: drop prob
       # :returns: tuple (A, M) 
       #     WHERE
       #     A is matrix after applying dropout
       #     M is dropout mask, used in the backward pass
      

    def forwardPropagation(self, X):
        """
        Runs an input X through the neural network to compute activations
        for all layers. Returns the output computed at the last layer along
        with the cache required for backpropagation.
        :returns: (tuple) AL, cache
            WHERE 
            AL is activation of last layer
            cache is cached values for each layer that
                     are needed in further steps
        """
        cache = {}
        prev_activation = X 
        for i in range(1, self.num_layers-1):
            nodes = layer_dimensions[i]
            curr_activations = np.zeros((nodes,X.shape[1])) # Get the column count of X - Can we comment this?
            curr_activations, cache[i] = \
            self.affineForward(prev_activation, self.parameters[i][0], self.parameters[i][1])
            curr_activations = self.activationForward(curr_activations)
            prev_activation = curr_activations
            
        
        nodes = layer_dimensions[self.num_layers-1]
        curr_activations = np.zeros((nodes,X.shape[1])) # Get the column count of X - Can we comment this?
        curr_activations, cache[self.num_layers-1] = \
        self.affineForward(prev_activation, self.parameters[self.num_layers-1][0], self.parameters[self.num_layers-1][1])
        
        AL = curr_activations

        return AL, cache

    def softmax(self, X):
        return np.exp(X) / np.sum(np.exp(X),axis=0) #changes input into probabilities

    
    def costFunction(self, AL, y):
        """
        :param AL: Activation of last layer, shape (num_classes, S)
        :param y: labels, shape (S)
        :param alpha: regularization parameter
        :returns cost, dAL: A scalar denoting cost and the gradient of cost
        """
        
#         print("Unique AL: ",np.unique(AL))
        numberSamples = AL.shape[1]
        AL = AL - np.max(AL, axis=0, keepdims=True) #shifted values 
        AL_prob = self.softmax(AL)
        correct_label_prob = AL_prob[y, np.arange(numberSamples)]
        logProb = np.log(correct_label_prob)
        cost = -np.sum(logProb) / numberSamples
        # gradient of cost
        AL_prob[y, np.arange(numberSamples)] -= 1
        dAL = AL_prob/float(numberSamples)
        return cost, dAL

            

    def affineBackward(self, dA_prev, cache):
        """
        Backward pass for the affine layer.
        :param dA_prev: gradient from the next layer.
        :param cache: cache returned in affineForward
        :returns dA: gradient on the input to this layer
                 dW: gradient on the weights
                 db: gradient on the bias
        """
        A = cache[0]
        y = cache[1]
        W = cache[2]
        
        S = y.shape[1]
        
        dy = self.activationBackward(dA_prev, cache)
#         print("Unique dy: " + str(np.unique(dy)))
        
        dA = np.matmul(W.transpose(),dy)
        
        dW = np.matmul(dy, A.transpose())
        db = np.sum(dy, axis=1)
        db = db.reshape((len(db), 1))
        return dA, dW, db

    def activationBackward(self, dA, cache, activation="relu"):
        """
        Interface to call backward on activation functions.
        In this case, it's just relu. 
        """
        dx = self.relu_derivative(dA,cache[1])
        return dx

        
    def relu_derivative(self, dx, cached_x):
        nodes, S = cached_x.shape
        cached_x_derivative = np.zeros((nodes,S))
        cached_x_derivative[cached_x > 0] = 1
        dx = np.multiply(dx,cached_x_derivative)
        return dx

    def dropout_backward(self, dA, cache):

        return dA

    def backPropagation(self, dAL, Y, cache):
        """
        Run backpropagation to compute gradients on all paramters in the model
        :param dAL: gradient on the last layer of the network. Returned by the cost function.
        :param Y: labels
        :param cache: cached values during forwardprop
        :returns gradients: dW and db for each weight/bias
        """
        gradients = {}
        
        S = len(Y)
        A = cache[self.num_layers-1][0]
#         print("backprop A shape: ",A.shape)
        W = cache[self.num_layers-1][2]
        dA = np.matmul(W.transpose(),dAL)
        
        dW = np.matmul(dAL, A.transpose())
        db = np.sum(dAL, axis=1)
        db = db.reshape((len(db), 1))
        gradients[self.num_layers-1] = [dW,db]
        dA_prev = dA
        
        for l in range(self.num_layers-2,0,-1):
            dA, dW, db = self.affineBackward(dA_prev, cache[l])
#             print("Unique db: " + str(np.unique(db)))
            
            gradients[l] = [dW,db]
#             print("backprop dA shape: ",dA.shape)
#             print("backprop dW shape: ",dW.shape)
#             print("backprop db shape: ",db.shape)
            dA_prev = dA
            
        if self.drop_prob > 0:
                #call dropout_backward
                pass
           
            
        if self.reg_lambda > 0:
            # add gradients from L2 regularization to each dW
            pass
        
        return gradients


    def updateParameters(self, gradients, alpha):
        """
        :param gradients: gradients for each weight/bias
        :param alpha: step size for gradient descent 
        """
#         print("Unique gradients: " + str(np.unique(np.asarray(gradients))))
        for i in range(1, self.num_layers):
            self.parameters[i][0] = self.parameters[i][0] - alpha*gradients[i][0]
            self.parameters[i][1] = self.parameters[i][1] - alpha*gradients[i][1]

    def train(self, X, y, iters=1000, alpha=0.0001, batch_size=100, print_every=100):
        """
        :param X: input samples, each column is a sample
        :param y: labels for input samples, y.shape[0] must equal X.shape[1]
        :param iters: number of training iterations
        :param alpha: step size for gradient descent
        :param batch_size: number of samples in a minibatch
        :param print_every: no. of iterations to print debug info after
        """
        
        S = len(y)
        train_split_percent = 90
        
        num_train = int(train_split_percent*S/100)
        num_val = S - num_train
       
        X_train = X[:, :num_train]
        y_train = y[:num_train]
        X_val = X[:, num_train:]
        y_val = y[num_train:]
        
        X_batch, y_batch = self.get_batch(X_train,y_train,batch_size)
        numBatches = len(y_batch)
        
        j = 0
        
        if j%150 == 0:
            alpha = alpha/10
        
        for i in range(0, iters):
#             for j in range(0, numBatches):
            # get minibatch
            X_mini = X_batch[i % numBatches]
            y_mini = y_batch[i % numBatches]

            # forward prop
            AL, cache = self.forwardPropagation(X_mini)

            # compute loss
            cost, dAL = self.costFunction(AL, y_mini)

            # compute gradients
            gradients = self.backPropagation(dAL, y_mini, cache)

            # update weights and biases based on gradient
            self.updateParameters(gradients, alpha)

            if i % print_every == 0:
                y_pred_train = self.predict(X_train)
                
                y_pred_val = self.predict(X_val)
                
                train_preds = y_pred_train
                train_accuracy = 0
                for i in range(0, len(y_train)):
                    if y_pred_train[y_train[i],i] == 1:
                        train_accuracy = train_accuracy + 1
                train_accuracy = train_accuracy*100.0/num_train
                
                val_accuracy = 0
                for i in range(0, len(y_val)):
                    if y_pred_val[y_val[i],i] == 1:
                        val_accuracy = val_accuracy + 1
                val_accuracy = val_accuracy*100.0/num_val
                
                # print cost, train and validation set accuracies
                j = j+1
                print("Iter -",j," : Loss, training accuracy, val accuracy: ", cost, train_accuracy, val_accuracy)
                
                
    def predict(self, X):
        """
        Make predictions for each sample
        """         
        AL, cache = self.forwardPropagation(X)
        
        S = AL.shape[1]
        y_pred = np.zeros(S, dtype=int)
        
        for j in range(0, S):
            y_pred[j] = np.argmax(AL[:,j])
        
        return one_hot(y_pred)

    def get_batch(self, X, y, batch_size):
        """
        Return minibatch of samples and labels
        
        :param X, y: samples and corresponding labels
        :parma batch_size: minibatch size
        :returns: (tuple) X_batch, y_batch
        """
        n = len(y)
        
        X_batch = []
        y_batch = []
        for start in range(0,n-batch_size,batch_size):
            X_batch.append(X[:,start:start+batch_size])
            y_batch.append(y[start:start+batch_size])
            
        return X_batch, y_batch

In [17]:
# Helper functions, DO NOT modify this

def get_img_array(path):
    """
    Given path of image, returns it's numpy array
    """
    return scipy.misc.imread(path)

def get_files(folder):
    """
    Given path to folder, returns list of files in it
    """
    filenames = [file for file in glob.glob(folder+'*/*')]
    filenames.sort()
    return filenames

def get_label(filepath, label2id):
    """
    Files are assumed to be labeled as: /path/to/file/999_frog.png
    Returns label for a filepath
    """
    tokens = filepath.split('/')
    label = tokens[-1].split('_')[1][:-4]
    if label in label2id:
        return label2id[label]
    else:
        sys.exit("Invalid label: " + label)

In [18]:
# Functions to load data, DO NOT change these

def get_labels(folder, label2id):
    """
    Returns vector of labels extracted from filenames of all files in folder
    :param folder: path to data folder
    :param label2id: mapping of text labels to numeric ids. (Eg: automobile -> 0)
    """
    files = get_files(folder)
    y = []
    for f in files:
        y.append(get_label(f,label2id))
    return np.array(y)

def one_hot(y, num_classes=10):
    """
    Converts each label index in y to vector with one_hot encoding
    """
    y_one_hot = np.zeros((num_classes, y.shape[0]))
    y_one_hot[y, range(y.shape[0])] = 1
    return y_one_hot

def get_label_mapping(label_file):
    """
    Returns mappings of label to index and index to label
    The input file has list of labels, each on a separate line.
    """
    with open(label_file, 'r') as f:
        id2label = f.readlines()
        id2label = [l.strip() for l in id2label]
    label2id = {}
    count = 0
    for label in id2label:
        label2id[label] = count
        count += 1
    return id2label, label2id

def get_images(folder):
    """
    returns numpy array of all samples in folder
    each column is a sample resized to 30x30 and flattened
    """
    files = get_files(folder)
    images = []
    count = 0
    
    for f in files:
        count += 1
        if count % 10000 == 0:
            print("Loaded {}/{}".format(count,len(files)))
        img_arr = get_img_array(f)
        img_arr = img_arr.flatten() / 255.0
        images.append(img_arr)
    X = np.column_stack(images)

    return X

def get_train_data(data_root_path):
    """
    Return X and y
    """
    train_data_path = data_root_path + 'train'
    id2label, label2id = get_label_mapping(data_root_path+'labels.txt')
    print(label2id)
    X = get_images(train_data_path)
    y = get_labels(train_data_path, label2id)
    return X, y

def save_predictions(filename, y):
    """
    Dumps y into .npy file
    """
    np.save(filename, y)

In [6]:
# Load the data
data_root_path = './cifar10-hw1/'
X_train, y_train = get_train_data(data_root_path) # this may take a few minutes
X_test = get_images(data_root_path + 'test')
print('Data loading done')

{'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3, 'deer': 4, 'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9}
Loaded 10000/50000
Loaded 20000/50000
Loaded 30000/50000
Loaded 40000/50000
Loaded 50000/50000
Loaded 10000/10000
Data loading done


## Part 1

#### Simple fully-connected deep neural network

In [None]:
# layer_dimensions = [X_train.shape[0], 3, 10]  # including the input and output layers
# NN = NeuralNetwork(layer_dimensions)
# NN.train(X_train, y_train, alpha=0.0001,print_every=100)
layer_dimensions = [X_train.shape[0], 250, 150, 10]  # including the input and output layers
NN = NeuralNetwork(layer_dimensions)
NN.train(X_train, y_train, iters=35000, alpha=0.0001, batch_size=500, print_every=100)

Iter - 1  : Loss, training accuracy, val accuracy:  2.5437469365 11.431111111111111 11.46
Iter - 2  : Loss, training accuracy, val accuracy:  2.54264613742 11.873333333333333 11.94
Iter - 3  : Loss, training accuracy, val accuracy:  2.46241627431 12.144444444444444 12.28
Iter - 4  : Loss, training accuracy, val accuracy:  2.44836740982 12.386666666666667 12.44


Iter - 1  :Cost, training accuracy, val accuracy: ) 2.55379688428 10.246666666666666 10.22
Iter - 2  :Cost, training accuracy, val accuracy: ) 2.3806179537 10.5 10.6
Iter - 3  :Cost, training accuracy, val accuracy: ) 2.34165291695 10.77111111111111 10.84
Iter - 4  :Cost, training accuracy, val accuracy: ) 2.32267271701 10.922222222222222 10.82
Iter - 5  :Cost, training accuracy, val accuracy: ) 2.28350797742 11.331111111111111 10.82
Iter - 6  :Cost, training accuracy, val accuracy: ) 2.28652108003 11.88 11.92
Iter - 7  :Cost, training accuracy, val accuracy: ) 2.27076477338 13.27111111111111 13.46
Iter - 8  :Cost, training accuracy, val accuracy: ) 2.25699139836 14.702222222222222 15.62
Iter - 9  :Cost, training accuracy, val accuracy: ) 2.26019608115 15.662222222222223 16.54
Iter - 10  :Cost, training accuracy, val accuracy: ) 2.26335574922 16.586666666666666 17.52
Iter - 11  :Cost, training accuracy, val accuracy: ) 2.24298664742 17.29111111111111 18.22
Iter - 12  :Cost, training accuracy, val accuracy: ) 2.22367879005 17.91111111111111 18.7
Iter - 13  :Cost, training accuracy, val accuracy: ) 2.24057023527 18.477777777777778 19.24
Iter - 14  :Cost, training accuracy, val accuracy: ) 2.23109161133 19.02888888888889 19.98
Iter - 15  :Cost, training accuracy, val accuracy: ) 2.23560289507 19.442222222222224 20.5
Iter - 16  :Cost, training accuracy, val accuracy: ) 2.23277192987 19.92888888888889 20.8
Iter - 17  :Cost, training accuracy, val accuracy: ) 2.21431933237 20.3 20.9
Iter - 18  :Cost, training accuracy, val accuracy: ) 2.22072490548 20.633333333333333 21.3
Iter - 19  :Cost, training accuracy, val accuracy: ) 2.20235833754 21.046666666666667 21.68
Iter - 20  :Cost, training accuracy, val accuracy: ) 2.2094625059 21.5 22.0
Iter - 21  :Cost, training accuracy, val accuracy: ) 2.20448502134 21.84 22.38
Iter - 22  :Cost, training accuracy, val accuracy: ) 2.20049858418 22.17111111111111 22.8
Iter - 23  :Cost, training accuracy, val accuracy: ) 2.1952676478 22.40888888888889 23.06
Iter - 24  :Cost, training accuracy, val accuracy: ) 2.18572229106 22.677777777777777 23.46
Iter - 25  :Cost, training accuracy, val accuracy: ) 2.19393918203 22.906666666666666 23.8
Iter - 26  :Cost, training accuracy, val accuracy: ) 2.17560718331 23.23111111111111 24.14
Iter - 27  :Cost, training accuracy, val accuracy: ) 2.19032743575 23.493333333333332 24.46
Iter - 28  :Cost, training accuracy, val accuracy: ) 2.16290962452 23.704444444444444 24.58
Iter - 29  :Cost, training accuracy, val accuracy: ) 2.17048082629 23.915555555555557 24.9
Iter - 30  :Cost, training accuracy, val accuracy: ) 2.17916258212 24.124444444444446 25.02
Iter - 31  :Cost, training accuracy, val accuracy: ) 2.15327964047 24.264444444444443 25.1
Iter - 32  :Cost, training accuracy, val accuracy: ) 2.17397712846 24.468888888888888 25.36
Iter - 33  :Cost, training accuracy, val accuracy: ) 2.15583218293 24.575555555555557 25.44
Iter - 34  :Cost, training accuracy, val accuracy: ) 2.15864076149 24.73777777777778 25.74
Iter - 35  :Cost, training accuracy, val accuracy: ) 2.16737224601 25.00888888888889 25.92
Iter - 36  :Cost, training accuracy, val accuracy: ) 2.12960095862 25.175555555555555 26.12
Iter - 37  :Cost, training accuracy, val accuracy: ) 2.16266261937 25.326666666666668 26.24
Iter - 38  :Cost, training accuracy, val accuracy: ) 2.13225211689 25.477777777777778 26.46
Iter - 39  :Cost, training accuracy, val accuracy: ) 2.15708280438 25.642222222222223 26.64
Iter - 40  :Cost, training accuracy, val accuracy: ) 2.13069763558 25.744444444444444 26.74
Iter - 41  :Cost, training accuracy, val accuracy: ) 2.15816051722 25.875555555555554 26.96
Iter - 42  :Cost, training accuracy, val accuracy: ) 2.14413770501 25.968888888888888 27.14
Iter - 43  :Cost, training accuracy, val accuracy: ) 2.1053991775 26.177777777777777 27.3
Iter - 44  :Cost, training accuracy, val accuracy: ) 2.13142869151 26.364444444444445 27.44
Iter - 45  :Cost, training accuracy, val accuracy: ) 2.10860054587 26.475555555555555 27.68
Iter - 46  :Cost, training accuracy, val accuracy: ) 2.13691440575 26.63111111111111 27.6
Iter - 47  :Cost, training accuracy, val accuracy: ) 2.11976258851 26.753333333333334 27.6
Iter - 48  :Cost, training accuracy, val accuracy: ) 2.12856955961 26.866666666666667 27.76
Iter - 49  :Cost, training accuracy, val accuracy: ) 2.11671758356 26.91777777777778 27.74
Iter - 50  :Cost, training accuracy, val accuracy: ) 2.10072573624 27.0 27.78
Iter - 51  :Cost, training accuracy, val accuracy: ) 2.11867094073 27.17111111111111 28.08
Iter - 52  :Cost, training accuracy, val accuracy: ) 2.11617040817 27.266666666666666 28.16
Iter - 53  :Cost, training accuracy, val accuracy: ) 2.11383647076 27.377777777777776 28.38
Iter - 54  :Cost, training accuracy, val accuracy: ) 2.07464305608 27.484444444444446 28.42
Iter - 55  :Cost, training accuracy, val accuracy: ) 2.12252899232 27.602222222222224 28.46
Iter - 56  :Cost, training accuracy, val accuracy: ) 2.07319063925 27.773333333333333 28.56
Iter - 57  :Cost, training accuracy, val accuracy: ) 2.06743211894 27.826666666666668 28.56
Iter - 58  :Cost, training accuracy, val accuracy: ) 2.08846688935 27.926666666666666 28.64
Iter - 59  :Cost, training accuracy, val accuracy: ) 2.10297579069 28.015555555555554 28.86
Iter - 60  :Cost, training accuracy, val accuracy: ) 2.0696062467 28.14 28.98
Iter - 61  :Cost, training accuracy, val accuracy: ) 2.07814829754 28.217777777777776 29.06
Iter - 62  :Cost, training accuracy, val accuracy: ) 2.09346384187 28.362222222222222 29.02
Iter - 63  :Cost, training accuracy, val accuracy: ) 2.08817310784 28.442222222222224 29.1
Iter - 64  :Cost, training accuracy, val accuracy: ) 2.04249521925 28.557777777777776 29.14
Iter - 65  :Cost, training accuracy, val accuracy: ) 2.07701552886 28.628888888888888 29.18
Iter - 66  :Cost, training accuracy, val accuracy: ) 2.07699294213 28.74888888888889 29.32
Iter - 67  :Cost, training accuracy, val accuracy: ) 2.0731301944 28.84888888888889 29.5
Iter - 68  :Cost, training accuracy, val accuracy: ) 2.03460186865 28.93111111111111 29.5
Iter - 69  :Cost, training accuracy, val accuracy: ) 2.08312130859 28.984444444444446 29.64
Iter - 70  :Cost, training accuracy, val accuracy: ) 2.08810529093 29.133333333333333 29.48
Iter - 71  :Cost, training accuracy, val accuracy: ) 2.05146383247 29.19333333333333 29.6
Iter - 72  :Cost, training accuracy, val accuracy: ) 2.09954365835 29.266666666666666 29.64
Iter - 73  :Cost, training accuracy, val accuracy: ) 2.04849636423 29.3 29.8
Iter - 74  :Cost, training accuracy, val accuracy: ) 2.04812480196 29.324444444444445 29.74
Iter - 75  :Cost, training accuracy, val accuracy: ) 2.04079533938 29.468888888888888 29.96
Iter - 76  :Cost, training accuracy, val accuracy: ) 2.05280044384 29.52888888888889 30.0
Iter - 77  :Cost, training accuracy, val accuracy: ) 2.00448571011 29.602222222222224 30.02
Iter - 78  :Cost, training accuracy, val accuracy: ) 2.04578305763 29.64666666666667 29.96
Iter - 79  :Cost, training accuracy, val accuracy: ) 2.04177743171 29.682222222222222 30.02
Iter - 80  :Cost, training accuracy, val accuracy: ) 2.05367117189 29.74888888888889 30.06
Iter - 81  :Cost, training accuracy, val accuracy: ) 2.03767420208 29.857777777777777 30.12
Iter - 82  :Cost, training accuracy, val accuracy: ) 2.02657356978 29.84888888888889 30.16
Iter - 83  :Cost, training accuracy, val accuracy: ) 2.08683045628 29.948888888888888 30.3
Iter - 84  :Cost, training accuracy, val accuracy: ) 2.03627569641 30.00888888888889 30.36
Iter - 85  :Cost, training accuracy, val accuracy: ) 2.02455635478 30.086666666666666 30.38
Iter - 86  :Cost, training accuracy, val accuracy: ) 2.04086335869 30.148888888888887 30.56
Iter - 87  :Cost, training accuracy, val accuracy: ) 2.0465720227 30.19333333333333 30.54
Iter - 88  :Cost, training accuracy, val accuracy: ) 2.04533620405 30.266666666666666 30.66
Iter - 89  :Cost, training accuracy, val accuracy: ) 2.03772466119 30.33111111111111 30.7
Iter - 90  :Cost, training accuracy, val accuracy: ) 2.04009778499 30.351111111111113 30.7
Iter - 91  :Cost, training accuracy, val accuracy: ) 2.0576911615 30.413333333333334 30.74
Iter - 92  :Cost, training accuracy, val accuracy: ) 2.03731423328 30.488888888888887 30.74
Iter - 93  :Cost, training accuracy, val accuracy: ) 2.01988962102 30.555555555555557 30.82
Iter - 94  :Cost, training accuracy, val accuracy: ) 2.00220797517 30.615555555555556 30.94
Iter - 95  :Cost, training accuracy, val accuracy: ) 2.01088649658 30.65111111111111 31.02
Iter - 96  :Cost, training accuracy, val accuracy: ) 2.02601161304 30.724444444444444 31.0
Iter - 97  :Cost, training accuracy, val accuracy: ) 2.00897881677 30.78 31.06
Iter - 98  :Cost, training accuracy, val accuracy: ) 2.03035729144 30.813333333333333 31.0
Iter - 99  :Cost, training accuracy, val accuracy: ) 2.02381627835 30.86888888888889 31.06
Iter - 100  :Cost, training accuracy, val accuracy: ) 2.00223181741 30.966666666666665 31.0

In [39]:
y_predicted = NN.predict(X_test)
save_predictions('ans1-uni', y_predicted)

In [40]:
# test if your numpy file has been saved correctly
loaded_y = np.load('ans1-uni.npy')
np.set_printoptions(threshold=np.nan)
print(loaded_y.shape)
loaded_y[:10]

(10, 10000)


array([[ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,
         0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,
         0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,
         0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  1.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  1.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,
         0.,  1.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  1.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,
         1.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  1.,  1.,  0.,  0.,  1.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.

## Part 2: Improving the performance

In [27]:
NN2 = NeuralNetwork(layer_dimensions, drop_prob=0, reg_lambda=0)
NN2.train(X_train, y_train, iters=1000, alpha=0.00001, batch_size=1000, print_every=10)

('(Cost, training accuracy, val accuracy: )', 2.3025850929940459, 0.03868970853752902, 0.23201856148491878)
('(Cost, training accuracy, val accuracy: )', 2.3025850929940459, 0.03224142378127418, 0.23201856148491878)
('(Cost, training accuracy, val accuracy: )', 2.3025850929940459, 0.03224142378127418, 0.1740139211136891)
('(Cost, training accuracy, val accuracy: )', 2.3025850929940459, 0.025793139025019344, 0.1740139211136891)


KeyboardInterrupt: 

In [None]:
y_predicted2 = NN2.predict(X)
save_predictions(y_predicted, 'ans2-uni')

Write down results for Part 2 here:
...

In [126]:
import numpy as np
a = np.array([0.7,0.8,0.6,0.3]).reshape(2,2)
# = np.arange(2*4*2).reshape((2,4,2))
# = np.arange(2*2*4).reshape((2,2,2))
#print("A = ", (np.random.rand(*a.shape)<0.3)/0.3)
#print("B = ", b)

#print("Dot = ", np.dot(a,b))

#print("Matmul = ", np.matmul(a,b))

#np.random.seed(0);
#np.random.rand(4)
#10*np.random.uniform()
#np.random.normal(0,2,(10,10))


0.15606064446828216

In [152]:
import numpy as np
bias = (np.random.uniform()/10)*np.ones((3,1)) 
# Random Initialisation of weights
weight = np.random.normal(0,10*np.random.uniform(),(3, 5)) 
param = [weight, bias]
print("weight", weight)
print("bias", bias)
print("param",param)

weight [[  7.82934072 -10.86774528   5.20450091  -5.36508355  -9.3388032 ]
 [ -5.70175423   5.57958156  -2.6834555   -1.45635979   2.89883828]
 [  3.90676038  -5.48807876   0.28179786  -0.53944225  -1.81890684]]
bias [[ 0.06658591]
 [ 0.06658591]
 [ 0.06658591]]
param [array([[  7.82934072, -10.86774528,   5.20450091,  -5.36508355,  -9.3388032 ],
       [ -5.70175423,   5.57958156,  -2.6834555 ,  -1.45635979,
          2.89883828],
       [  3.90676038,  -5.48807876,   0.28179786,  -0.53944225,
         -1.81890684]]), array([[ 0.06658591],
       [ 0.06658591],
       [ 0.06658591]])]
