In [249]:
import numpy as np
from funcs import softmax,sigmoid, sigmoid_grad,gradcheck_naive
import random
import matplotlib.pyplot as plt
np.random.seed(0)

import copy

In [250]:
class FNN(object):
    """
    Three-layer feed-forward neural network: input -> hidden -> output.

    The hidden layer applies ``sigmoid`` and the output layer applies
    ``softmax`` (both imported from ``funcs``). Training minimises the mean
    cross-entropy with full-batch gradient descent; the raw gradient is
    subtracted from the parameters, i.e. the step size is fixed at 1.

    All parameters live in one flat vector ``self.params`` laid out as
    ``[W1, b1, W2, b2]``. ``self.W1``, ``self.b1``, ``self.W2`` and
    ``self.b2`` are reshaped slices of that vector and are re-derived after
    every update so they always reflect ``self.params``.
    """

    def __init__(self, dimensions, initial_params):
        """
        Arguments:
        dimensions -- sequence (Dx, H, Dy): input size, number of hidden
                      units and output size.
        initial_params -- flat parameter vector of length
                          (Dx + 1) * H + (H + 1) * Dy. It is stored by
                          reference and modified in place by
                          ``update_weights``, so pass a copy if the caller
                          needs to keep the starting values.
        """
        self.dimensions = dimensions
        self.params = initial_params
        self._unpack_params()

    def _unpack_params(self):
        """Slice the flat parameter vector into W1, b1, W2 and b2."""
        self.Dx, self.H, self.Dy = (self.dimensions[0], self.dimensions[1], self.dimensions[2])

        ofs = 0
        self.W1 = np.reshape(self.params[ofs:ofs + self.Dx * self.H], (self.Dx, self.H))
        ofs += self.Dx * self.H
        self.b1 = np.reshape(self.params[ofs:ofs + self.H], (1, self.H))
        ofs += self.H
        self.W2 = np.reshape(self.params[ofs:ofs + self.H * self.Dy], (self.H, self.Dy))
        ofs += self.H * self.Dy
        self.b2 = np.reshape(self.params[ofs:ofs + self.Dy], (1, self.Dy))

    def _forward(self, x):
        """Return (hidden activations, output probabilities) for inputs x."""
        h = sigmoid(np.dot(x, self.W1) + self.b1)
        yhat = softmax(np.dot(h, self.W2) + self.b2)
        return h, yhat

    def update_weights(self, gradient):
        """
        Take one gradient-descent step: subtract ``gradient`` (a flat vector
        with the same layout as ``self.params``) in place, then refresh the
        per-layer weight attributes.
        """
        self.params -= gradient
        self._unpack_params()

    def predict(self, x):
        """
        Run the forward pass on ``x`` (one example per row, Dx columns) and
        return the softmax output, one row of Dy values per example.
        """
        _, yhat = self._forward(x)
        return yhat

    def train(self, x, y, epoch=50):
        """
        Train with full-batch gradient descent.

        Arguments:
        x -- M x Dx array, one training example per row.
        y -- M x Dy array of one-hot rows (entries equal to 1 mark the
             true class).
        epoch -- number of gradient steps to take.

        Returns:
        (cost, cost_history). ``cost_history`` holds the mean cross-entropy
        measured at the start of each epoch, i.e. *before* that epoch's
        update; ``cost`` is its last entry, or None when ``epoch`` is 0.
        """
        n_samples = x.shape[0]
        cost = None  # stays None if no epoch is run
        cost_history = []
        for _ in range(epoch):
            ### forward propagation
            h, yhat = self._forward(x)

            # mean cross-entropy over the batch (picks the probability
            # assigned to the true class of every example)
            cost = np.sum(-np.log(yhat[y == 1])) / n_samples
            cost_history.append(cost)

            #=====backward propagation=====
            # gradient of the mean cost w.r.t. the softmax inputs
            d3 = (yhat - y) / n_samples
            gradW2 = np.dot(h.T, d3)
            gradb2 = np.sum(d3, 0, keepdims=True)

            # back through the hidden layer; sigmoid_grad is applied to the
            # sigmoid *output* h
            dh = np.dot(d3, self.W2.T)
            grad_h = sigmoid_grad(h) * dh

            gradW1 = np.dot(x.T, grad_h)
            gradb1 = np.sum(grad_h, 0)

            ### Stack gradients in the same order as self.params
            grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                                   gradW2.flatten(), gradb2.flatten()))

            self.update_weights(grad)

        return cost, cost_history



In [251]:
# Synthetic problem: N random inputs, each assigned one random class.
N = 20
dimensions = [10, 5, 10]   # (input size, hidden units, number of classes)
data = np.random.randn(N, dimensions[0])   # each row will be a datum

# Draw every class index from NumPy's global RNG, which the imports cell
# seeds with np.random.seed(0). The stdlib `random` module is never seeded
# in this notebook, so using it here made the labels (and every cost printed
# below) change on each kernel restart.
label_idx = np.random.randint(0, dimensions[2], size=N)   # upper bound exclusive

# One-hot encode: exactly one 1 per row, in the column of the drawn class.
labels = np.zeros((N, dimensions[2]))
labels[np.arange(N), label_idx] = 1

In [252]:

def forward_backward_prop(data, labels, params, dimensions):
    """
    One forward and backward pass through a two-layer sigmoidal network.

    Runs the forward pass, evaluates the mean cross-entropy cost, and
    back-propagates it to obtain the gradient of every parameter.

    Arguments:
    data -- M x Dx matrix, where each row is a training example.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Flat vector of model parameters laid out as [W1, b1, W2, b2].
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension

    Returns:
    (cost, grad) -- the scalar cost and a flat gradient vector laid out
                    exactly like params.
    """
    n_in, n_hidden, n_out = dimensions[0], dimensions[1], dimensions[2]
    n_examples = data.shape[0]

    # Boundaries of each parameter group inside the flat vector.
    end_W1 = n_in * n_hidden
    end_b1 = end_W1 + n_hidden
    end_W2 = end_b1 + n_hidden * n_out
    end_b2 = end_W2 + n_out

    W1 = np.reshape(params[0:end_W1], (n_in, n_hidden))
    b1 = np.reshape(params[end_W1:end_b1], (1, n_hidden))
    W2 = np.reshape(params[end_b1:end_W2], (n_hidden, n_out))
    b2 = np.reshape(params[end_W2:end_b2], (1, n_out))

    # ---- forward pass ----
    hidden = sigmoid(np.dot(data, W1) + b1)
    probs = softmax(np.dot(hidden, W2) + b2)

    # Mean negative log-probability assigned to the true class.
    true_class_probs = probs[labels == 1]
    cost = np.sum(-np.log(true_class_probs)) / n_examples

    # ---- backward pass ----
    # Gradient of the mean cost with respect to the softmax inputs.
    delta_out = (probs - labels) / n_examples
    grad_W2 = np.dot(hidden.T, delta_out)
    grad_b2 = delta_out.sum(axis=0, keepdims=True)

    # Back through the hidden layer; sigmoid_grad receives the sigmoid output.
    delta_hidden = sigmoid_grad(hidden) * np.dot(delta_out, W2.T)
    grad_W1 = np.dot(data.T, delta_hidden)
    grad_b1 = delta_hidden.sum(axis=0)

    # Flatten and stack in the same order the parameters were unpacked.
    pieces = (grad_W1, grad_b1, grad_W2, grad_b2)
    grad = np.concatenate([piece.flatten() for piece in pieces])

    return cost, grad

In [253]:
# Flat parameter vector shared by both implementations below.
# Each layer contributes a weight matrix plus one bias per output unit.
n_hidden_params = (dimensions[0] + 1) * dimensions[1]   # W1 and b1
n_output_params = (dimensions[1] + 1) * dimensions[2]   # W2 and b2
initial_params = np.random.randn(n_hidden_params + n_output_params)

In [254]:
# Class-based implementation. FNN updates its parameter vector in place,
# so hand it a private copy and leave `initial_params` untouched.
model = FNN(dimensions, initial_params.copy())

cost, hcost = model.train(data, labels)
cost

0.9608410135525742

In [255]:
# Functional implementation: 50 plain gradient-descent steps, matching the
# default `epoch=50` of FNN.train, starting from the same parameters.
# Train a copy so `initial_params` keeps its starting values; updating it in
# place made this cell non-idempotent and corrupted the starting point for
# any re-run of the FNN cell.
trained_params = initial_params.copy()
for step in range(50):
    cost, gradient = forward_backward_prop(data, labels, trained_params, dimensions)
    trained_params -= gradient

In [256]:
# Cost reported by the functional loop; the recorded value is identical to
# the one printed for the FNN class, i.e. the two implementations agree.
cost

0.9608410135525742