In [2]:
import numpy as np
from matplotlib import pyplot as plt

class Two_Layer_NN(object):
    def __init__(self, input_size, hidden_size, output_size, std=1e-1,
                W1 = None, b1=None, W2=None, b2=None):
        self.params = {}
        if W1 is not None:
            self.params['W1'] = W1
        else:
            self.params['W1'] = std * np.random.randn(input_size, hidden_size)
        if b1 is not None:
            self.params['b1'] = b1
        else:
            self.params['b1'] = std * np.random.randn(hidden_size)
        if W2 is not None:
            self.params['W2'] = W2
        else:
            self.params['W2'] = std * np.random.randn(hidden_size, output_size)
        if b2 is not None:
            self.params['b2'] = b2
        else:
            self.params['b2'] = std * np.random.randn(output_size)
            
    def sigmoid(self,x):
        return 1/(1+np.exp(-x))
    
    def sigmoid_grad(self,end_grad):
        return end_grad*(1-end_grad)
    
    def relu(self,x): 
        return np.maximum(0, x)  
    
   # def relu_grad(self,output):
   #     return 1.0 / (2 * k) * np.log(1 + np.exp(2*k*x))

  
    #Adjust the reg hyper parameter here    
    def loss(self, X, y=None, reg=0.1):
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        N, D = X.shape
        
        #forward pass
        hidden = self.sigmoid(X.dot(W1)+b1)
        output = self.sigmoid(hidden.dot(W2)+b2)
        #hidden = self.relu(X.dot(W1)+b1)
        #output = self.relu(hidden.dot(W2)+b2)
        if y is None:
            return output
        
        loss = 0.5*(np.mean((output-y)**2) + reg*(np.sum(W1**2)+np.sum(W2**2)))
        grads = {}
        error = y-output
        #d_out = error*self.relu_grad(output)
        d_out = error*self.sigmoid_grad(output)
        error_hidden = d_out.dot(W2.T)
        #d_hidden = error_hidden * self.relu_grad(hidden)
        d_hidden = error_hidden * self.sigmoid_grad(hidden)
        
        dW1 = X.T.dot(d_hidden)
        dW1 -= reg*W1
        dW2 = hidden.T.dot(d_out)
        dW2 -= reg*W2
        
        db1 = np.sum(d_hidden)
        db2 = np.sum(d_out)
        
        grads['W1'] = dW1
        grads['b1'] = db1
        grads['W2'] = dW2
        grads['b2'] = db2
        return loss, grads
    
#Adjust the hyper parameter learning rate here
    def train(self, X, y, X_val=None, y_val=None,
                learning_rate = 0.5, reg = 0.1, num_iters = 10000):
        history = np.zeros((2,num_iters//10))
        for it in range(num_iters):
            loss, grads = self.loss(X, y, reg=reg)
            for param_name in grads:
                self.params[param_name] += learning_rate * grads[param_name]
            if it%10 == 0:
                history[0,it//10] = it
                history[1,it//10] = loss
            
        return history
    
    def predict(self, X):
        return self.loss(X)
    
    def predict_loss(self, X, y):
        scores = self.loss(X)
        return np.mean((scores-y)**2)
        
    def get_weights():
        return self.params
    
    
    def accuracy(self, actual, predicted):
        return np.mean((predicted==actual))

In [3]:
def random_Generator(low, high, num):
    xArr= np.random.random_integers(low, high, size = (num,1))
    yArr= np.random.random_integers(low, high, size = (num,1))
    inputAns = np.multiply(xArr,yArr)
    inputArr= np.concatenate((xArr, yArr), axis=1)

    inputArr = inputArr.astype(float)
    inputAns = inputAns.astype(float)
    
    for x in np.nditer(inputArr, op_flags=['readwrite']):
         x[...] = x/100

    for x in np.nditer(inputAns, op_flags=['readwrite']):
          x[...] = x/100  
    
    return inputArr, inputAns 

In [5]:
# Multiplication
I = 10000 #iteration
reg = 0.1 #reg
L = 1e-2 #learning rate
model2 = Two_Layer_NN(2,2,1)
X_train,y_train = random_Generator(-100,100,1000)

history2 = model2.train(X_train, y_train, learning_rate= 1, reg=0.0, num_iters=10000)
print('The predictions are: ', (model2.predict(X_train)))
print('The accuracy is: ', (model2.accuracy(y_train, model2.predict(X_train))))
print('The end loss is: %.6f' % model2.predict_loss(X_train, y_train))
plt.plot(history2[0],history2[1])

  from ipykernel import kernelapp as app
  app.launch_new_instance()


The predictions are:  [[  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149714e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149714e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.38149715e-276]
 [  6.381497

[<matplotlib.lines.Line2D at 0x10cd78be0>]

In [38]:
# XOR
I = 10000 #iteration
reg = 0.1 #reg
L = 1e-2 #learning rate
model = Two_Layer_NN(2,2,1)
X_train = np.asarray([[1,1],[0,0],[1,0],[0,1]])
y_train = np.asarray([0,0,1,1]).reshape(-1,1)
print(y_train)
history = model.train(X_train, y_train, learning_rate= 1, reg=0.0, num_iters=10000)
print('The predictions are: ', (model.predict(X_train)))
print('The accuracy is: ', (model.accuracy(y_train, model.predict(X_train))))
print('The end loss is: %.6f' % model.predict_loss(X_train, y_train))
plt.plot(history[0],history[1])

[[0]
 [0]
 [1]
 [1]]
The predictions are:  [[ 0.02088493]
 [ 0.50022913]
 [ 0.49904337]
 [ 0.98420326]]
The accuracy is:  0.0
The end loss is: 0.125468


[<matplotlib.lines.Line2D at 0x110fbe240>]