In [5]:
import numpy as np
from matplotlib import pyplot as plt

class Two_Layer_NN(object):
    def __init__(self, input_size, hidden_size, output_size, std=1e-1,
                W1 = None, b1=None, W2=None, b2=None):
        self.params = {}
        if W1 is not None:
            self.params['W1'] = W1
        else:
            self.params['W1'] = std * np.random.randn(input_size, hidden_size)
        if b1 is not None:
            self.params['b1'] = b1
        else:
            self.params['b1'] = std * np.random.randn(hidden_size)
        if W2 is not None:
            self.params['W2'] = W2
        else:
            self.params['W2'] = std * np.random.randn(hidden_size, output_size)
        if b2 is not None:
            self.params['b2'] = b2
        else:
            self.params['b2'] = std * np.random.randn(output_size)
            
    def sigmoid(self,x):
        return 1/(1+np.exp(-x))
    
    def sigmoid_grad(self,end_grad):
        return end_grad*(1-end_grad)
        
    
    def loss(self, X, y=None, reg=0.1):
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        N, D = X.shape
        
        hidden = self.sigmoid(X.dot(W1)+b1)
        output = self.sigmoid(hidden.dot(W2)+b2)
        if y is None:
            return output
        
        loss = 0.5*(np.mean((output-y)**2) + reg*(np.sum(W1**2)+np.sum(W2**2)))
        grads = {}
        error = y-output
        d_out = error*self.sigmoid_grad(output)
        error_hidden = d_out.dot(W2.T)
        d_hidden = error_hidden * self.sigmoid_grad(hidden)
        
        dW1 = X.T.dot(d_hidden)
        dW1 -= reg*W1
        dW2 = hidden.T.dot(d_out)
        dW2 -= reg*W2
        
        db1 = np.sum(d_hidden)
        db2 = np.sum(d_out)
        
        grads['W1'] = dW1
        grads['b1'] = db1
        grads['W2'] = dW2
        grads['b2'] = db2
        return loss, grads
    
    def train(self, X, y, X_val=None, y_val=None,
                learning_rate=1e-2, reg=0.0, num_iters=100):
        history = np.zeros((2,num_iters//10))
        for it in range(num_iters):
            loss, grads = self.loss(X, y, reg=reg)
            for param_name in grads:
                self.params[param_name] += learning_rate * grads[param_name]
            if it%10 == 0:
                history[0,it//10] = it
                history[1,it//10] = loss
            
        return history
    
    def predict(self, X):
        return self.loss(X)
    
    def predict_loss(self, X, y):
        scores = self.loss(X)
        return np.mean((scores-y)**2)
        
    def get_weights():
        return self.params
        

In [6]:
model = Two_Layer_NN(2,5,1)
X_train = np.asarray([[1,1],[0,0],[1,0],[0,1]])
y_train = np.asarray([0,0,1,1]).reshape(-1,1)
history = model.train(X_train, y_train, learning_rate=1, num_iters=10000, reg=0.0)
print('The predictions are: ', (model.predict(X_train)))
print('The end loss is: %.6f' % model.predict_loss(X_train, y_train))
plt.plot(history[0],history[1])


TypeError: loss() missing 1 required positional argument: 'y'

In [143]:
import numpy as np
class Two_Layer_NN_Mult(object):
    def __init__(self, input_size, hidden_size, output_size, std=1e-1,
                W1 = None, b1=None, W2=None, b2=None):
        self.params = {}
        if W1 is not None:
            self.params['W1'] = W1
        else:
            self.params['W1'] = std * np.random.randn(input_size, hidden_size)
        if b1 is not None:
            self.params['b1'] = b1
        else:
            self.params['b1'] = std * np.random.randn(hidden_size)
        if W2 is not None:
            self.params['W2'] = W2
        else:
            self.params['W2'] = std * np.random.randn(hidden_size, output_size)
        if b2 is not None:
            self.params['b2'] = b2
        else:
            self.params['b2'] = std * np.random.randn(output_size)
            
    def sigmoid(self,x):
        return 1/(1+np.exp(-x))
    
    def sigmoid_grad(self,end_grad):
        return end_grad*(1-end_grad)
        
    
    def loss(self, X, y=None, reg=0.1):
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        N, D = X.shape
        
        hidden = self.sigmoid(X.dot(W1)+b1)
        output = hidden.dot(W2)+b2
        if y is None:
            return output
        
        loss = np.mean((output-y)**2) + reg*(np.sum(W1**2)+np.sum(W2**2))
        grads = {}
        error = y-output
        d_out = error
        error_hidden = d_out.dot(W2.T)
        d_hidden = error_hidden * self.sigmoid_grad(hidden)
        
        dW1 = X.T.dot(d_hidden)
        dW2 = hidden.T.dot(d_out)
        
        db1 = np.sum(d_hidden)
        db2 = np.sum(d_out)
        
        grads['W1'] = dW1
        grads['b1'] = db1
        grads['W2'] = dW2
        grads['b2'] = db2
        return loss, grads
    
    def train(self, X, y, X_val=None, y_val=None,
                learning_rate=1e-2, reg=0.0, num_iters=100):
        
        for it in range(num_iters):
            loss, grads = self.loss(X, y, reg=reg)
            for param_name in grads:
                self.params[param_name] += learning_rate * grads[param_name]
            
        return
    
    def predict(self, X):
        return self.loss(X)
    
    def predict_loss(self, X, y):
        scores = self.loss(X)
        return np.mean((scores-y)**2)
        
        
        

In [144]:
model_mul = Two_Layer_NN_Mult(2,20,1)
X_train = np.random.randint(0,10,(100,2))
y_train = np.prod(X_train, 1).reshape(-1,1)

model_mul.train(X_train, y_train, learning_rate=0.01, num_iters=5000)
print(model.predict(X_train)[:10])
print(y_train[:10])



[[0.52311383]
 [0.98721382]
 [0.98175992]
 [0.99609823]
 [0.97941532]
 [0.99623333]
 [0.99594566]
 [0.99624023]
 [0.98175992]
 [0.99624023]]
[[24]
 [ 0]
 [ 0]
 [32]
 [ 4]
 [27]
 [45]
 [ 9]
 [ 0]
 [ 9]]


In [2]:
import numpy as np
class Two_Layer_NN(object):
    def __init__(self, input_size, hidden_size, output_size, std=1e-3):
        self.params = {}
        self.params['W1'] = std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = std * np.random.randn(hidden_size)
        self.params['W2'] = std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = std * np.random.randn(output_size)
    
    def loss(self, X, y=None, reg=0.1, structure='Relu'):
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        N, D = X.shape
        
        scores_layer1 = X.dot(W1) + b1
        hidden = np.maximum(scores_layer1, 0)
        scores = hidden.dot(W2) + b2
        if y is None:
            return scores
        
        loss = 0.5*np.sum((scores-y)**2)/N + reg*(np.sum(W1**2)+np.sum(W2**2))
        
        grads = {}
        dldscores = scores-y
        
        dldb2 = np.sum(dldscores,axis=0)
        dldW2 = hidden.T.dot(dldscores)
        dldW2 += reg*(W2**2)
        
        dldh = dldscores.dot(W2.T)
        dldh[scores_layer1<0] = 0
        dldb1 = np.sum(dldh,axis=0)
        dldW1 = X.T.dot(dldh)
        dldW1 += reg*(W1**2)
        
        grads['W1'] = dldW1
        grads['b1'] = dldb1
        grads['W2'] = dldW2
        grads['b2'] = dldb2
        return loss, grads
    
    def train(self, X, y, X_val=None, y_val=None,
                learning_rate=1e-2, reg=0.0, num_iters=100):
        
        for it in range(num_iters):
            X_batch = X
            y_batch = y
            loss, grads = self.loss(X_batch, y_batch, reg=reg)
            for param_name in grads:
                self.params[param_name] -= learning_rate * grads[param_name]
        return
    
    def predict(self, X):
        y_pred = None
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        scores = np.maximum(np.maximum(0,X.dot(W1)+b1).dot(W2)+b2,0)
        y_pred = scores
        return y_pred
