In [286]:
import numpy as np
import matplotlib.pyplot as plt

In [288]:
def sigmoid(x):
    y = 1 / (1 + np.exp(-x))
    return y

def d_sigmoid(y):
    dy = y * (1 - y)
    return dy

def relu(x):
    y = np.maximum(0, x)
    return y

def d_relu(y):
    dy = (y > 0).astype(np.int)
    return dy


class NN():
    def __init__(self, layers, alpha=1, random_state=0, n_steps=1000):
        np.random.seed(random_state)
        self.layers = layers
        self.N = len(layers) - 1
        self.alpha = alpha
        self.n_steps = n_steps
        self.weights = [np.random.normal(0, 1, (a, b)).T for a, b in np.c_[self.layers[1:], self.layers[:-1]]]
        self.biases = [np.zeros(a) for a in self.layers[1:]]
        self.h = sigmoid
        self.dh = d_sigmoid
        self.E = []
       
    
    def predict(self, X):
        for i in range(self.N):
            X = self.h(np.dot(X, self.weights[i]) + self.biases[i])
        return X
    
    
    def train(self, X, y):
        for _ in range(self.n_steps):
            # Forward propagation
            A = [X]
            for i in range(self.N):
                A.append(self.h(np.dot(A[-1], self.weights[i]) + self.biases[i]))

            error2 = 2 * (A[-1] - y)
            self.E.append(np.square(error2).mean())
            
            delta2 = error2 * self.dh(A[-1])
            dW2 = np.dot(A[-2].T, delta2)
            
            error1 = delta2.dot(self.weights[-1].T)
            
            delta1 = error1 * self.dh(A[-2])
            dW1 = A[-3].T.dot(delta1)
            
            self.weights[-1] -= self.alpha * dW2
            self.weights[-2] -= self.alpha * dW1
        
'''
X = np.array([[0,0],
              [1,0],
              [0,1],
              [1,1]])  

y = np.array([[0,1,1,0]]).T
        
nn = NN((2,28,1), n_steps=1000, alpha=1)
nn.train(X, y)

plt.plot([np.square(e).mean() for e in nn.E])
nn.predict(X)
'''        


from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

iris = load_iris()

X = iris['data']
y = iris['target']

y_oh = np.zeros((y.size, 3))

for i in range(y.size):
    y_oh[i][y[i]] = 1
    

nn = NN((4,28,3), n_steps=10000, alpha=0.0001)

nn.train(X, y_oh)

y_hat = [row.argmax() for row in nn.predict(X)]

accuracy_score(y_hat, y)

0.9733333333333334

In [301]:
def sigmoid(x):
    y = 1 / (1 + np.exp(-x))
    return y

def d_sigmoid(y):
    dy = y * (1 - y)
    return dy

def relu(x):
    y = np.maximum(0, x)
    return y

def d_relu(y):
    dy = (y > 0).astype(np.int)
    return dy


class NN():
    def __init__(self, layers, alpha=1, random_state=0, n_steps=1000):
        np.random.seed(random_state)
        self.layers = layers
        self.N = len(layers) - 1
        self.alpha = alpha
        self.n_steps = n_steps
        self.weights = [np.random.normal(0, 1, (a, b)).T for a, b in np.c_[self.layers[1:], self.layers[:-1]]]
        self.biases = [np.zeros(a) for a in self.layers[1:]]
        self.h = sigmoid
        self.dh = d_sigmoid
        self.mse = []
       
    
    def predict(self, X):
        for i in range(self.N):
            X = self.h(np.dot(X, self.weights[i]) + self.biases[i])
        return X
    
    
    def train(self, X, y):
        for _ in range(self.n_steps):
            # Forward propagation
            A = [X]
            for i in range(self.N):
                A.append(self.h(np.dot(A[-1], self.weights[i]) + self.biases[i]))
                
            delta = [None for _ in range(self.N)]
            dW = delta.copy()
            
            for i in range(-1, -self.N-1, -1):
                if i == -1:
                    error = 2 * (A[-1] - y)
                    self.mse.append(error)
                else:
                    error = delta[i+1].dot(self.weights[i+1].T)
                    
                delta[i] = error * self.dh(A[i])
                dW[i] = A[i-1].T.dot(delta[i])
                
            for i in range(-1, -self.N-1, -1):
                self.weights[i] -= self.alpha * dW[i]

            #error2 = 2 * (A[-1] - y)
            #self.E.append(np.square(error2).mean())
            
            #delta2 = error2 * self.dh(A[-1])
            #dW2 = np.dot(A[-2].T, delta2)
            
            #error1 = delta2.dot(self.weights[-1].T)
            
            #delta1 = error1 * self.dh(A[-2])
            #dW1 = A[-3].T.dot(delta1)
            
            #self.weights[-1] -= self.alpha * dW2
            #self.weights[-2] -= self.alpha * dW1
        
'''
X = np.array([[0,0],
              [1,0],
              [0,1],
              [1,1]])  

y = np.array([[0,1,1,0]]).T
        
nn = NN((2,28,1), n_steps=1000, alpha=1)
nn.train(X, y)

plt.plot([np.square(e).mean() for e in nn.E])
nn.predict(X)
'''        


from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

iris = load_iris()

X = iris['data']
y = iris['target']

y_oh = np.zeros((y.size, 3))

for i in range(y.size):
    y_oh[i][y[i]] = 1
    

nn = NN((4,28,28,3), n_steps=10000, alpha=0.0001)

nn.train(X, y_oh)

y_hat = [row.argmax() for row in nn.predict(X)]

accuracy_score(y_hat, y)

0.9733333333333334

In [302]:
nn.mse

[array([[-0.24808787,  1.97164678,  1.95237731],
        [-0.2705067 ,  1.97715845,  1.94995761],
        [-0.26604817,  1.97167674,  1.95098569],
        [-0.26113288,  1.9722399 ,  1.94809494],
        [-0.24492492,  1.96795987,  1.95195187],
        [-0.23889342,  1.97122545,  1.94980964],
        [-0.26617775,  1.96712291,  1.94853002],
        [-0.24886958,  1.97228531,  1.950763  ],
        [-0.27448359,  1.97303708,  1.94775917],
        [-0.25687287,  1.9746344 ,  1.95074952],
        [-0.23677683,  1.97210768,  1.95301308],
        [-0.24483274,  1.96933639,  1.94842422],
        [-0.26460937,  1.9749343 ,  1.95115258],
        [-0.28076631,  1.96781277,  1.95132727],
        [-0.23274504,  1.97122452,  1.95696046],
        [-0.23134065,  1.96413216,  1.95362376],
        [-0.25321832,  1.97040871,  1.95326598],
        [-0.255998  ,  1.97300942,  1.95132441],
        [-0.23608746,  1.97559871,  1.95111175],
        [-0.24324352,  1.96669787,  1.95086814],
        [-0.24705727