In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [117]:
class FeedforwardNN():
    """Fully connected feed-forward network trained one sample at a time.

    Hidden layers use a sigmoid activation and the output layer uses softmax.
    ``forward``/``predict`` transpose their input, so a sample given as a row
    vector ``(1, n_inputs)`` becomes a column vector inside the network.

    Parameters
    ----------
    layers : sequence of int
        Number of units per layer, input layer first (e.g. ``[2, 9, 2]``).
    """

    def __init__(self, layers):
        self._layers = layers
        self._params = {}
        # Forward-pass cache written by ``forward``: {'A': {...}, 'Z': {...}}.
        self.grad = {}
        # Parameter gradients written by ``backwards``, read by ``optimize``.
        self.gradients = {}
        # Name -> callable lookup tables for the available activations.
        self.activation = {
            "relu": self.relu,
            "sigmoid": self.sigmoid,
        }
        self.derivations = {
            "relu": self.d_relu,
            "sigmoid": self.d_sigmoid,
        }
        self.__init_weights()

    def __init_weights(self):
        """Create uniform-random weights in [0, 1) and zero biases per layer."""
        self._params['W'] = {}
        self._params['B'] = {}
        for layer in range(1, len(self._layers)):
            fan_out, fan_in = self._layers[layer], self._layers[layer - 1]
            self._params['W'][layer] = np.random.rand(fan_out, fan_in)
            self._params['B'][layer] = np.zeros((fan_out, 1))

    def sigmoid(self, val):
        """Logistic function, evaluated without overflowing for large |val|."""
        val = np.asarray(val, dtype=float)
        # exp(-|val|) always lies in (0, 1], so neither branch can overflow.
        decay = np.exp(-np.abs(val))
        return np.where(val >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    def d_sigmoid(self, val):
        """Derivative of the logistic function at ``val``.

        Uses ``s * (1 - s)``; the direct form ``exp(-v) / (exp(-v) + 1)**2``
        evaluates to ``inf / inf = nan`` for large negative ``v``.
        """
        s = self.sigmoid(val)
        return s * (1.0 - s)

    def relu(self, val):
        """Rectified linear unit: ``val`` where positive, otherwise 0."""
        return np.where(val > 0, val, 0)

    def d_relu(self, val):
        """Derivative of ReLU: 1 where ``val`` is positive, otherwise 0."""
        return np.where(val > 0, 1, 0)

    def softmax(self, val):
        """Softmax over axis 0, shifted by the global max for stability."""
        exps = np.exp(val - val.max())
        return exps / np.sum(exps, axis=0)

    def d_softmax(self, val):
        """Element-wise ``s * (1 - s)`` where ``s = softmax(val)``."""
        s = self.softmax(val)
        return s * (1 - s)

    def cross_entropy(self, predictions, targets, epsilon=1e-9):
        """Cross-entropy between predicted probabilities and target labels.

        Predictions are squeezed and clipped to ``[epsilon, 1 - epsilon]`` so
        ``log`` never sees 0. The sum is divided by the leading dimension of
        the squeezed predictions; for a single column-vector prediction that
        is the number of classes.
        """
        predictions = np.clip(predictions.squeeze(), epsilon, 1. - epsilon)

        N = predictions.shape[0]
        ce = -np.sum(targets * np.log(predictions)) / N
        return ce

    def _forward_pass(self, X):
        """Propagate ``X`` through every layer.

        Returns ``(activations, pre_activations)``: dicts keyed by layer
        index, with ``activations[0]`` holding the transposed input.
        """
        last = len(self._layers) - 1
        weights, biases = self._params['W'], self._params['B']

        A = X.T
        activations = {0: A}
        pre_activations = {}
        for layer in range(1, last + 1):
            Z = np.dot(weights[layer], A) + biases[layer]
            # Softmax on the output layer, sigmoid on every hidden layer.
            A = self.softmax(Z) if layer == last else self.sigmoid(Z)
            activations[layer] = A
            pre_activations[layer] = Z
        return activations, pre_activations

    def predict(self, X):
        """Return the output-layer activations for ``X`` without caching."""
        activations, _ = self._forward_pass(X)
        return activations[len(self._layers) - 1]

    def forward(self, X):
        """Run a forward pass and cache A/Z per layer in ``self.grad``.

        Returns the output-layer activations. The cache is what ``backwards``
        reads, so ``forward`` must be called first.
        """
        activations, pre_activations = self._forward_pass(X)
        self.grad = {
            'A': activations,
            'Z': pre_activations,
        }
        return activations[len(self._layers) - 1]

    def backwards(self, y_true):
        """Back-propagate the error of the last ``forward`` call.

        The output-layer error is ``A - y_true`` (both reshaped to the shape
        of the output pre-activation). Hidden-layer errors are propagated
        through the next layer's weights and scaled by the sigmoid derivative.

        Returns (and stores in ``self.gradients``) a dict with keys ``'Z'``,
        ``'W'`` and ``'b'``, each mapping layer index -> gradient array.
        """
        n_layers = len(self._layers)
        W = self._params['W']
        cache = self.grad

        gradients = {
            'Z': {},
            'W': {},
            'b': {}
        }
        for layer in reversed(range(1, n_layers)):
            if layer == n_layers - 1:
                z_shape = cache['Z'][layer].shape
                dZ = cache['A'][layer].reshape(z_shape) - y_true.reshape(z_shape)
            else:
                # dZ still holds the error of layer + 1 from the previous pass.
                dZ = np.dot(W[layer + 1].T, dZ) * self.d_sigmoid(cache['Z'][layer])

            dW = np.dot(dZ, cache['A'][layer - 1].T)
            db = dZ

            gradients['Z'][layer] = dZ
            gradients['W'][layer] = dW
            gradients['b'][layer] = db
        self.gradients = gradients
        return gradients

    def optimize(self, lr):
        """Apply one gradient-descent step of size ``lr`` to all parameters.

        Requires ``backwards`` to have populated ``self.gradients``.
        """
        for layer in range(1, len(self._layers)):
            self._params['W'][layer] -= lr * self.gradients['W'][layer]
            self._params['B'][layer] -= lr * self.gradients['b'][layer]

    def reset_gradients(self):
        """Discard the stored parameter gradients."""
        self.gradients = {}

    def train(self, data, labels, n_epochs, lr=0.01):
        """Train with one parameter update per sample.

        Parameters
        ----------
        data, labels : iterables of arrays, paired by position.
        n_epochs : int, number of passes over the data.
        lr : float, step size passed to ``optimize``.

        Returns
        -------
        list
            Summed per-sample loss divided by ``len(labels)``, one entry per
            epoch.
        """
        error = []
        for _ in range(n_epochs):
            epoch_loss = 0
            for x, y in zip(data, labels):
                pred = self.forward(x)
                epoch_loss = epoch_loss + self.cross_entropy(pred, y)
                self.backwards(y)
                self.optimize(lr)

            error.append(epoch_loss / len(labels))
        return error

In [119]:
# XOR truth table. Each sample is a (1, 2) row vector; each label is a (1, 2)
# one-hot row: [1, 0] when the two inputs differ, [0, 1] when they match.
simpleNet = FeedforwardNN(layers=[2, 9, 2])

data = [np.array([[a, b]]) for a in (0, 1) for b in (0, 1)]
labels = [np.array([[a ^ b, 1 - (a ^ b)]]) for a in (0, 1) for b in (0, 1)]

error = simpleNet.train(data, labels, n_epochs=10000, lr=0.1)



In [120]:
# Query the trained XOR network with input (1, 1) and score it against the
# one-hot label [0, 1].
pred = simpleNet.predict(np.array([[1, 1]])).reshape(-1)
print(pred)

loss = simpleNet.cross_entropy(pred, np.array([[0, 1]]))
print(loss)

[0.00124636 0.99875364]
0.0006235699783381693


In [83]:
def encodeData(data, num_classes=None):
    """One-hot encode non-negative integer class labels.

    Parameters
    ----------
    data : array-like of int
        Class labels; flattened to 1-D before encoding.
    num_classes : int, optional
        Width of the encoding. Defaults to ``max(data) + 1`` (0 for empty
        input), which under-sizes the result when the highest class is
        absent from ``data``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_labels, num_classes)`` with a single 1 per
        row at the label's column.
    """
    labels = np.asarray(data).ravel()
    if num_classes is None:
        # An empty array has no maximum; use zero classes instead of raising.
        num_classes = int(labels.max()) + 1 if labels.size else 0

    res = np.zeros((labels.size, num_classes))
    if labels.size:
        res[np.arange(labels.size), labels] = 1
    return res
    

In [84]:


from sklearn import datasets
# Load iris and wrap every sample / one-hot label in a leading axis, giving
# one 2-D single-row array per sample (the format FeedforwardNN.train is fed).
iris = datasets.load_iris()
X = [np.array([features]) for features in iris.data]
y = [np.array([one_hot]) for one_hot in encodeData(iris.target)]


In [92]:
# 4 input features -> 5 hidden units -> 3 classes, trained with the default
# learning rate. The per-epoch loss list is the cell's displayed value.
nn = FeedforwardNN(layers=[4, 5, 3])
nn.train(X, y, n_epochs=2000)


In [96]:
# Spot-check three samples: print the predicted class probabilities followed
# by the one-hot target, with a blank line between samples.
for position, sample_idx in enumerate((0, 20, 50)):
    if position > 0:
        print()
    print(nn.predict(X[sample_idx]))
    print(y[sample_idx])



[[9.98733027e-01]
 [8.71122760e-04]
 [3.95850000e-04]]
[[1. 0. 0.]]

[[9.98618231e-01]
 [9.57873367e-04]
 [4.23895378e-04]]
[[1. 0. 0.]]

[[0.00118682]
 [0.98815061]
 [0.01066257]]
[[0. 1. 0.]]


In [4]:
class FeedforwardNN():
    """Feed-forward network with sigmoid hidden layers and a softmax output.

    This variant exposes the forward pass (``forward_prop``) and the gradient
    computation (``backpropagation``) separately, plus a verbose debugging
    version of the latter.

    Parameters
    ----------
    layers : sequence of int
        Number of units per layer, input layer first (e.g. ``[3, 4, 4, 2]``).
    """

    def __init__(self, layers):
        self._layers = layers
        self._params = {}
        # Forward-pass cache written by ``forward_prop``: {'A': {...}, 'Z': {...}}.
        self.grad = {}
        # Name -> callable lookup tables for the available activations.
        self.activation = {
            "relu": self.relu,
            "sigmoid": self.sigmoid,
        }
        self.derivations = {
            "relu": self.d_relu,
            "sigmoid": self.d_sigmoid,
        }
        self.__init_weights()

    def __init_weights(self):
        """Create uniform-random weights in [0, 1) and zero biases per layer."""
        self._params['W'] = {}
        self._params['B'] = {}
        for layer in range(1, len(self._layers)):
            fan_out, fan_in = self._layers[layer], self._layers[layer - 1]
            self._params['W'][layer] = np.random.rand(fan_out, fan_in)
            self._params['B'][layer] = np.zeros((fan_out, 1))

    def sigmoid(self, val):
        """Logistic function, evaluated without overflowing for large |val|."""
        val = np.asarray(val, dtype=float)
        # exp(-|val|) always lies in (0, 1], so neither branch can overflow.
        decay = np.exp(-np.abs(val))
        return np.where(val >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    def d_sigmoid(self, val):
        """Derivative of the logistic function at ``val``.

        Uses ``s * (1 - s)``; the direct form ``exp(-v) / (exp(-v) + 1)**2``
        evaluates to ``inf / inf = nan`` for large negative ``v``.
        """
        s = self.sigmoid(val)
        return s * (1.0 - s)

    def relu(self, val):
        """Rectified linear unit: ``val`` where positive, otherwise 0."""
        return np.where(val > 0, val, 0)

    def d_relu(self, val):
        """Derivative of ReLU: 1 where ``val`` is positive, otherwise 0."""
        return np.where(val > 0, 1, 0)

    def softmax(self, val):
        """Softmax over axis 0, shifted by the global max for stability."""
        exps = np.exp(val - val.max())
        return exps / np.sum(exps, axis=0)

    def d_softmax(self, val):
        """Element-wise ``s * (1 - s)`` where ``s = softmax(val)``."""
        s = self.softmax(val)
        return s * (1 - s)

    def cross_entropy_loss(self, Y_pred, Y_true, epsilon=1e-9):
        """Element-wise binary cross-entropy between predictions and targets.

        Original author's note (translated): take the mean of this loss to
        obtain the cost (Andrew Ng).

        Predictions are clipped to ``[epsilon, 1 - epsilon]`` so an exact 0 or
        1 cannot produce ``log(0)`` (which would yield ``inf`` or ``nan``).

        Returns the per-element losses with a new leading axis of length 1.
        """
        Y_pred = np.clip(Y_pred.squeeze(), epsilon, 1. - epsilon)
        Y_true = Y_true.squeeze()
        res = -(Y_true * np.log(Y_pred) + (1 - Y_true) * np.log(1 - Y_pred))
        res = np.expand_dims(res, axis=0)
        return res

    def forward_prop(self, X):
        """Run a forward pass and cache A/Z per layer in ``self.grad``.

        ``X`` gets a leading axis and is transposed before entering the
        network, so a 1-D sample of length n becomes an ``(n, 1)`` column.
        Returns the softmax output of the last layer.
        """
        last = len(self._layers) - 1
        weights, biases = self._params['W'], self._params['B']

        A = np.expand_dims(X, axis=0).T
        cache = {
            'A': {0: A},
            'Z': {},
        }

        for layer in range(1, last):
            Z = np.dot(weights[layer], A) + biases[layer]
            A = self.sigmoid(Z)
            cache['A'][layer] = A
            cache['Z'][layer] = Z

        Z = np.dot(weights[last], A) + biases[last]
        A = self.softmax(Z)

        cache['A'][last] = A
        cache['Z'][last] = Z
        self.grad = cache

        return A

    def backpropagation(self, y_true, y_pred):
        """Compute parameter gradients from the last ``forward_prop`` call.

        The output-layer error is ``A - y_true`` (both reshaped to the shape
        of the output pre-activation). Hidden-layer errors are propagated
        through the next layer's weights and scaled by the sigmoid derivative.
        ``y_pred`` is accepted for interface symmetry but not used; the cached
        output activation is read instead.

        Returns a dict with keys ``'Z'``, ``'W'`` and ``'b'``, each mapping
        layer index -> gradient array.
        """
        n_layers = len(self._layers)
        W = self._params['W']
        cache = self.grad

        gradients = {
            'Z': {},
            'W': {},
            'b': {}
        }

        for layer in reversed(range(1, n_layers)):
            if layer == n_layers - 1:
                z_shape = cache['Z'][layer].shape
                dZ = cache['A'][layer].reshape(z_shape) - y_true.reshape(z_shape)
            else:
                # dZ still holds the error of layer + 1 from the previous pass.
                dZ = np.dot(W[layer + 1].T, dZ) * self.d_sigmoid(cache['Z'][layer])

            dW = np.dot(dZ, cache['A'][layer - 1].T)
            db = dZ

            gradients['Z'][layer] = dZ
            gradients['W'][layer] = dW
            gradients['b'][layer] = db

        return gradients

    def backpropagation_testing(self, y_true, y_pred):
        """Verbose variant of ``backpropagation`` for checking array shapes.

        Prints the shapes of the weights and the cached A/Z arrays, then the
        shapes of the hand-derived gradients of the last two layers, and
        finally returns the same gradients as ``backpropagation``.

        The hand-derived section indexes from the output layer backwards, so
        the network needs at least one hidden layer.
        """
        last = len(self._layers) - 1
        W = self._params['W']
        cache = self.grad

        print("W")
        for i, x in W.items():
            print("  ", i, x.shape)
        print("A")
        for i, x in cache['A'].items():
            print("  ", i, x.shape)
        print("Z")
        for i, x in cache['Z'].items():
            print("  ", i, x.shape)

        print()
        print("-----Testing------")

        # Output layer, written out by hand.
        out_shape = cache['Z'][last].shape
        dZ_out = cache['A'][last].reshape(out_shape) - y_true.reshape(out_shape)
        print(dZ_out.shape)

        dW_out = np.dot(dZ_out, cache['A'][last - 1].T)
        print(dW_out.shape)

        dB_out = dZ_out
        print(dB_out.shape)

        # Last hidden layer, written out by hand.
        dZ_prev = np.dot(W[last].T, dZ_out) * self.d_sigmoid(cache['Z'][last - 1])
        print(dZ_prev.shape)

        dW_prev = np.dot(dZ_prev, cache['A'][last - 2].T)
        print(dW_prev.shape)

        dB_prev = dZ_prev
        print(dB_prev.shape)

        print()

        # The general loop is identical to backpropagation; delegating keeps the
        # two from drifting apart (the copy here propagated a stale error term).
        return self.backpropagation(y_true, y_pred)
        
# Shape reference, where n[l] is the number of units in layer l:
#   W[2]         has shape (n[2], n[1])
#   Z[2], dZ[2]  have shape (n[2], 1)
#   Z[1], dZ[1]  have shape (n[1], 1)
#
# Invariants: dW.shape == W.shape and dZ.shape == Z.shape




In [5]:
# Smoke test on a 3-4-4-2 network: one forward pass on an all-ones input, then
# back-propagation against an arbitrary two-element target. The gradient dict
# is the cell's displayed value.
simpleNet = FeedforwardNN(layers=[3, 4, 4, 2])

out = simpleNet.forward_prop(X=np.array([1, 1, 1]))

simpleNet.backpropagation(
    y_true=np.array([1, 2]).squeeze(),
    y_pred=out.squeeze(),
)

{'Z': {3: array([[-0.32877173],
         [-1.67122827]]),
  2: array([[-0.24394369],
         [-0.31304895],
         [-0.06790937],
         [-0.09004923]]),
  1: array([[-0.0226095 ],
         [-0.02705689],
         [-0.04391744],
         [-0.0364433 ]])},
 'W': {3: array([[-0.22936223, -0.25064715, -0.25726896, -0.25982073],
         [-1.1659051 , -1.27410164, -1.30776197, -1.32073324]]),
  2: array([[-0.2030698 , -0.21530939, -0.19333892, -0.19907659],
         [-0.26059615, -0.27630302, -0.24810867, -0.25547174],
         [-0.05653084, -0.05993811, -0.05382194, -0.05541921],
         [-0.07496107, -0.07947918, -0.07136902, -0.07348702]]),
  1: array([[-0.0226095 , -0.0226095 , -0.0226095 ],
         [-0.02705689, -0.02705689, -0.02705689],
         [-0.04391744, -0.04391744, -0.04391744],
         [-0.0364433 , -0.0364433 , -0.0364433 ]])},
 'b': {3: array([[-0.32877173],
         [-1.67122827]]),
  2: array([[-0.24394369],
         [-0.31304895],
         [-0.06790937],
       

In [805]:
# Scratch check: promote a 1-D vector to a single-row 2-D array and print the
# shape before and after.
a = np.random.rand(1, 3)
b = np.random.rand(3)

print(b.shape)
b = b[np.newaxis, ...]
print(b.shape)

(3,)
(1, 3)


In [None]:
        #error = {}
        #delta = {}
       # 
        #error[n_layers-1] = np.expand_dims(y_true.squeeze()-y_pred.squeeze(), axis=0)
        #delta[n_layers] = np.multiply(error[n_layers-1], self.d_sigmoid(y_pred))
       # 
        #for layer in reversed(range(1, len(self._layers))):
        ##    print(layer)
        #    print(W[layer].shape, delta[layer+1].shape)
        #    print(np.dot(delta[layer+1], W[layer]).shape, self.d_sigmoid(grad['A'][layer]).shape)
        #    delta[layer] = np.multiply(np.dot(delta[layer+1], W[layer]), self.d_sigmoid(grad['A'][layer]))
            
            
            #error[layer] = np.dot(delta[layer+1], W[layer])
            #delta[layer] = np.multiply(error[layer], self.d_sigmoid(grad['A'][layer]))
            
            
            #error[layer] = np.dot( error[layer + 1], np.dot(self._params['W'][layer], self.derivations['sigmoid'](self.grad['A'][layer])))
            #print(delta[layer].shape)
            
            #e_prev_layer = np.expand_dims(error[layer + 1], axis=0)
            
            #error[layer] = e_prev_layer * self._params['W'][layer] #* self.derivations['sigmoid'](self.grad['A'][layer])
        
        #error = {}
        #delta = {}
        
        #Output layer
        #error[len(self._layers)-1] = y_true - y_pred
        #delta[len(self._layers)-1] = error[len(self._layers)-1] * self.derivations['sigmoid'](y_pred)
        
        #for layer in reversed(range(len(self._layers)-1)):
        #    error[layer] = np.dot(self._params['W'][layer+1].T, error[layer+1])
        #    delta[layer] = error[layer+1] * self.derivations['sigmoid'](self.grad['A'][layer])
        #    
        #    print("layer", layer)
        #    
        #    print(delta[layer].shape)
        
        
            # NOTE(review): orphaned scratch fragment. These statements are indented
            # like a method body and use names that this cell never defines
            # (n_layers, grad, y_true, W, self), so the cell cannot run on its own.
            # It appears to be an earlier draft of the hand-derived section of
            # FeedforwardNN.backpropagation_testing - confirm before deleting.
            print("-----Testing------")
        
        print(n_layers)
        
        # Layer-2 error: cached activation minus the target, both reshaped to
        # the shape of Z[2].
        dZ2 = grad['A'][2].reshape(grad['Z'][2].shape) - y_true.reshape(grad['Z'][2].shape)
        print(dZ2)
        
        # Layer-2 weight gradient: error times the transposed layer-1 activation.
        dW2 = np.dot(dZ2, grad['A'][1].T)
        print(dW2.shape)
        
        # The bias gradient is the error itself.
        dB2 = dZ2
        print(dB2.shape)
        
        # Layer-1 error: layer-2 error pushed back through W[2], scaled by the
        # sigmoid derivative at Z[1].
        dZ1 = np.dot(W[2].T, dZ2) * self.d_sigmoid(grad['Z'][1])
        print(dZ1.shape)
        
        # Layer-1 weight gradient: error times the transposed input activation.
        dW1 = np.dot(dZ1, grad['A'][0].T)
        print(dW1.shape)
        
        dB1 = dZ1
        print(dB1.shape)