In [1]:
import numpy as np

In [2]:
# création de l'exemple XOR

X = np.array([[0, 0],
[0, 1],
[1, 0],
[1, 1]]).T

y = np.array([0, 1, 1, 0])

In [8]:
def newdataset(nbr_of_examples):
    X = np.random.randint(0, 2, size = (2, nbr_of_examples))

    y = X.sum(axis=0, keepdims = True)    
    y[y != 1] = 0
    
    return X,y

In [5]:
X, y = newdataset(10)

In [3]:
X.shape

(2, 4)

In [294]:
class neuron_net:
    
    
    def sigmoid (self, Z):
    
        A = 1 / (1+ np.exp(-Z))
    
        return A
    
    
    def relu (self, Z):
    
        A = np.maximum(0,Z)
    
        return A
    
    
    def relu_deriv (self, dA, Z):
    
        dZ = np.array(dA, copy = True)
        dZ[Z <= 0] = 0
        
        return dZ
    
    
    def sigmoid_deriv (self, dA, Z):
        
        s = self.sigmoid(Z)
        dZ = dA * s * (1 - s)
        
        return dZ

    
    def __init__(self, list_of_parameters):
        
        self.num_layers = len(list_of_parameters)
        
        self.list_of_parameters = list_of_parameters
        
        #np.random.seed(1)
        
        parameters = {}
        
        for layer in range(1, self.num_layers):
            
            parameters["W" + str(layer)] = np.random.randn(self.list_of_parameters[layer], self.list_of_parameters[layer - 1]) * np.sqrt(self.list_of_parameters[layer - 1])
            
            parameters["b" + str(layer)] = np.zeros((self.list_of_parameters[layer], 1))
            
        
        self.parameters = parameters
        
    
    def fit (self, X):
        
        self.m = X.shape[1]
        
        caches = {}
        
        caches["A0"] = X
        
        # relu n activations
        
        for layer in range(1, self.num_layers - 1):
            
            Z = np.dot(self.parameters["W" + str(layer)], caches["A" + str(layer - 1)]) + self.parameters["b" + str(layer)]
            A = self.relu(Z)
            
            caches["Z" + str(layer)] = Z
            caches["W" + str(layer)] = self.parameters["W" + str(layer)]
            caches["b" + str(layer)] = self.parameters["b" + str(layer)]
            caches["A" + str(layer)] = A
        
        
        # last sigmoid activation
        
        Z = np.dot(self.parameters["W" + str(self.num_layers - 1)], Z) + self.parameters["b" + str(self.num_layers - 1)]
        A = self.sigmoid(Z)
        
        caches["Z" + str(self.num_layers - 1)] = Z
        caches["W" + str(self.num_layers - 1)] = self.parameters["W" + str(self.num_layers - 1)]
        caches["b" + str(self.num_layers - 1)] = self.parameters["b" + str(self.num_layers - 1)]
        caches["A" + str(self.num_layers - 1)] = A
        
        self.caches = caches
        
        
    def comupte_cost (self, y):
        
        cost = (1 / self.m) * (-np.dot(y, np.log(self.caches["A" + str(self.num_layers - 1)]).T) - np.dot(1 - y, np.log(1 - self.caches["A" + str(self.num_layers - 1)]).T))
        
        self.cost = np.squeeze(cost)
        
    
    def backward (self, y):
        
        grads = {}
        
        lastLayer = self.num_layers - 1
        
        #sigmoid derivatives
        
        grads["dA" + str(lastLayer)] = - (np.divide( y, self.caches["A" + str(lastLayer)]) - np.divide( 1 - y, 1 - self.caches["A" + str(lastLayer)]))
        grads["dZ" + str(lastLayer)] = self.sigmoid_deriv(grads["dA" + str(lastLayer)], self.caches["Z" + str(lastLayer)]) 
        grads["dW" + str(lastLayer)] = np.dot(grads["dZ" + str(lastLayer)], self.caches["A" + str(lastLayer - 1)].T) / self.m
        grads["db" + str(lastLayer)] = np.sum(grads["dZ" + str(lastLayer)], axis = 1, keepdims = True) / self.m
        
        
        #relu n derivatives
        
        for l in reversed(range(1, self.num_layers - 1)):
            
            grads["dA" + str(l)] = np.dot(self.caches["W" + str(l + 1)].T, grads["dZ" + str(l + 1)])
            grads["dZ" + str(l)] = self.relu_deriv(grads["dA" + str(l)], self.caches["Z" + str(l)])
            grads["dW" + str(l)] = np.dot(grads["dZ" + str(l)], self.caches["A" + str(l - 1)].T) / self.m
            grads["db" + str(l)] = np.sum(grads["dZ" + str(l)], axis = 1, keepdims = True) / self.m
        
                                                
        self.grads = grads
        
        
    def update_param (self, learning_rate):
        
        for l in range(1, self.num_layers):
            
            self.parameters["W" + str(l)] = self.parameters["W" + str(l)] - learning_rate * self.grads["dW" + str(l)]
            self.parameters["b" + str(l)] = self.parameters["b" + str(l)] - learning_rate * self.grads["db" + str(l)]
    
    
    def train (self, X, y, learning_rate, num_iterations = 1000):
        
        cost_track = []
        
        for i in range(0, num_iterations):
            
            self.fit(X)
            
            self.comupte_cost(y)
            
            self.backward(y)
            
            self.update_param(learning_rate)
            
            if i % 100 == 0:
                cost_track.append(self.cost)
                print("Cost after %i iterations: %f" % (i, self.cost))
            
        self.cost_track = cost_track
        
        
    def predict (self, y):
        
        predictions = np.zeros((1,self.m))
        
        probability = self.caches["A" + str(self.num_layers - 1)]
        
        for i in range(0, probability.shape[1]):
            
            if probability[0,i] > 0.5:
                predictions[0,i] = 1
            else:
                predictions[0,i] = 0
                
        print ("Accuracy: " + str(np.sum((predictions == y) / self.m)))
                
        return predictions

In [306]:
nn = neuron_net([2,4,2,1])

In [307]:
nn.parameters

{'W1': array([[ 1.18228921,  2.18263939],
        [ 1.07311326,  1.25145005],
        [-1.24066342, -1.22723646],
        [-2.03770642,  1.74266901]]), 'b1': array([[0.],
        [0.],
        [0.],
        [0.]]), 'W2': array([[-0.50835974,  2.79968788, -1.56382337, -0.87501797],
        [ 0.19085017,  1.84290014,  0.12150039,  0.42224951]]), 'b2': array([[0.],
        [0.]]), 'W3': array([[0.02337351, 0.25058128]]), 'b3': array([[0.]])}

In [308]:
nn.fit(X)

In [309]:
nn.caches

{'A0': array([[0, 0, 1, 1],
        [0, 1, 0, 1]]),
 'Z1': array([[ 0.        ,  2.18263939,  1.18228921,  3.3649286 ],
        [ 0.        ,  1.25145005,  1.07311326,  2.3245633 ],
        [ 0.        , -1.22723646, -1.24066342, -2.46789988],
        [ 0.        ,  1.74266901, -2.03770642, -0.29503741]]),
 'W1': array([[ 1.18228921,  2.18263939],
        [ 1.07311326,  1.25145005],
        [-1.24066342, -1.22723646],
        [-2.03770642,  1.74266901]]),
 'b1': array([[0.],
        [0.],
        [0.],
        [0.]]),
 'A1': array([[0.        , 2.18263939, 1.18228921, 3.3649286 ],
        [0.        , 1.25145005, 1.07311326, 2.3245633 ],
        [0.        , 0.        , 0.        , 0.        ],
        [0.        , 1.74266901, 0.        , 0.        ]]),
 'Z2': array([[0.        , 0.86923687, 2.40335395, 4.7974575 ],
        [0.        , 3.4586957 , 2.20328067, 4.92613524]]),
 'W2': array([[-0.50835974,  2.79968788, -1.56382337, -0.87501797],
        [ 0.19085017,  1.84290014,  0.121500

In [310]:
nn.comupte_cost(y)

In [311]:
nn.cost

array(0.76259837)

In [312]:
nn.backward(y)

In [313]:
nn.grads

{'dA3': array([[ 2.        , -1.41188895, -1.54428858,  4.84406607]]),
 'dZ3': array([[ 0.5       , -0.291729  , -0.35245263,  0.79356186]]),
 'dW3': array([[0.67660731, 0.53090978]]),
 'db3': array([[0.16234506]]),
 'dA2': array([[ 0.01168675, -0.00681873, -0.00823806,  0.01854833],
        [ 0.12529064, -0.07310183, -0.08831803,  0.19885174]]),
 'dZ2': array([[ 0.        , -0.00681873, -0.00823806,  0.01854833],
        [ 0.        , -0.07310183, -0.08831803,  0.19885174]]),
 'dW2': array([[ 0.0094478 ,  0.00643577,  0.        , -0.0029707 ],
        [ 0.10128739,  0.06899623,  0.        , -0.03184807]]),
 'db2': array([[0.00087289],
        [0.00935797]]),
 'dA1': array([[ 0.        , -0.01048513, -0.01266762,  0.02852167],
        [ 0.        , -0.15380968, -0.18582529,  0.41839343],
        [ 0.        ,  0.00178139,  0.00215219, -0.00484574],
        [ 0.        , -0.0249007 , -0.0300838 ,  0.06773493]]),
 'dZ1': array([[ 0.        , -0.01048513, -0.01266762,  0.02852167],
      

In [314]:
nn.update_param(0.0075)

In [317]:
nn.train(X,y,0.009, 3000)

Cost after 0 iterations: 0.210254
Cost after 100 iterations: 0.201255
Cost after 200 iterations: 0.193623
Cost after 300 iterations: 0.185761
Cost after 400 iterations: 0.177718
Cost after 500 iterations: 0.170396
Cost after 600 iterations: 0.163879
Cost after 700 iterations: 0.157324
Cost after 800 iterations: 0.151600
Cost after 900 iterations: 0.145117
Cost after 1000 iterations: 0.139465
Cost after 1100 iterations: 0.134681
Cost after 1200 iterations: 0.129166
Cost after 1300 iterations: 0.124529
Cost after 1400 iterations: 0.119968
Cost after 1500 iterations: 0.116212
Cost after 1600 iterations: 0.111794
Cost after 1700 iterations: 0.108181
Cost after 1800 iterations: 0.104652
Cost after 1900 iterations: 0.100634
Cost after 2000 iterations: 0.097308
Cost after 2100 iterations: 0.094631
Cost after 2200 iterations: 0.091512
Cost after 2300 iterations: 0.088427
Cost after 2400 iterations: 0.085992
Cost after 2500 iterations: 0.083668
Cost after 2600 iterations: 0.080806
Cost after 27

In [318]:
nn.predict(y)

Accuracy: 1.0


array([[0., 1., 1., 0.]])