## 1 - Packages : Abdelazyz RKHISS

In [1]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
from scipy.misc import derivative
from tensorflow import keras

plt.rcParams['figure.figsize'] = (5.0, 4.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

In [2]:
def relu(z):
    cache = z
    A = np.maximum(0, z)
    return A, cache
                
def relu_derivate(dA, activation_cache):
    z=activation_cache 
    dZ = np.array(dA, copy=True)
    dZ[z <= 0] = 0
    return dZ

In [3]:
def sigmoid(z):
    cache = z
    A=1/(1+np.exp(-z))
    return A, cache

def sigmoid_derivate(dA, activation_cache):
    z = activation_cache 
    s = 1/(1+np.exp(-z))
    dZ = dA * s * (1-s)
    return dZ


def softmax(z):
    cache=z
    exps = np.exp(z - z.max())
    sums = np.sum(exps)
    
    return np.divide(exps, sums)

def softmax_derivate(dA, activation_cache):
    z=activation_cache
    dZ = Softmax(z)*np.sum((dA*(dA*softmax(z))), axis=1)
    return dZ
    

In [17]:
def initialisation(dimension_layer):
    parameters = {}
    L = len(dimension_layer)
    for l in range(1, L):
        ####question pq W1.shape ==> (4,13)
        parameters['W' + str(l)] = np.random.randn(dimension_layer[l], dimension_layer[l-1])*0.0251
        parameters['b' + str(l)] = np.zeros((dimension_layer[l],1))
    return parameters

In [18]:
def lineaire_forward(A, W,b):
    Z = np.dot(W,A)+b
    assert(Z.shape == (W.shape[0], A.shape[1]))
    
    cache = (A, W, b)
    
    return Z, cache

In [19]:
def linear_activation_forward(A_prev, W, b, activation):
    
    if activation == "softmax":
        Z, linear_cache=lineaire_forward(A_prev,W,b)
        A, activation_cache=softmax(Z)
        
    elif activation == "relu" :
        Z, linear_cache = lineaire_forward(A_prev,W,b)
        A, activation_cache=relu(Z)
        
    cache = (linear_cache, activation_cache)

    return A, cache

In [20]:
def model_forward(X, parameters):
    caches = []
    A = X
    L = len(parameters) // 2
    
    for l in range(1, L):
        A_prev = A 
        A,cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b'+str(l)], activation="relu")
        caches.append(cache)
        
    AL, cache = linear_activation_forward(A, parameters['W'+str(L)] , parameters['b'+str(L)], activation="sigmoid")
    
    caches.append(cache)
   
    
    return AL, caches

In [21]:
def calcule_cost(AL, Y):
    m = Y.shape[0]
    cost=(-1/m) *np.sum((Y*np.log(AL)+(1-Y)*np.log(1-AL)))
    cost = np.squeeze(cost)
    return cost

In [22]:
def linear_backword(dZ, cache):
    A_prev, W, b = cache
    m = A_prev.shape[1]
    
    dW=1/m*(np.dot(dZ,A_prev.T))
    db=1/m*(np.sum(dZ,axis=1,keepdims=True))
    dA_prev=np.dot(W.T,dZ)
    
    return dA_prev, dW, db

In [23]:
def linear_activation_backward(dA, cache, activation):
    
    linear_cache, activation_cache= cache
    
    if activation == "relu":
            
        dZ=relu_derivate(dA,activation_cache)
        dA_prev, dW, db = linear_backword(dZ, linear_cache)
        
        
    elif activation == "softmax":

        dZ=sigmoid_derivate(dA,activation_cache)
        dA_prev, dW, db = softmax_derivate(dZ, linear_cache)
        
    return dA_prev, dW, db

In [24]:
def model_backward(AL, Y, caches):
    
    gradients = {}
    L = len(caches) #le nombre des couches layers
    m = AL.shape[1]
    #Y = Y.reshape(AL.shape)
    
    dAL = - (np.divide(Y, AL+0.01) - np.divide(1 - Y, (1 - AL)+0.01))
    
    current_cache=caches[L-1]
    
    dA_prev, dW, db = linear_activation_backward(dAL, current_cache,activation="sigmoid")
    
    gradients["dA"+ str(L-1)]=dA_prev
    gradients["dW"+ str(L)]=dW
    gradients["db"+ str(L)]=db
    
    for l in reversed(range(L-1)):
        
        current_cache=caches[l]
        dA_prev, dW, db = linear_activation_backward(gradients["dA"+ str(l+1)], current_cache, activation = "relu")
    
        gradients["dA"+ str(l)]=dA_prev
        gradients["dW"+ str(l+1)]=dW
        gradients["db"+ str(l+1)]=db
    
    return gradients

In [25]:
def miseajours_poids(parameters, gradients, learning_rate=0.01):
    
    L = len(parameters) // 2
    
    for l in range(L):
        parameters["W"+ str(l+1)]=parameters["W"+ str(l+1)]-learning_rate*gradients["dW"+ str(l+1)]
        parameters["b"+ str(l+1)]=parameters["b"+ str(l+1)]-learning_rate*gradients["db"+ str(l+1)]

    return parameters

In [26]:
def modelpredict(X, parameters):
    ypred, caches = model_forward(X, parameters)
    return ypred

In [4]:
from sklearn.model_selection import train_test_split
from sklearn import datasets
iris=datasets.load_iris()


xtrain, xtest, ytrain, ytest = train_test_split(iris.data, iris.target)


ytraint=ytrain.reshape(112,1)
ytest=ytest.reshape(38,1)

In [15]:
ytrain[0]

0

In [22]:
ytrain=np.array(ytrain)

y_train=list()

for i in range(ytrain.shape[0]):
        
    if(ytrain[i]==0):
        y_train.append(np.array([1, 0, 0]))
        
    elif(ytrain[i]==1):
        y_train.append(np.array([0, 1, 0]))
        
    else:
        y_train.append(np.array([0, 0, 1]))
        
y_train=np.array(y_train)
        

In [28]:
for i in range(500):
    AL, caches = model_forward(xtrain, parameters)
    gradients  = model_backward(AL, ytrain, caches)
    parameters_updated = miseajours_poids(parameters, gradients, learning_rate=0.01)

In [26]:
######### prediction de Y par les parametre aléatoire :
AL, caches = model_forward(xtrain, parameters_updated)

##### verifier l'erreur combien :
cout = calcule_cost(AL, ytrain)
print("cost = ",cout )

cost =  -1.6133406715432912


In [359]:
xtrain[0]

array([6.8, 2.8, 4.8, 1.4])

In [327]:
ypred=modelpredict(xtest, parameters)

In [328]:
cout_test=calcule_cost(ypred, ytest)
print("cout test = ",cout_test )

cout test =  0.6931917468877367


In [337]:
ypred=modelpredict(xtest, parameters_updated)

In [338]:
cout_test=calcule_cost(ypred, ytest)
print("cout test = ",cout_test )

cout test =  nan


In [343]:
xtrain[0]

array([7.3, 2.9, 6.3, 1.8])

In [344]:
ytrain[0]

2

In [331]:
len(gradients)

9

In [332]:
len(parameters)

6

In [276]:
len(caches)

3

In [118]:
xtrain.shape

(4, 112)

In [123]:
parameters['W2'].shape

(13, 13)

In [125]:
dimension_reseau[-1]

1