# Convolutions avec des numpy array
Les fonctions disponibles : 
- get_size_result
- pad_image
- convolution
- max_pooling
- activate (x -> ReLU(x)+biais)
- flatten

Les filtres ont été initialisés à partir des exemples ci-contres : http://scikit-image.org/docs/dev/api/skimage.filters.html#skimage.filters.scharr_v

In [1]:
import numpy as np

### Données test

In [2]:
image = np.array([[2,3,7,4,6,2,-9],
                 [6,6,9,8,7,4,3],
                 [3,4,8,3,8,9,7],
                 [7,8,3,6,6,3,4],
                 [4,2,1,8,3,4,6],
                 [3,2,4,1,9,8,3],
                 [0,1,3,9,2,1,4]])
image
#image = np.random.randint(low=-5, high=5, size=7*7).reshape(7,7) # pour en générer aléatoirement

array([[ 2,  3,  7,  4,  6,  2, -9],
       [ 6,  6,  9,  8,  7,  4,  3],
       [ 3,  4,  8,  3,  8,  9,  7],
       [ 7,  8,  3,  6,  6,  3,  4],
       [ 4,  2,  1,  8,  3,  4,  6],
       [ 3,  2,  4,  1,  9,  8,  3],
       [ 0,  1,  3,  9,  2,  1,  4]])

In [3]:
filtre = np.array([[3,4,4],[1,0,2],[-1,0,3]])
filtre

array([[ 3,  4,  4],
       [ 1,  0,  2],
       [-1,  0,  3]])

### Fonctions d'activation

In [107]:
def sigmoid(vector, dérivée=False):
    if dérivée :
        return (vector * (1-vector)).reshape(-1, 1)
    return (1 / (1+np.exp(-vector))).reshape(-1, 1)

def ReLU(image, dérivée=False):
    if dérivée:
        return  np.where(image >= 0, 1, 0)
    return image * (image > 0)

def activate(image, biais):
    """ Retourne ReLU(image) + biais"""
    res = ReLU(image) + biais
    return res

activate(image, 1)
sigmoid(np.array([-0.1, 0.1]), dérivée=True)

array([[-0.11],
       [ 0.09]])

### Convolution

In [8]:
def get_size_result(image, filtre, padding=0, stride=1):
    Nw, Nh = image.shape[0], image.shape[1]
    F = filtre.shape[0]
    size_width  = np.floor((Nw-F + 2*padding) / stride + 1)
    size_height = np.floor((Nh-F + 2*padding) / stride + 1)
    return int(size_width), int(size_height), 1
get_size_result(image, filtre, stride=2)

(3, 3, 1)

In [9]:
def convolution(image, liste_filtres, padding=0, stride=1, bias=[0]):
    resultats = []
    for filtre in liste_filtres:
        resultat = np.zeros(get_size_result(image, filtre, padding, stride)[:2])
        f = filtre.shape[0]
        for j in np.arange(0, resultat.shape[1], 1):
            for i in np.arange(0, resultat.shape[0], 1):
                # Récupération de la position du top-left-corner
                tlc = (int(i*stride-padding), int(j*stride-padding))
                # Récupération de la zone concernée dans l'image
                zone = image[tlc[0]:tlc[0]+f, tlc[1]:tlc[1]+f, :]
                for channel in range(zone.shape[2]):
                    resultat[i,j] += np.sum(zone[:,:,channel] * filtre)
        resultats.append(resultat.reshape(resultat.shape[0], resultat.shape[1], 1))
    # Activations (ReLU + biais): 
    resultats = [activate(r, biais=bias[idx]) for idx, r in enumerate(resultats)]
    stacked = np.concatenate(resultats, axis=2)
    return stacked

convolution(image.reshape(7,7,1), [filtre], padding=0, stride=2)

array([[[  91.],
        [ 100.],
        [  16.]],

       [[  69.],
        [  91.],
        [ 117.]],

       [[  44.],
        [  72.],
        [  74.]]])

### Padding

In [11]:
def pad_image(image, padding=1):
    resultat = np.zeros((image.shape[0] + 2*padding, image.shape[1] + 2*padding))
    resultat[padding:image.shape[0] + padding, padding:image.shape[1]+padding] = image
    return resultat
pad_image(image, padding=1)

array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  2.,  3.,  7.,  4.,  6.,  2., -9.,  0.],
       [ 0.,  6.,  6.,  9.,  8.,  7.,  4.,  3.,  0.],
       [ 0.,  3.,  4.,  8.,  3.,  8.,  9.,  7.,  0.],
       [ 0.,  7.,  8.,  3.,  6.,  6.,  3.,  4.,  0.],
       [ 0.,  4.,  2.,  1.,  8.,  3.,  4.,  6.,  0.],
       [ 0.,  3.,  2.,  4.,  1.,  9.,  8.,  3.,  0.],
       [ 0.,  0.,  1.,  3.,  9.,  2.,  1.,  4.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]])

### Pooling

In [12]:
def max_pooling(image, filtre_size, padding=0, stride=1):
    resultats = []
    for channel in range(image.shape[2]):
        filtre_sim = np.zeros((filtre_size, filtre_size))
        resultat = np.zeros(get_size_result(image, filtre_sim, padding, stride))
        for j in np.arange(0, resultat.shape[1], 1):
            for i in np.arange(0, resultat.shape[0], 1):
                # Récupération de la position du top-left-corner
                tlc = (int(i*stride-padding), int(j*stride))
                # Récupération de la zone concernée dans l'image
                zone = image[tlc[0]:tlc[0]+filtre_size, tlc[1]:tlc[1]+filtre_size, channel]
                resultat[i,j] = np.max(zone)
        resultats.append(resultat.reshape(resultat.shape[0],resultat.shape[1], 1))
    stacked = np.concatenate(resultats, axis=2)
    return stacked

im = np.array([[1,3,2,1,3], [2,9,1,1,5], [1,3,2,3,2], [8,3,5,1,0], [5,6,1,2,9]])
im = im.reshape(5,5,1)
max_pooling(im, filtre_size=3, stride=1, padding=0)

array([[[ 9.],
        [ 9.],
        [ 5.]],

       [[ 9.],
        [ 9.],
        [ 5.]],

       [[ 8.],
        [ 6.],
        [ 9.]]])

### Flatten

In [13]:
def flatten(image):
    from functools import reduce
    nb_éléments = reduce(np.multiply, image.shape)
    return image.reshape(nb_éléments, 1)
flatten(image).shape

(49, 1)

### Loss and cost function
<img width=500px src='https://wikimedia.org/api/rest_v1/media/math/render/svg/80f87a71d3a616a0939f5360cec24d702d2593a2'/>

In [14]:
def logloss(y_true, y_pred):
    """Retourne la fonction de perte pour un unique training example i"""
    if y_true == 1:
        return -np.log(y_pred)
    else:
        return -np.log(1 - y_pred)

In [81]:
def cost_function(y_true_vector, y_pred_vector):
    """Version (non vectorisée) de la fonction de coût pour tous les exemples x1...xn"""
    cost = 0
    for i in range(len(y_true_vector)):
        y_true, y_pred = y_true_vector[i], y_pred_vector[i]
        cost += logloss(y_true, y_pred)
    return cost / len(y_true_vector)
y_true = [0,0,0,1,1,1]
y_pred = [0.01,0.2,0.03,0.69,0.7,0.9]
cost_function(y_true, y_pred)

0.18279203927330176

### Weights initialisation
<img width=500px src='https://cdn-images-1.medium.com/max/800/1*-vY3G0W-4nJo-dQ1jm0p0w.png'/>

In [192]:
def initialize_weights(size_previous_layer, size_layer):
    """Retourne W et b initialisés"""
    np.random.seed(123)
    #return np.random.randn(size_layer, size_previous_layer) * np.sqrt(2/size_previous_layer), np.zeros(size_layer).reshape(size_layer, 1)
    return np.random.normal(loc=0, scale=(size_previous_layer + size_layer) / 2, size=(size_layer, size_previous_layer)), np.zeros(size_layer).reshape(size_layer, 1)

W, b = initialize_weights(4,3)
W, b

(array([[-3.79970711,  3.49070906,  0.99042474, -5.2720315 ],
        [-2.02510088,  5.78002788, -8.49337735, -1.5011942 ],
        [ 4.43077691, -3.03359141, -2.37610153, -0.33148139]]), array([[ 0.],
        [ 0.],
        [ 0.]]))

### Reproduction de l'architecture LeNet - 5

In [18]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os

In [145]:
#im = plt.imread('Partitions/bellaciao.png')[:, :, 0]
path = ['Binaire/temp/311_1.png', 'Binaire/temp/259_0.png', 'Binaire/temp/308_1.png']
y_true = np.array(list(map(int, path[-5])))
im = plt.imread(path)[:, :, 0]
im = im.reshape(im.shape[0], im.shape[1], 1)
im.shape

(72, 14, 1)

In [149]:
y_true
path = ['Binaire/temp/311_1.png', 'Binaire/temp/259_0.png', 'Binaire/temp/308_1.png']
y_true = np.array(list(map(lambda x : int(x[-5]), path)))


array([1, 0, 1])

In [202]:
# Filtres : 
fil1 = np.array([[1,1,1], [0,0,0], [-1,-1,-1]])
fil2 = np.array([[1,0,-1], [1,0,-1], [1,0,-1]])
fil3 = np.array([[-1,0,1], [-2,0,2], [-1,0,1]])
fil4 = np.array([[-1,-2,-1], [0,0,0], [1,2,1]])
fil5 = np.array([[3,0,-3], [10,0,-10], [3,0,-3]])
liste_filtres = [fil1, fil2, fil3, fil4, fil5]
learning_rate = 0.01
cost = 0

# Initialisation des paramètres :
paths = ['Binaire/temp/311_1.png', 'Binaire/temp/259_0.png', 'Binaire/temp/308_1.png']
W1, b1 = initialize_weights(160, 112)
W2, b2 = initialize_weights(112, 1)

for image in paths:
    y_true = np.array(list(map(int, image[-5])))
    im = plt.imread(image)[:, :, 0]
    im = im.reshape(im.shape[0], im.shape[1], 1)
    break
    ### FORWARD PROPAGATION ###
    # Conv-layer 1 :
    print('Image initiale', im.shape)
    im = convolution(image=im, liste_filtres=liste_filtres, padding=0, stride=1, bias=[0]*len(liste_filtres))
    print('Image après Conv1', im.shape)
    im = max_pooling(image=im, filtre_size=2, padding=0, stride=2)
    
    # Conv-layer 2
    print('Image après Pool1', im.shape)
    im = convolution(image=im, liste_filtres=liste_filtres, padding=0, stride=1, bias=[0]*len(liste_filtres))
    print('Image après Conv2', im.shape)

    im = max_pooling(image=im, filtre_size=2, padding=0, stride=2)
    print('Image après Pool2', im.shape)
    
    # FC-layer 3
    im = flatten(im)
    print('Image après flatten', im.shape)
    
    z1 = W1 @ im + b1
    a1 = ReLU(z1)
    print('\na1.shape =', a1.shape)

    # FC-layer 4
    z2 = W2 @ a1 + b2
    print('z2 =', z2)
    a2 = sigmoid(z2)
    print('a2.shape =', a2.shape, 'a2 =', a2)

    # Coût
    cost = cost + logloss(y_true, a2) #cost_function(y_true, a2)
    print('Cost epoch 1', cost)

    ### BACK PROPAGATION ### 
    dz2 = a2 - y_true
    dW2 = (a2 - y_true) @ a1.T

    W2 = W2 - learning_rate*dW2
    b2 = b2 - learning_rate*b2

    da1 = W2.T @ dz2
    dz1 = da1 * ReLU(da1, dérivée=True)

    dW1 = dz1 @ im.T
    db1 = dz1

    W1 = W1 - learning_rate*dW1
    b1 = b1 - learning_rate*b1
#cost /= len(paths)

In [232]:
df = pd.DataFrame({'Listes':[[1,2,3],[4,5,6]]})
df.Listes.values#[:,0]

array([list([1, 2, 3]), list([4, 5, 6])], dtype=object)

In [153]:
y_true, a2, cost, W2.shape

(array([1]), array([[ 0.00461016]]), array([ 5.37949324]), (1, 112))

In [138]:
#check dimensions
print("a2, z2 :", a2.shape, z2.shape)
print("W2, b2 :", W2.shape, b2.shape)
print("a1, z1 :", a1.shape, z1.shape)

a2, z2 : (1, 1) (1, 1)
W2, b2 : (1, 112) (1, 1)
a1, z1 : (112, 1) (112, 1)


In [425]:
plt.imsave('im1.png', im[:,:,0])
plt.imsave('im2.png', im[:,:,1])

# Annexes 

In [None]:
# Simulation d'un filtre : 
fil = np.round(np.random.rand(3*3).reshape(3,3), 2)

In [401]:
#"""
def convolution(image, filtre, padding=0, stride=1):
    
    resultat = np.zeros(get_size_result(image, filtre, padding, stride))
    f = filtre.shape[0]
    for j in np.arange(0, resultat.shape[1], 1):
        for i in np.arange(0, resultat.shape[0], 1):
            # Récupération de la position du top-left-corner
            tlc = (int(i*stride-padding), int(j*stride))
            # Récupération de la zone concernée dans l'image
            zone = image[tlc[0]:tlc[0]+f, tlc[1]:tlc[1]+f]
            resultat[i,j] = np.sum(zone * filtre)
    return resultat

#"""
#None
convolution(image, filtre, padding=0, stride=2)

array([[[ 91.],
        [100.],
        [ 88.]],

       [[ 69.],
        [ 91.],
        [117.]],

       [[ 44.],
        [ 72.],
        [ 74.]]])

In [350]:
"""
def max_pooling(image, filtre_size, padding=0, stride=1):
    filtre_sim = np.zeros((filtre_size, filtre_size))
    resultat = np.zeros(get_size_result(image, filtre_sim, padding, stride))
    for j in np.arange(0, resultat.shape[1], 1):
        for i in np.arange(0, resultat.shape[0], 1):
            # Récupération de la position du top-left-corner
            tlc = (int(i*stride-padding), int(j*stride))
            # Récupération de la zone concernée dans l'image
            zone = image[tlc[0]:tlc[0]+filtre_size, tlc[1]:tlc[1]+filtre_size]
            resultat[i,j] = np.max(zone)
    return resultat
im = np.array([[1,3,2,1,3], [2,9,1,1,5], [1,3,2,3,2], [8,3,5,1,0], [5,6,1,2,9]])
max_pooling(im, filtre_size=3, stride=1, padding=0)
"""
None