In [4]:
import struct
import numpy as np
import time
import matplotlib.pyplot as plt
from sklearn.preprocessing import normalize
import math
from sklearn.datasets import make_blobs
from tqdm import tqdm #barre de progression taquadoum
import numpy as np
import warnings




#suppress warnings
warnings.filterwarnings('ignore')
#sav files
file1='t10k-images.idx3-ubyte'
file2='t10k-labels.idx1-ubyte'
file3='train-images.idx3-ubyte'
file4='train-labels.idx1-ubyte'

def read_idx(filename):
    '''Reads an idx file and returns an ndarray'''
    with open(filename, 'rb') as f:
        zero, data_type, dims = struct.unpack('>HBB', f.read(4))
        shape = tuple(struct.unpack('>I', f.read(4))[0] for d in range(dims))
        return np.frombuffer(f.read(), dtype=np.uint8).reshape(shape)

# def preprocessing(arr3d_array):
#     arr2d_array=arr3d_array.reshape(arr3d_array.shape[0],arr3d_array.shape[1]*arr3d_array.shape[2])
#     arr2d_array = normalize(arr2d_array)
#     return arr2d_array
  
def preprocessing(arr3d_array):
    arr2d_array=arr3d_array.reshape(arr3d_array.shape[0],arr3d_array.shape[1]*arr3d_array.shape[2])
    arr2d_array = (arr2d_array - arr2d_array.min())/ (arr2d_array.max() - arr2d_array.min())
    return arr2d_array
    

def preprocessing_label(vector):
    matrice = np.zeros((vector.size, 10))
    matrice[np.arange(vector.size), vector] = 1
    return matrice
    
    

def graphic_view():
    X, y = make_blobs(n_samples=60000, n_features=28*28, centers=10, random_state=0)
    y = y.reshape((y.shape[0], 1))

    print(X.shape,y.shape)
    print('dimensions de X:', X.shape)
    print('dimensions de y:', y.shape)

    plt.scatter(X[:,0], X[:, 1], c=y, cmap='summer')
    plt.show()



def initialisation(n0, n1, n2,n3):
    W1 = np.random.randn(n0, 1)
    b1 = np.zeros((n1, 1))
    W2 = np.random.randn(n1, 1)
    b2 = np.zeros((n2, 1))
    W3 = np.random.randn(n2, 1)
    b3 = np.zeros((n3, 1))
    return W1,W2,W3,b1,b2,b3




def sigmoide(z_i):
    return 1/(1+np.exp(-z_i))

# normaliser poids
def softmax(array):
    return (np.exp(array - array.max()))/np.sum(np.exp(array - array.max()))


def dsigmoid(x):
    return (np.exp(-x))/((np.exp(-x)+1)**2)






def forward_propagation(X,W1,W2,W3,b1,b2,b3):

    

    Z1 = X.dot(W1) + b1
    A1 = sigmoide(Z1)

    Z2 = A1.dot(W2) + b2
    A2 = sigmoide(Z2)
    
    Z3 = A2.dot(W3) + b3
    A3 = sigmoide(Z3)

    
    return A1,A2,A3

    
# un batch  après l'autre
#On prend la transposée de X et de y
def back_propagation(X, y, w1,w2,w3,b1,b2,b3, A1,A2,A3):
    m = y.shape[1]
    dZ3 = A3 - y
    dW3 = 1 / m * dZ3.dot(A2.T)
    #pour éviter que la dimension 1 disparaisse et des erreurs de broadcasting on met keepdims à true
    db3 = 1 / m * np.sum(dZ3, axis=1, keepdims = True)
    dZ2 = A2 - y
    dW2 = 1 / m * dZ2.dot(A1.T)
    db2 = 1 / m * np.sum(dZ2, axis=1, keepdims = True)
    dZ1 = np.dot(W2.T, dZ2) * A1 * (1 - A1)
    dW1 = 1 / m * dZ1.dot(X.T)
    db1 = 1 / m * np.sum(dZ1, axis=1, keepdims = True)
    return dW1,dW2,dW3,db1,db2,db3

    
    
    
    

def update(dW1,dW2,dW3,db1,db2,db3, w1,w2,w3,b1,b2,b3, learning_rate):
    w1new = W1 - learning_rate * dW1
    b1new = b1 - learning_rate * db1
    w2new = W2 - learning_rate * dW2
    b2new = b2 - learning_rate * db2
    w3new = W3 - learning_rate * dW3
    b3new = b3 - learning_rate * db3
    return w1new,w2new,w3new,b1new,b2new,b3new


def compute_error(A,y):
    epsilon = 1e-15
    return 1 / len(y) * np.sum(-y * np.log(A + epsilon) - (1 - y) * np.log(1 - A + epsilon))

def compute_batch_error(test_data_x_test,test_data_label_test,updated_w1,updated_w2,updated_w3,neb1,newb2,newb3):
    error_total=np.empty([0,test_data_x_test.shape[0]],dtype=float)
    for j in range(0,test_data_x_test.shape[0]):
        new_test_data,new_z1__,new_z2__,new_z3__,new_a1__,new_a2__,new_a3__=forward_propagation(test_data_x_test[j],w1,w2,w3,b1,b2,b3)

        indice_max_y=np.argmax(test_data_label_test[j],axis=1)
       
        indice_max_a=np.argmax(new_a3__,axis=1)
       
        error_per_batch=1-np.mean(indice_max_a==indice_max_y)
        error_total=np.append(error_total,error_per_batch)
        
    return np.mean(error_total)





def neural_network(test_data_x,test_data_y,train_data_x, train_label_array, n1=128,n2=64, learning_rate = 0.1, n_iter = 1000, batch=60):
    train_data_x=preprocessing(train_data_x)
    train_label_array=preprocessing_label(train_label_array)
    test_data_x=preprocessing(test_data_x)
    test_data_y=preprocessing_label(test_data_y)
    
    sets_list=np.array_split(train_data_x, batch)
    sets_array=np.array(sets_list)
    y_sets=np.array_split(train_label_array, batch)
    y_sets_array=np.array(y_sets)
    batch_array=sets_array
    
    x_tests=np.array_split(test_data_x, batch/6)
    x_tests=np.array(x_tests)
    
    y_tests=np.array_split(test_data_y, batch/6)
    y_tests=np.array(y_tests)
    
    
    
    # initialisation parametres
    n0 = train_data_x.shape[1]
    n3 = train_label_array.shape[1]
    np.random.seed(0)
    w1,w2,w3,b1,b2,b3 = initialisation(n0, n1, n2, n3)

    train_loss = []
    train_acc = []
    history = []

    # gradient descent
    for i in tqdm(range(n_iter)):
        for j in (range(batch)):
            a1,a2,a3 = forward_propagation(batch_array[j], w1,w2,w3,b1,b2,b3)
        # A2 = activations['A2']

        # Plot courbe d'apprentissage
#         train_loss.append(log_loss(y.flatten(), A2.flatten()))
#         y_pred = predict(X, parametres)
#         train_acc.append(accuracy_score(y.flatten(), y_pred.flatten()))
        
        # history.append([parametres.copy(), train_loss, train_acc, i])

        # mise a jour
            dW1_,dW2_,dW3_,db1_,db2_,db3_ = back_propagation(batch_array[j], y_sets_array[j], w1,w2,w3,b1,b2,b3, a1,a2,a3)
            w1new_,w2new_,w3new_,b1new_,b2new_,b3new_ = update(dW1_,dW2_,dW3_,db1_,db2_,db3_, w1,w2,w3,b1,b2,b3, learning_rate)
            res=compute_batch_error(test_data_x_test,test_data_label_test,w1new_,w2new_,w3new_,b1new_,b2new_,b3new)


    # plt.figure(figsize=(12, 4))
    # plt.subplot(1, 2, 1)
    # plt.plot(train_loss, label='train loss')
    # plt.legend()
    # plt.subplot(1, 2, 2)
    # plt.plot(train_acc, label='train acc')
    # plt.legend()
    # plt.show()

    return res










        
    

x_train,y_train,x_test,y_test=read_idx(file3),read_idx(file4),read_idx(file1),read_idx(file2)



neural_network(x_test,y_test,x_train, y_train)

  0%|                                                  | 0/1000 [00:00<?, ?it/s]


ValueError: operands could not be broadcast together with shapes (1000,1) (128,1) 

In [None]:







print('y_test shape')
print(y_tests.shape)
print('y_tests[0:6].shape')
print(y_tests[0:6].shape)
print(y_tests[0:6])
print('y_test[0] shape')
print(y_tests[0].shape)
print(y_tests[0])

print('y_sets_array shape')
print(y_sets_array.shape)
print('y_sets_array[0:6].shape')
print(y_sets_array[0:6].shape)
print(y_sets_array[0:6])
print('y_sets_array[0] shape')
print(y_sets_array[0].shape)
print(y_sets_array[0])


print('x_test shape')
print(x_tests.shape)
print('x_tests[0:6].shape')
print(x_tests[0:6].shape)
print(x_tests[0:6])
print('x_test[0] shape')
print(x_tests[0].shape)
print(x_tests[0])


print('batch_array shape')
print(batch_array.shape)
print('batch_array[0:6].shape')
print(batch_array[0:6].shape)
print(batch_array[0:6])
print('batch_array[0] shape')
print(batch_array[0].shape)
print(batch_array[0])