In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat

### Importação e Processamento dos Dados

In [2]:
def zero_col(df):
    """Remove the columns of ``df`` that contain only zeros.

    Columns are looked up by the integer labels ``0 .. df.shape[1] - 1``
    (the labels pandas assigns when a file is read with ``header=None``).

    Note: ``df`` is modified in place; the all-zero columns are popped from it.

    Returns
    -------
    (matrix, zero_cols)
        ``matrix`` is the remaining data as an ``np.matrix`` and ``zero_cols``
        is the list of removed column labels, in ascending order.
    """
    n_rows, n_cols = df.shape
    all_zero = np.zeros(n_rows)
    # Decide first, remove afterwards: removing one column never changes the
    # content or label of another, so the outcome equals a pop-as-you-go scan.
    zero_cols = [label for label in range(n_cols) if np.array_equal(df[label], all_zero)]
    for label in zero_cols:
        df.pop(label)
    return np.asmatrix(df), zero_cols

In [3]:
# Labels: one row per sample, no header row in the file.
labels = np.array(pd.read_csv("labelMNIST.csv", header=None))
# Features: no header row; values use ',' as the decimal separator.
images = pd.read_csv("imageMNIST.csv", decimal=',', header=None)

# zero_col strips the all-zero columns from `images` (in place) and returns
# the remaining data as a matrix together with the removed column labels.
X, zero_cols = zero_col(images)
y = np.asarray(labels)

In [4]:
def sigmoid(z):
    """Element-wise logistic function ``1 / (1 + exp(-z))``.

    Evaluated as ``exp(-log(1 + exp(-z)))`` through ``np.logaddexp`` so that a
    large negative ``z`` does not overflow ``exp`` (no RuntimeWarning) while
    very small outputs keep their relative precision.

    Parameters
    ----------
    z : scalar, ndarray or np.matrix

    Returns
    -------
    Same shape (and array subclass) as ``z`` with values in [0, 1].
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) without ever forming exp(-z).
    return np.exp(-np.logaddexp(0, -z))

In [5]:
def sigmoidGradient(z):
    """Derivative of the logistic function: ``s(z) * (1 - s(z))``.

    The logistic value is computed in the overflow-free form
    ``exp(-logaddexp(0, -z))`` so a large negative ``z`` yields 0 without an
    overflow warning.

    Parameters
    ----------
    z : scalar or ndarray
        NOTE: for an ``np.matrix`` argument the ``*`` below is a matrix
        product, not an element-wise one; pass a plain ndarray when the
        element-wise derivative is wanted.

    Returns
    -------
    Same shape as ``z`` for scalar/ndarray input.
    """
    # Local name deliberately differs from the module-level `sigmoid` function.
    activation = np.exp(-np.logaddexp(0, -z))
    return activation*(1-activation)

In [6]:
def createTheta(theta, input_layer_size, hidden_layer_size, num_labels):
    """Split a flat parameter vector into one weight matrix per layer transition.

    Parameters
    ----------
    theta : array-like
        All weights concatenated layer by layer (row-major within a layer).
    input_layer_size : int
        Number of input features (bias excluded).
    hidden_layer_size : int or sequence of int
        Number of units of each hidden layer, in order.
    num_labels : int
        Number of output units.

    Returns
    -------
    list of ndarray
        Matrix ``k`` has shape ``(units_out, units_in + 1)``; the extra column
        holds the bias weights. The matrices are reshaped slices of ``theta``.

    Raises
    ------
    ValueError
        If the length of ``theta`` does not match the requested architecture.
    """
    flat = np.asarray(theta).ravel()
    # Use the argument, not a notebook-level global, for the hidden layout.
    hidden = [int(units) for units in np.atleast_1d(hidden_layer_size)]
    layer_sizes = [int(input_layer_size)] + hidden + [int(num_labels)]
    shapes = [(n_out, n_in + 1) for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])]

    expected = sum(rows * cols for rows, cols in shapes)
    if flat.size != expected:
        raise ValueError(
            "theta has %d elements but layers %s require %d" % (flat.size, layer_sizes, expected)
        )

    thetan = []
    start = 0
    for rows, cols in shapes:
        stop = start + rows * cols
        thetan.append(flat[start:stop].reshape(rows, cols))
        start = stop
    return thetan

In [12]:
def computeCost(X,y,theta,input_layer_size,hidden_layer_size,num_labels, Lambda, regularizada=True):
    """Cross-entropy cost and backpropagated gradients of a sigmoid network.

    Parameters
    ----------
    X : array-like, shape (m, n)
        One sample per row, without a bias column (it is added here).
    y : array-like with m entries
        Class labels in ``1 .. num_labels``; label ``i`` maps to output
        column ``i - 1``.
    theta : sequence of 2-D arrays
        One weight matrix per layer transition, each of shape
        ``(units_out, units_in + 1)`` with the bias weights in column 0.
    input_layer_size, hidden_layer_size
        Not used; kept so existing callers keep working.
    num_labels : int
        Number of output units.
    Lambda : float
        L2 regularisation strength (bias columns are not penalised).
    regularizada : bool
        If true return the regularised cost and gradients, otherwise the
        unregularised ones.

    Returns
    -------
    (cost, grads)
        ``cost`` is a scalar and ``grads`` a list of ndarrays with the same
        shapes as the matrices in ``theta``.
    """
    # Work on plain ndarrays so `*` is element-wise and `@` is a matrix
    # product regardless of whether the caller passed np.matrix objects.
    weights = [np.asarray(layer_theta, dtype=float) for layer_theta in theta]
    X = np.asarray(X, dtype=float)
    m = X.shape[0]
    last = len(weights) - 1

    # One-hot targets: column i-1 is 1 where the label equals i.
    targets = np.asarray(y).reshape(-1, 1)
    y10 = (targets == np.arange(1, num_labels + 1)).astype(float)

    # Forward pass; every layer except the output gets a leading bias column.
    bias = np.ones((m, 1))
    an = [np.hstack((bias, X))]
    for k, W in enumerate(weights):
        activation = sigmoid(an[k] @ W.T)
        if k != last:
            activation = np.hstack((bias, activation))
        an.append(activation)
    output = an[-1]

    cost = np.sum(-y10*np.log(output) - (1 - y10)*np.log(1 - output)) / m
    # L2 penalty over all non-bias weights, matching the (Lambda/m)*theta term
    # added to the regularised gradient below.
    penalty = sum(np.sum(np.square(W[:, 1:])) for W in weights)
    reg_J = cost + Lambda/(2*m)*penalty

    # Backward pass for all samples at once.
    deltas = [None] * len(weights)
    deltas[last] = output - y10
    for k in range(last - 1, -1, -1):
        hidden = an[k + 1][:, 1:]  # activations of layer k+1 without the bias column
        # sigmoid'(z) = a * (1 - a); the bias column carries no error backwards.
        deltas[k] = (deltas[k + 1] @ weights[k + 1][:, 1:]) * hidden * (1 - hidden)

    grad = [(deltas[k].T @ an[k]) / m for k in range(len(weights))]
    grad_reg = [
        g + (Lambda/m)*np.hstack((np.zeros((W.shape[0], 1)), W[:, 1:]))
        for g, W in zip(grad, weights)
    ]

    if regularizada:
        return reg_J, grad_reg
    else:
        return cost, grad

In [8]:
def randInitializeWeights(L_in,L_out):
    """Random initial weights for a layer with ``L_in`` inputs and ``L_out`` units.

    Values are drawn uniformly from ``[-epi, epi)`` with
    ``epi = sqrt(6) / sqrt(L_in + L_out)``, using NumPy's global random state
    (seed it with ``np.random.seed`` for reproducible runs).

    Returns
    -------
    ndarray of shape ``(L_out, L_in + 1)``; the extra column is for the bias.
    """
    # The earlier expression `(6**1/2)/(L_in+L_out)**1/2` parsed as
    # 6/2/(L_in+L_out)/2 because `**` binds tighter than `/`.
    epi = np.sqrt(6.0/(L_in+L_out))
    W = np.random.rand(L_out,L_in+1)*(2*epi)-epi
    return W

In [14]:
def gradientDescent(X,y,theta,alpha,nbr_iter,Lambda,input_layer_size,hidden_layer_size,num_labels):
    """Train the network with fixed-step batch gradient descent.

    ``theta`` is the flat parameter vector; it is unpacked once with
    ``createTheta`` and the per-layer matrices are then updated for
    ``nbr_iter`` iterations with step size ``alpha``. Cost and gradients come
    from ``computeCost`` called with its default ``regularizada`` setting.

    Returns
    -------
    (weights, J_history)
        The list of trained per-layer weight matrices and the cost recorded
        at every iteration (evaluated before that iteration's update).
    """
    weights = createTheta(theta, input_layer_size, hidden_layer_size, num_labels)
    J_history = []

    for _ in range(nbr_iter):
        cost, grads = computeCost(
            X, y, weights, input_layer_size, hidden_layer_size, num_labels, Lambda
        )
        weights = [W - (alpha*g) for W, g in zip(weights, grads)]
        J_history.append(cost)

    return weights, J_history

In [18]:
def prediction(X,thetan):
    """Predict a class label for every row of ``X``.

    ``thetan`` is the list of per-layer weight matrices. The input is pushed
    through the sigmoid layers, a bias column of ones being prepended before
    every layer; the returned label is the index of the largest output unit
    plus one (labels start at 1).
    """
    n_samples = X.shape[0]
    bias = np.ones((n_samples, 1))
    last = len(thetan) - 1

    layer = np.hstack((bias, X))
    for k, W in enumerate(thetan):
        layer = sigmoid(layer @ W.T)
        if k != last:
            # Hidden activations feed the next layer, so they need a bias term.
            layer = np.hstack((bias, layer))

    return np.argmax(layer, axis=1) + 1

In [33]:
# Network architecture.
input_layer_size = X.shape[1]
hidden_layers_size = np.array([25])  # one entry per hidden layer: its number of neurons
num_labels = 10
n_layers = hidden_layers_size.shape[0]

# Draw the initial weights layer by layer (input -> hidden ... -> output) and
# pack them into one flat vector in that same order.
initial_theta1 = randInitializeWeights(input_layer_size, hidden_layers_size[0])
for i in range(n_layers - 1):  # empty when there is a single hidden layer
    aux_theta = randInitializeWeights(hidden_layers_size[i], hidden_layers_size[i+1])
    initial_theta1 = np.concatenate((initial_theta1.ravel(), aux_theta.ravel()))

initial_theta2 = randInitializeWeights(hidden_layers_size[-1], num_labels)
initial_theta = np.concatenate((initial_theta1.ravel(), initial_theta2.ravel()))

# Positional arguments after initial_theta: alpha=0.8, nbr_iter=800, Lambda=1.
theta, J_history = gradientDescent(
    X, y, initial_theta, 0.8, 800, 1, input_layer_size, hidden_layers_size, num_labels
)


In [35]:
pred = prediction(X,theta)
# Flatten both sides so the comparison is one-to-one whatever their shapes,
# and divide by the real sample count instead of a hard-coded 50 (which was
# only a percentage for exactly 5000 samples).
hits = np.sum(np.asarray(pred).ravel() == np.asarray(y).ravel())
print("Training Set Accuracy:",100*hits/np.size(y),"%")

Training Set Accuracy: 94.04 %
