## Classificação do Fashion MNIST com NN simples e Regressão Logística

Funções das Redes Neurais

In [1]:
'''Criando a funcao de normalização de um dataframe inteiro
    input:
        df: Dataframe
    output:
        df: Dataframe com valores normalizados
'''
def normalize_dataframe(df):
    for column in df:
        df[column] = df[column]/255
    return df


'''Funcao para aplicar relu em um vetor
    input:
        z: weights * params + bias
    output:
        s: 0 para valores negativos z para valores positivos aplicado em cada amostra
'''    
def relu(z):
    return np.maximum(z, 0)


'''Funcao para aplicar a derivada da relu em um vetor
    input:
        z: weights * params + bias
    output:
        s: 0 para valores negativos 1 para valores positivos aplicado em cada amostra
'''    
def relu_(z):
    return np.array(z>0).astype(int)


'''Funcao para aplicar sigmoid em um vetor
    input:
        z: weights * params + bias
    output:
        s: valor entre 0-1 da aplicação da função para cada amostra
'''
def sigmoid(z):
    s = 1 / (1 + np.exp(-z))
    return s

'''Funcao para aplicar a derivada da sigmoid em um vetor
    input:
        z: weights * params + bias
    output:
        p: taxa de variação da sigmoid para cada amostra
'''
def sigmoid_(z):
    s = sigmoid(Z1) * (1 - sigmoid(Z1))
    return s


'''Funcao para calcular a tangente hiporbólica em um vetor
    input:
        x: weights * params + bias
    output:
        s: valor entre 0-1 da aplicação da função para cada amostra
'''
def tanh(x):
    s = (2 / (1 + np.exp(-2*x))) -1
    return s

'''Funcao para calcular a derivada da tangente hiporbólica em um vetor
    input:
        x: weights * params + bias
    output:
        s: porcentagem de chance do valor ser da classe predita para cada amostra
'''
def tanh_(x):
    s = 1 - tanh(x)**2
    return s


'''Funcao para calcular o erro médio da predição em todo o dataset utilizando cross-entropy (Não pode ser utilizada com Relu)
    input:
        Y: labels do dataset
        Y_pred: labels preditas
    output:
        custo: erro médio da predição
'''
def cross_entropy_loss(Y, Y_pred):
    m = Y.shape[1]
    custo = -(1./m) * ( np.sum( np.multiply(np.log(Y_pred),Y) ) + np.sum( np.multiply(np.log(1-Y_pred),(1-Y)) ) )
    return custo


'''Funcao para calcular o erro médio da predição em todo o dataset utilizando custo quadratico
    input:
        x: weights * params + bias
    output:
        p: porcentagem de chance do valor ser da classe predita
'''
def quadratic_loss(Y, Y_pred):
        m = Y.shape[1]
        return (1./m)*0.5*np.linalg.norm(Y_pred-Y)**2



### Preparando os Dados

In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import time

df = pd.read_csv("data/fashion-mnist_train.csv")

#Separate the Training DF into Train and Validation
msk = np.random.rand(len(df)) < 0.6

train_df = df[msk]
validation_df = df[~msk]

Y_train = train_df["label"]
Y_validation = validation_df["label"]

train_df = train_df.loc[:, train_df.columns != "label"]
validation_df = validation_df.loc[:, validation_df.columns != "label"]

#test_df = pd.read_csv("data/fashion-mnist_test.csv")

normalized_train_df = normalize_dataframe(train_df)
normalized_validation_df = normalize_dataframe(validation_df)


## A sessão abaixo será apagada. Só é utilizada porque falta a SOFTMAX na ultima camada
#Put labels to single class

y_aux = np.zeros(Y_train.shape)
y_aux[np.where(Y_train == 0.0)[0]] = 1
Y_train = y_aux

print(Y_train.max())
print(Y_train.min())

y_aux = np.zeros(Y_validation.shape)
y_aux[np.where(Y_validation == 0.0)[0]] = 1
Y_validation = y_aux

### Visualização dos Dados

In [None]:
import matplotlib
import matplotlib.pyplot as plt

i = 3
#print(np.array(train_df.iloc[1]).reshape(28,28))
plt.imshow(np.array(train_df.iloc[5]).reshape(28,28), cmap = matplotlib.cm.binary)
plt.axis("off")
plt.show()
print(Y_train)


### Rede Neural sem Camadas Escondidas

In [6]:
#NO HIDDEN LAYER

learning_rate = 0.1

X = normalized_train_df.T
Y = np.array(Y_train).reshape(1, X.shape[1])

n_x = X.shape[0]
m = X.shape[1]

W = np.random.randn(n_x, 1) * 0.01
b = np.zeros((1, 1))

for i in range(2000):
    Z = np.matmul(W.T, X) + b
    A = relu(Z)

    cost = quadratic_loss(Y, A)

    dW = (1/m) * np.matmul(X, (A-Y).T)
    db = (1/m) * np.sum(A-Y, axis=1, keepdims=True)

    W = W - learning_rate * dW
    b = b - learning_rate * db

    if (i % 100 == 0):
        print("Epoch", i, "cost: ", cost)

print("Final cost:", cost)


Epoch 0 cost:  0.051135740978545015
Epoch 100 cost:  0.03893971457554921
Epoch 200 cost:  0.037636272280184915
Epoch 300 cost:  0.03693008125617631
Epoch 400 cost:  0.03649006161015847
Epoch 500 cost:  0.0361874562817741
Epoch 600 cost:  0.03595700352197947
Epoch 700 cost:  0.035772829636446166
Epoch 800 cost:  0.035618628376356765
Epoch 900 cost:  0.03548481175011885
Epoch 1000 cost:  0.03536993437382601
Epoch 1100 cost:  0.03526920738570392
Epoch 1200 cost:  0.03517931813008395
Epoch 1300 cost:  0.035098168242733485
Epoch 1400 cost:  0.03502336391538415
Epoch 1500 cost:  0.03495465263458976
Epoch 1600 cost:  0.03489183529125428
Epoch 1700 cost:  0.0348335203957646
Epoch 1800 cost:  0.03477933718136989
Epoch 1900 cost:  0.03472895774734202
Final cost: 0.03641135985981767


In [None]:
from sklearn.metrics import classification_report, confusion_matrix


X_validation = normalized_validation_df.T
Y_validation = np.array(Y_validation).reshape(1, X_validation.shape[1])

n_x = X_validation.shape[0]
m = X_validation.shape[1]
Z = np.matmul(W.T, X_validation) + b
A = sigmoid(Z)

predictions = (A>.5)[0,:]
labels = (Y_validation == 1)[0,:]

print(confusion_matrix(predictions, labels))


### Rede Neural com 1 Camada Escondida

In [56]:
#ONE HIDDEN LAYER

learning_rate = 0.1

X = normalized_train_df.T
Y = np.array(Y_train).reshape(1, X.shape[1])

n_x = X.shape[0]
n_h = 64
m = X.shape[1]

W1 = np.random.randn(n_h, n_x) * 0.01
b1 = np.zeros((n_h, 1))
W2 = np.random.randn(1, n_h) * 0.01
b2 = np.zeros((1, 1))

for i in range(2000):

    Z1 = np.matmul(W1, X) + b1
    A1 = tanh(Z1)
    Z2 = np.matmul(W2, A1) + b2
    A2 = tanh(Z2)

    cost = quadratic_loss(Y, A2)

    dZ2 = A2-Y
    dW2 = (1./m) * np.matmul(dZ2, A1.T)
    db2 = (1./m) * np.sum(dZ2, axis=1, keepdims=True)

    dA1 = np.matmul(W2.T, dZ2)
    dZ1 = dA1 * tanh_(Z1)
    dW1 = (1./m) * np.matmul(dZ1, X.T)
    db1 = (1./m) * np.sum(dZ1, axis=1, keepdims=True)

    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1

    if i % 100 == 0:
        print("Epoch", i, "cost: ", cost)

print("Final cost:", cost)


Epoch 0 cost:  0.05148921429381009
Epoch 100 cost:  0.024321304995056515
Epoch 200 cost:  0.022435680888164403
Epoch 300 cost:  0.021395930507784964
Epoch 400 cost:  0.020522833481296944
Epoch 500 cost:  0.019666102625197062
Epoch 600 cost:  0.018918617974133675
Epoch 700 cost:  0.018296930192860345
Epoch 800 cost:  0.0220776141038961
Epoch 900 cost:  0.017397535890385653
Epoch 1000 cost:  0.01708932705717457
Epoch 1100 cost:  0.0168397401752846
Epoch 1200 cost:  0.016635983643505832
Epoch 1300 cost:  0.017114589675492307
Epoch 1400 cost:  0.016370817504186345
Epoch 1500 cost:  0.016165625398442754
Epoch 1600 cost:  0.016045986176954085
Epoch 1700 cost:  0.01597662292429264
Epoch 1800 cost:  0.016782207091304333
Epoch 1900 cost:  0.01579291146866112
Final cost: 0.01564525516238836


In [57]:
from sklearn.metrics import classification_report, confusion_matrix


X_validation = normalized_validation_df.T
Y_validation = np.array(Y_validation).reshape(1, X_validation.shape[1])

n_x = X_validation.shape[0]
m = X_validation.shape[1]
Z = np.matmul(W.T, X_validation) + b
A = tanh(Z)

Z1 = np.matmul(W1, X_validation) + b1
A1 = tanh(Z1)
Z2 = np.matmul(W2, A1) + b2
A2 = tanh(Z2)

predictions = (A2>.5)[0,:]
labels = (Y_validation == 1)[0,:]

print(confusion_matrix(predictions, labels))
print(classification_report(predictions, labels))

predictions = (A>.5)[0,:]
labels = (Y_validation == 1)[0,:]

print(confusion_matrix(predictions, labels))


[[21175   586]
 [  391  1730]]
             precision    recall  f1-score   support

      False       0.98      0.97      0.98     21761
       True       0.75      0.82      0.78      2121

avg / total       0.96      0.96      0.96     23882

[[20636   234]
 [  930  2082]]
