Neste notebook, vamos codificar Redes Neurais de forma manual para tentar entender intuitivamente como elas são implementadas na prática.

# Sumário

- [Exemplo 1](#Exemplo-1)
- [Exemplo 2](#Exemplo-2)
- [O que precisamos para implementar uma Rede Neural?](#O-que-precisamos-para-implementar-uma-Rede-Neural?)
- [Referências](#Referências)

# Imports e Configurações

In [1]:
import numpy as np

# Exemplo 1

<img src='images/backprop_example_1.png'>

In [2]:
def sigmoid(x, derivative=False):
    if derivative:
        y = sigmoid(x)
        return y*(1-y)
    return 1.0/(1.0+np.exp(-x))

In [8]:
x=np.array([[0.05, 0.10]])
y=np.array([[0.01, 0.99]])

w1=np.array([[0.15, 0.20], [0.25, 0.30]])
b1=np.array([[0.35]])

w2=np.array([[0.40, 0.45],[0.50, 0.55]])
b2=np.array([[0.60]])

learning_rate=0.5

for i in range(1):
    inp1=np.dot(x, w1.T) + b1
    h1=sigmoid(inp1)
    
    inp2=np.dot(h1, w2.T) + b2
    out=sigmoid(inp2)
    
    cost = 0.5*np.sum((y-out)**2)
    
    dout = -(y-out)
    
    dinp2=sigmoid(inp2, derivative=True)*dout
    dh1 = np.dot(dinp2, w2)
    dw2 = np.dot(dinp2.T, h1)
    db2 = 1.0 * dinp2.sum(axis=0, keepdims=True)
    
    dinp1 = sigmoid(inp1, derivative=True)*dh1
    dx = np.dot(dinp1, w1)
    dw1 = np.dot(dinp1.T, x)
    db1 = 1.0 * dinp1.sum(axis=0, keepdims=True)
    
    w2 = w2 - learning_rate*dw2
    b2 = b2 - learning_rate*db2
    w1 = w1 - learning_rate*dw1
    b1 = b1 - learning_rate*db1
    
    
    print(w1)
    print(w2)

[[0.14978072 0.19956143]
 [0.24975114 0.29950229]]
[[0.35891648 0.40866619]
 [0.51130127 0.56137012]]


# Exemplo 2

In [27]:
def linear(x, derivative=False):
    return np.ones_like(x) if derivative else x

def relu(x, derivative=False):
    if derivative:
        x = np.where(x <= 0, 0, 1)
    return np.maximum(0, x)

def softmax(x, y_oh, derivative=False):
    if derivative:
        y_pred = softmax(x, y_oh)
        k = np.nonzero(y_pred * y_oh)
        pk = y_pred[k]
        y_pred[k]=pk*(1-pk)
        return y_pred
    exp = np.exp(x)
    return exp / np.sum(exp, axis=1, keepdims=True)

def neg_log_likelihood(y_oh, y_pred, derivative=False):
    k = np.nonzero(y_pred * y_oh)
    pk = y_pred[k]
    if derivative:
        y_pred[k]=-1.0/pk
        return y_pred
    return np.mean(-np.log(pk))

def softmax_neg_log_likelihood(y_oh, y_pred, derivative=False):
    y_softmax = softmax(y_pred, y_oh)
    if derivative:
        k = np.nonzero(y_pred * y_oh)
        dlog = neg_log_likelihood(y_oh, y_softmax, derivative=True)
        dsoftmax = softmax(y_pred, y_oh, derivative=True)
        
        y_softmax[k]=dlog[k]*dsoftmax[k]
        return y_softmax/y_softmax.shape[0]
    return neg_log_likelihood(y_oh, y_softmax)

In [32]:
x = np.array([[0.1, 0.2, 0.7]])
y = np.array([[1, 0, 0]])
w1 = np.array([[0.1, 0.2, 0.3], [0.3, 0.2, 0.7], [0.4, 0.3, 0.9]])
b1 = np.ones((1,3))
w2 = np.array([[0.2, 0.3, 0.5], [0.3, 0.5, 0.7], [0.6, 0.4, 0.8]])
b2 = np.ones((1,3))
w3 = np.array([[0.1, 0.4, 0.8], [0.3, 0.7, 0.2], [0.5, 0.2, 0.9]])
b3 = np.ones((1,3))

learning_rate = 0.01

for i in range(401):
    # feedforward
    # 1a camada
    inp1 = np.dot(x, w1.T) + b1
    h1 = relu(inp1)
    
    # 2a camada
    inp2 = np.dot(h1, w2.T) + b2
    h2 = sigmoid(inp2)
    
    # 3a camada
    inp3 = np.dot(h2, w3.T) + b3
    out = linear(inp3)
    
    cost = softmax_neg_log_likelihood(y, out)
    
    # backpropagation
    # insira seu código aqui!
    dout = softmax_neg_log_likelihood(y, out, derivative=True)
    
    dinp3 = linear(inp3, derivative=True) * dout
    dh2 = np.dot(inp3, w3)
    dw3 = np.dot(dinp3.T, h2)
    db3 = 1.0 * dinp3.sum(axis=0, keepdims=True)
    
    dinp2 = sigmoid(inp2, derivative=True) * dh2
    dh1 = np.dot(inp2, w2)
    dw2 = np.dot(dinp2.T, h1)
    db2 = 1.0 * dinp2.sum(axis=0, keepdims=True)
    
    dinp1 = relu(inp1, derivative=True) * dh1
    dx = np.dot(inp1, w1)
    dw1 = np.dot(inp1.T, h1)
    db1 = 1.0 * dinp1.sum(axis=0, keepdims=True)
    
    w3 = w3 - learning_rate*dw3
    b3 = b3 -learning_rate*db3
    w2 = w2 - learning_rate*dw2
    b2 = b2 -learning_rate*db2
    w1 = w1 - learning_rate*dw1
    b1 = b1 -learning_rate*db1
    
    
    if i%30 ==0:
        cost = softmax_neg_log_likelihood(y, out)
        print(cost)
    

for w in [w1, w2, w3]:
    print(w)

1.1674456052871238
0.6729950015153584
0.4862631673354616
0.38539691925365127
0.32821948011489066
0.29436728952876506
0.2727431981657179
0.25780236788053007
0.24634367802084267
0.23673819401294363
0.22814040800552246
0.2200979711480343
0.21236651911446333
0.20481952735167722
[[-0.08958684 -0.05703158  0.08274763]
 [ 0.04296777 -0.16549796  0.41640128]
 [ 0.18338759  0.01915583  0.63274679]]
[[0.12657336 0.15651546 0.44035962]
 [0.21582263 0.31820783 0.64008716]
 [0.47745754 0.12411929 0.71877765]]
[[ 0.77437742  1.01636974  1.33324711]
 [-0.01761382  0.41216485 -0.04638309]
 [ 0.14323639 -0.1285346   0.61313598]]


# O que precisamos para implementar uma Rede Neural?

# Referências

- [Neural Network from Scratch](https://beckernick.github.io/neural-network-scratch/)
- [Backpropagation Algorithm](https://theclevermachine.wordpress.com/tag/backpropagation-algorithm/)
- [Back-Propagation is very simple. Who made it Complicated ?](https://becominghuman.ai/back-propagation-is-very-simple-who-made-it-complicated-97b794c97e5c)
- [A Step by Step Backpropagation Example](https://mattmazur.com/2015/03/17/a-step-by-step-backpropagation-example/)
- [Understanding softmax and the negative log-likelihood](https://ljvmiranda921.github.io/notebook/2017/08/13/softmax-and-the-negative-log-likelihood/)