In [1]:
import torch
from torch import Tensor
import numpy as np
torch.manual_seed(70318)

from typing import Dict

from helper import (sigmoid, 
                    to_2d)

### Step 1

In [2]:
# constants 
n = 1

In [3]:
X = torch.rand(n, 3)
X

tensor([[ 0.1687,  0.3994,  0.2128]])

$ \begin{bmatrix} x_1 & x_2 & x_3 \end{bmatrix} $

Transforming this into first layer:

$$ 
a_1 = x_1 * v_{11} + x_2 * v_{21} + x_3 * v_{31} + i_{11}
$$

$$ 
a_2 = x_1 * v_{12} + x_2 * v_{22} + x_3 * v_{32} + i_{12}
$$

We can also write this as:

$$ A = X * V + I $$

In [4]:
V = torch.randn(3,4)
I = torch.randn(1,4)

In [5]:
A = torch.mm(X, V) + I
print(A)

tensor([[ 0.5258, -0.4453,  0.9763, -1.8463]])


In [6]:
B = sigmoid(A)

In [7]:
B

tensor([[ 0.6285,  0.3905,  0.7264,  0.1363]])

In [8]:
W = torch.randn(4, 1)
J = torch.randn(1, 1)

C = torch.mm(B, W) + J
print(C)

tensor([[ 0.7252]])


In [9]:
P = sigmoid(C)
print(P)

tensor([[ 0.6738]])


$$
\begin{bmatrix}c_1\end{bmatrix} = 
\begin{bmatrix}b_1 & b_2 & b_3 & b_4\end{bmatrix} *
\begin{bmatrix}w_{11} \\ w_{21} \\ w_{31} \\ w_{41} \end{bmatrix} + \begin{bmatrix}j_1\end{bmatrix} = 
\begin{bmatrix} w_{11} * b_1 + w_{21} * b_2 + w_{31} * b_3 + w_{41} * b_4 + j_1 \end{bmatrix}
$$

$ p_1 = \sigma(c_1) $

In [10]:
Y = torch.randint(high=2, size=(n, 1))
print(Y)

tensor([[ 0.]])


In [11]:
def mse_loss(p: Tensor, 
             y: Tensor) -> Tensor:
    
    return torch.pow(p - y, 2)

In [12]:
L = mse_loss(P, Y)

In [13]:
dLdP = -1.0 * (Y - P)
print(dLdP)

tensor([[ 0.6738]])


In [14]:
dPdC = sigmoid(C) * (1 - sigmoid(C))
print(dPdC)

tensor([[ 0.2198]])


In [15]:
dLdC = dLdP * dPdC
print(dLdC)

tensor([[ 0.1481]])


In [16]:
dLdW = torch.mm(B.transpose(0, 1), dLdC)
print(dLdW)

tensor([[ 0.0931],
        [ 0.0578],
        [ 0.1076],
        [ 0.0202]])


In [17]:
dLdC

tensor([[ 0.1481]])

In [18]:
dLdC.shape

torch.Size([1, 1])

In [19]:
dLdJ = dLdC.sum(dim=0).reshape(1, 1)
print(dLdJ)

tensor([[ 0.1481]])


In [20]:
dLdB = torch.mm(dLdC, W.transpose(0, 1))
print(dLdB)

tensor([[ 0.0530,  0.0311,  0.1894,  0.0563]])


In [21]:
dBdA = sigmoid(A) * (1 - sigmoid(A))
dLdA = dLdB * dBdA
print(dLdA)

tensor(1.00000e-02 *
       [[ 1.2373,  0.7395,  3.7642,  0.6624]])


In [22]:
dLdV = torch.mm(X.transpose(0, 1), dLdA)
print(dLdV)

tensor(1.00000e-02 *
       [[ 0.2088,  0.1248,  0.6352,  0.1118],
        [ 0.4941,  0.2953,  1.5033,  0.2645],
        [ 0.2632,  0.1573,  0.8008,  0.1409]])


In [23]:
dLdI = dLdA.sum(dim=0).reshape(1, 4)
print(dLdI)

tensor(1.00000e-02 *
       [[ 1.2373,  0.7395,  3.7642,  0.6624]])


### Update the weights

In [24]:
learning_rate = 0.01

In [25]:
V = V - learning_rate * dLdV
I = I - learning_rate * dLdI

W = W - learning_rate * dLdW
J = J - learning_rate * dLdJ

In [26]:
def train_neural_net(X: Tensor, 
                     Y: Tensor,
                     weights: Dict[str, Tensor] = None,
                     learning_rate: float = 1.0):

    # check consistent batch sizes
    assert X.shape[0] == Y.shape[0]

    # forward pass
    if not weights:
        weights: Dict[str, Tensor] = {}
        weights['V'] = torch.randn(3, 4)
        weights['I'] = torch.randn(1, 4)
    A = torch.mm(X, weights['V']) + I
    B = sigmoid(A)
    
    if 'W' not in weights:
        weights['W'] = torch.randn(4, 1)
        weights['J'] = torch.randn(1, 1)
    C = torch.mm(B, weights['W']) + J
    P = sigmoid(C)
    
    # loss
    L = mse_loss(P, Y)
    
    # backpropagation
    dLdP = -1.0 * (Y - P)
    dPdC = sigmoid(C) * (1 - sigmoid(C))
    dLdC = dLdP * dPdC
    dLdW = torch.mm(B.transpose(0, 1), dLdC)
    dLdJ = dLdC.sum(dim=0).reshape(1, 1)
    
    dLdB = torch.mm(dLdC, weights['W'].transpose(0, 1))
    dBdA = sigmoid(A) * (1 - sigmoid(A))
    dLdA = dLdB * dBdA
    
    dLdV = torch.mm(X.transpose(0, 1), dLdA)
    dLdI = dLdA.sum(dim=0).reshape(1, 4)
    
    # update the weights
    weights['V'] = weights['V'] - learning_rate * dLdV
    weights['I'] = weights['I'] - learning_rate * dLdI

    weights['W'] = weights['W'] - learning_rate * dLdW
    weights['J'] = weights['J'] - learning_rate * dLdJ
    
    return weights

### Toy example training

In [27]:
from nn_helper import (generate_x_y, 
                       train_and_display, 
                       accuracy_binary)

In [28]:
df, X, y = generate_x_y(random_seed=70318)
X, y = Tensor(X), Tensor(y)
df

Unnamed: 0,X1,X2,X3,y
0,0,0,1,0
1,0,1,0,1
2,1,1,0,1
3,0,1,1,0
4,1,0,1,0
5,1,1,1,0
6,0,0,0,1
7,1,0,0,1


#### Select random row

In [29]:
def random_row(X: Tensor, 
               y: Tensor) -> Tensor:
    num_rows = X.shape[0]
    
    ind = int(torch.randint(0, num_rows, size=(1,)).item())
    
    X_row = X[ind]
    y_row = y[ind]
    
    return to_2d(X_row, "row"), to_2d(y_row, "row")

In [30]:
def neural_net_predict(X: Tensor, 
                       weights: Dict[str, Tensor],
                       learning_rate: float = 1.0) -> Tensor:

    A = torch.mm(X, weights['V']) + I
    B = sigmoid(A)
    
    C = torch.mm(B, weights['W']) + J
    P = sigmoid(C)

    return P

In [31]:
def cross_entropy(predictions: Tensor, 
                  actual: Tensor) -> Tensor:
    
    assert predictions.shape == actual.shape, \
    "Prediction and actual must have same shape"
    
    return -1.0 * actual * torch.log(predictions) - (1.0 - actual) * torch.log(1 - predictions)

In [32]:
def log_loss(X: Tensor, 
             y: Tensor,
             weights: Dict[str, Tensor]) -> Tensor:

    P = neural_net_predict(X, weights)
    
    ce = cross_entropy(P, y)

    return float(torch.mean(ce).item())

In [33]:
random_row(X, y)

(tensor([[ 0.,  0.,  1.]]), tensor([[ 0.]]))

In [52]:
weights = None
iterations_per_epoch = 8
num_epochs = 100
print_every = 50

torch.manual_seed(70318)

for i in range(iterations_per_epoch * num_epochs):

    X_row, y_row = random_row(X, y)

    # Update the weights by feeding one row of data through the neural net
    weights = train_neural_net(X_row, y_row, weights, 4)

preds = neural_net_predict(X, weights)

cross_entropy_loss = cross_entropy(preds, y)

In [53]:
torch.cat([preds, y, cross_entropy_loss], dim=1)

tensor([[ 0.0223,  0.0000,  0.0226],
        [ 0.9785,  1.0000,  0.0217],
        [ 0.9879,  1.0000,  0.0122],
        [ 0.0258,  0.0000,  0.0261],
        [ 0.0284,  0.0000,  0.0288],
        [ 0.0291,  0.0000,  0.0296],
        [ 0.9611,  1.0000,  0.0397],
        [ 0.9806,  1.0000,  0.0196]])