In [157]:
import numpy as np

## NumPy

In [163]:
def init():
    """(Re)create the module-level state of the bias-free 2-3-2-1 network.

    Sets the globals ``x`` (input, two ones), ``y`` (target, a single one),
    ``w`` (one weight matrix per layer, every entry 0.5) and the per-layer
    buffers ``z`` and ``delta`` (filled with ones as placeholders; the
    forward/backward functions overwrite them).
    """
    global x, w, z, y, delta

    # (rows, cols) of each weight matrix; rows is also the layer's output size.
    layer_shapes = [(3, 2), (2, 3), (1, 2)]

    x = np.ones(2)
    y = np.ones(1)
    w = [np.full(shape, 0.5) for shape in layer_shapes]
    # z and delta are built separately so they never share array objects.
    z = [np.ones(rows) for rows, _ in layer_shapes]
    delta = [np.ones(rows) for rows, _ in layer_shapes]

init()

In [209]:
def f(x):
    """Return ``x`` unchanged (identity function).

    Presumably a placeholder activation; it is not called by any of the
    cells visible in this notebook.
    """
    return x

In [164]:
def forward(x, w, z):
    """Propagate ``x`` through the linear layers in ``w``.

    Layer ``k`` computes ``np.dot(w[k], previous)`` where ``previous`` is
    ``x`` for the first layer and ``z[k - 1]`` afterwards.  The results are
    written into the list ``z`` in place, and that same list is returned.
    """
    z[0] = np.dot(w[0], x)
    for layer, weights in enumerate(w[1:], start=1):
        z[layer] = np.dot(weights, z[layer - 1])
    return z

forward(x, w, z)

[array([1., 1., 1.]), array([1.5, 1.5]), array([1.5])]

In [165]:
def backward(y, w, z, delta=None):
    """Back-propagate the output error through the linear layers.

    Parameters
    ----------
    y : target for the last layer's output.
    w : list of weight matrices, one per layer.
    z : list of layer outputs as filled in by ``forward``.
    delta : optional list with one slot per layer that receives the error
        terms.  When omitted, the module-level ``delta`` list is used, which
        is what the original implementation always wrote to.

    Returns
    -------
    The ``delta`` list that was written to (modified in place).

    Raises
    ------
    NameError
        If ``delta`` is omitted and no module-level ``delta`` exists.
    """
    if delta is None:
        # Legacy call style ``backward(y, w, z)``: fall back to the global
        # buffer so cells that read the global ``delta`` afterwards still work.
        try:
            delta = globals()["delta"]
        except KeyError:
            raise NameError(
                "name 'delta' is not defined; run init() or pass delta explicitly"
            ) from None

    # z - y is the derivative of the half squared error 0.5 * (z - y)**2 with
    # respect to the output, i.e. this is the L2 (squared-error) case.
    delta[-1] = z[-1] - y
    # Layers are linear (no activation), so the error is simply pulled back
    # through the transposed weights, from the last layer to the first.
    for i in range(len(w) - 1, 0, -1):
        delta[i - 1] = np.dot(w[i].T, delta[i])
    return delta

backward(y, w, z)

[array([0.25, 0.25, 0.25]), array([0.25, 0.25]), array([0.5])]

In [166]:
def update(x, w, z, delta, eta=1):
    """Apply one gradient-descent step to every weight matrix in ``w``.

    The gradient for layer ``k`` is the outer product of ``delta[k]`` with
    that layer's input (``x`` for the first layer, ``z[k - 1]`` otherwise).
    It is scaled by ``eta`` and subtracted, because descent moves against the
    gradient.  ``w`` is modified in place and returned.
    """
    def _descend(layer, layer_input):
        # In-place subtraction keeps the same list slot as the original code.
        w[layer] -= eta * np.outer(delta[layer], layer_input)

    _descend(0, x)
    for layer in range(1, len(w)):
        _descend(layer, z[layer - 1])
    return w

update(x, w, z, delta)

[array([[0.25, 0.25],
        [0.25, 0.25],
        [0.25, 0.25]]),
 array([[0.25, 0.25, 0.25],
        [0.25, 0.25, 0.25]]),
 array([[-0.25, -0.25]])]

In [167]:
# Reset the module-level state, then run 50 forward/backward/update rounds
# with learning rate 0.1 on the single (x, y) pair created by init().
init()

for i in range(50):
    # forward() and backward() return the lists they filled in place, and
    # update() returns the list of weight matrices it modified in place.
    z = forward(x, w, z)
    delta = backward(y, w, z)
    w = update(x, w, z, delta, eta=0.1)
    #print('z:', z, 'delta:', delta, 'w:', w, sep='\n')
    #print()

# Display the final layer outputs, error terms and weights.
z, delta, w

([array([0.92790055, 0.92790055, 0.92790055]),
  array([1.29149914, 1.29149914]),
  array([1.])],
 [array([3.98829749e-16, 3.98829749e-16, 3.98829749e-16]),
  array([4.298195e-16, 4.298195e-16]),
  array([1.11022302e-15])],
 [array([[0.46395027, 0.46395027],
         [0.46395027, 0.46395027],
         [0.46395027, 0.46395027]]),
  array([[0.46395027, 0.46395027, 0.46395027],
         [0.46395027, 0.46395027, 0.46395027]]),
  array([[0.38714699, 0.38714699]])])

In [214]:
forward(np.array([1, 1]), w, z)

[array([0.92790055, 0.92790055, 0.92790055]),
 array([1.29149914, 1.29149914]),
 array([1.])]

In [215]:
forward(np.array([2, 2]), w, z)

[array([1.85580109, 1.85580109, 1.85580109]),
 array([2.58299827, 2.58299827]),
 array([2.])]

- 权重之和不必为一
- 学习率太高时会跑飞
- 偏置不是必须的

### Bias

In [158]:
def init():
    """(Re)create the module-level state of the 2-3-2-1 network with biases.

    Sets the globals ``x`` and ``y`` (all ones), ``w`` and ``b`` (every entry
    0.5) and the per-layer buffers ``z`` and ``delta`` (ones, used only as
    placeholders until the forward/backward functions overwrite them).
    """
    global x, w, b, z, y, delta

    fan_out = (3, 2, 1)  # output size of each layer
    fan_in = (2, 3, 2)   # input size of each layer

    x, y = np.ones(2), np.ones(1)
    w = [np.full((rows, cols), 0.5) for rows, cols in zip(fan_out, fan_in)]
    b = [np.full(rows, 0.5) for rows in fan_out]
    # Separate comprehensions so z and delta never share array objects.
    z = [np.ones(rows) for rows in fan_out]
    delta = [np.ones(rows) for rows in fan_out]

init()

In [159]:
def forward(x, w, z, b=None):
    """Propagate ``x`` through the affine layers ``w[k] @ input + b[k]``.

    Parameters
    ----------
    x : network input.
    w : list of weight matrices, one per layer.
    z : list with one slot per layer; overwritten in place with the outputs.
    b : optional list of bias vectors, one per layer.  When omitted, the
        module-level ``b`` list is used, which is what the original
        implementation always read.

    Returns
    -------
    The list ``z`` (the same object that was passed in).

    Raises
    ------
    NameError
        If ``b`` is omitted and no module-level ``b`` exists.
    """
    if b is None:
        # Legacy call style ``forward(x, w, z)``: read the global biases.
        try:
            b = globals()["b"]
        except KeyError:
            raise NameError(
                "name 'b' is not defined; run init() or pass b explicitly"
            ) from None

    z[0] = np.dot(w[0], x) + b[0]
    for i in range(1, len(w)):
        # Each later layer consumes the output that was just stored.
        z[i] = np.dot(w[i], z[i - 1]) + b[i]
    return z

forward(x, w, z)

[array([1.5, 1.5, 1.5]), array([2.75, 2.75]), array([3.25])]

In [160]:
def backward(y, w, z, delta=None):
    """Compute the per-layer error terms for the network with biases.

    The biases do not enter this computation: the output error is
    ``z[-1] - y`` and each earlier term is the next layer's term multiplied
    by that layer's transposed weight matrix.

    Parameters
    ----------
    y : target for the last layer's output.
    w : list of weight matrices, one per layer.
    z : list of layer outputs as filled in by ``forward``.
    delta : optional list with one slot per layer that receives the error
        terms.  When omitted, the module-level ``delta`` list is used, which
        is what the original implementation always wrote to.

    Returns
    -------
    The ``delta`` list that was written to (modified in place).

    Raises
    ------
    NameError
        If ``delta`` is omitted and no module-level ``delta`` exists.
    """
    if delta is None:
        # Legacy call style ``backward(y, w, z)``: write to the global buffer
        # so cells that read the global ``delta`` afterwards still work.
        try:
            delta = globals()["delta"]
        except KeyError:
            raise NameError(
                "name 'delta' is not defined; run init() or pass delta explicitly"
            ) from None

    # Derivative of the half squared error 0.5 * (z - y)**2 w.r.t. the output.
    delta[-1] = z[-1] - y
    for i in reversed(range(1, len(w))):
        delta[i - 1] = np.dot(w[i].T, delta[i])
    return delta

backward(y, w, z)

[array([1.125, 1.125, 1.125]), array([1.125, 1.125]), array([2.25])]

In [161]:
def update(x, w, b, z, delta, eta=1):
    """Apply one gradient-descent step to all weights and biases.

    For layer ``k`` the weight gradient is the outer product of ``delta[k]``
    with the layer's input (``x`` for the first layer, ``z[k - 1]`` after
    that) and the bias gradient is ``delta[k]`` itself.  Both are scaled by
    ``eta`` and subtracted in place.  Returns the tuple ``(w, b)``.
    """
    def _descend(layer, layer_input):
        w[layer] -= eta * np.outer(delta[layer], layer_input)
        b[layer] -= eta * delta[layer]

    _descend(0, x)
    layer = 1
    while layer < len(w):
        _descend(layer, z[layer - 1])
        layer += 1
    return w, b

update(x, w, b, z, delta)

([array([[-0.625, -0.625],
         [-0.625, -0.625],
         [-0.625, -0.625]]),
  array([[-1.1875, -1.1875, -1.1875],
         [-1.1875, -1.1875, -1.1875]]),
  array([[-5.6875, -5.6875]])],
 [array([-0.625, -0.625, -0.625]), array([-0.625, -0.625]), array([-1.75])])

In [162]:
# Reset the module-level state (including the biases b) before the run.
init()

# A single training step (range(1)), printing the state before it ...
for i in range(1):
    print(i)
    print('z:', z, 'delta:', delta, 'w:', w, 'b', b, sep='\n')
    print()
    z = forward(x, w, z)
    delta = backward(y, w, z)
    w, b = update(x, w, b, z, delta, eta=0.1)

# ... and the state after it.
print('z:', z, 'delta:', delta, 'w:', w, 'b', b, sep='\n')

0
z:
[array([1., 1., 1.]), array([1., 1.]), array([1.])]
delta:
[array([1., 1., 1.]), array([1., 1.]), array([1.])]
w:
[array([[0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5]]), array([[0.5, 0.5, 0.5],
       [0.5, 0.5, 0.5]]), array([[0.5, 0.5]])]
b
[array([0.5, 0.5, 0.5]), array([0.5, 0.5]), array([0.5])]

z:
[array([1.5, 1.5, 1.5]), array([2.75, 2.75]), array([3.25])]
delta:
[array([1.125, 1.125, 1.125]), array([1.125, 1.125]), array([2.25])]
w:
[array([[0.3875, 0.3875],
       [0.3875, 0.3875],
       [0.3875, 0.3875]]), array([[0.33125, 0.33125, 0.33125],
       [0.33125, 0.33125, 0.33125]]), array([[-0.11875, -0.11875]])]
b
[array([0.3875, 0.3875, 0.3875]), array([0.3875, 0.3875]), array([0.275])]


## PyTorch

In [176]:
import torch
from torch import Tensor
from torch.utils.data import DataLoader
from torch import nn

In [177]:
# One (input, target) pair: a two-element float tensor and the Python int 1.
training_data = [(Tensor([1., 1.]), 1)]
# test_data is the same list object, so evaluation sees the training sample.
test_data = training_data
# Bare expression: displayed as the cell's output.
training_data

[(tensor([1., 1.]), 1)]

In [219]:
# The dataset holds a single sample, so each epoch is exactly one batch.
batch_size = 1

# Create data loaders.
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

# Get cpu or gpu device for training.
# Automatic selection is disabled below; the device is pinned to the CPU.
#device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
device = 'cpu'
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    """Three ``nn.Linear`` layers chained as 2 -> 3 -> 2 -> 1.

    Despite the attribute name ``linear_relu_stack`` there is no activation
    between the layers; the stack contains only linear layers.
    """

    def __init__(self):
        super().__init__()
        widths = (2, 3, 2, 1)
        # Consecutive width pairs give (in_features, out_features) per layer.
        layers = [nn.Linear(n_in, n_out) for n_in, n_out in zip(widths, widths[1:])]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the output of the layer stack applied to ``x``."""
        return self.linear_relu_stack(x)

# Build the network and move it to the selected device.
# NOTE(review): no random seed is set in the cells visible here, so the
# initial weights (and every number printed below) presumably differ from
# run to run — confirm whether a torch.manual_seed call exists elsewhere.
model = NeuralNetwork().to(device)
print(model)

class SubLoss(nn.Module):
    """Loss module that returns the signed difference ``output - target``.

    No absolute value and no reduction are applied, so the result has the
    broadcast shape of the two arguments.
    """

    def __init__(self):
        super().__init__()

    def forward(self, output, target):
        """Return ``output - target`` element-wise."""
        residual = output - target
        return residual
# The custom signed-difference loss is kept for reference but disabled;
# the built-in absolute-error loss is used instead.
#loss_fn = SubLoss()
loss_fn = nn.L1Loss()

# Plain stochastic gradient descent over all model parameters, step size 0.1.
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    For every batch: move it to the module-level ``device``, compute the
    prediction and loss, back-propagate and take one optimizer step.  The
    prediction and raw target are printed for every batch, and the loss is
    printed for every 100th batch (starting with the first).

    Parameters
    ----------
    dataloader : iterable of ``(X, y)`` batches exposing ``.dataset``.
    model : the ``nn.Module`` to train (switched to training mode).
    loss_fn : callable ``loss_fn(pred, target)`` returning a scalar tensor.
    optimizer : optimizer bound to ``model``'s parameters.
    """
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        print(pred, y)

        # A (B, 1) prediction against a (B,) target makes element-wise losses
        # such as L1Loss broadcast to (B, B) and average over every
        # prediction/target pairing.  When both hold the same number of
        # elements, give the target the prediction's shape and dtype.
        # Targets with a different element count (e.g. class indices for a
        # multi-class output) are passed through untouched.
        target = y
        if target.shape != pred.shape and target.numel() == pred.numel():
            target = target.reshape(pred.shape).to(pred.dtype)
        loss = loss_fn(pred, target)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss_value, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")

def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

# Number of full passes over the training loader.
epochs = 50
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    # One optimisation pass, then an evaluation pass.  test_dataloader wraps
    # the same list as train_dataloader, so this reports the fit on the
    # training sample rather than held-out performance.
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Using cpu device
NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=2, out_features=3, bias=True)
    (1): Linear(in_features=3, out_features=2, bias=True)
    (2): Linear(in_features=2, out_features=1, bias=True)
  )
)
Epoch 1
-------------------------------
tensor([[0.1295]], grad_fn=<AddmmBackward0>) tensor([1])
loss: 0.870456  [    1/    1]
Test Error: 
 Accuracy: 0.0%, Avg loss: 0.346312 

Epoch 2
-------------------------------
tensor([[0.6537]], grad_fn=<AddmmBackward0>) tensor([1])
loss: 0.346312  [    1/    1]
Test Error: 
 Accuracy: 0.0%, Avg loss: 0.216176 

Epoch 3
-------------------------------
tensor([[1.2162]], grad_fn=<AddmmBackward0>) tensor([1])
loss: 0.216176  [    1/    1]
Test Error: 
 Accuracy: 0.0%, Avg loss: 0.392748 

Epoch 4
-------------------------------
tensor([[0.6073]], grad_fn=<AddmmBackward0>) tensor([1])
loss: 0.392748  [    1/    1]
Test Error: 
 Accuracy: 0.0%, Avg loss: 0.109145 

Epoch 5
-------------------------------
t

In [220]:
# Run the trained model on the first batch of test_data and print the
# prediction next to the target.
model.eval()
# NOTE(review): this rebinds the module-level names x and y, which the NumPy
# cells' init() also assigns; re-running those cells after this one without
# calling init() first would pick up these tensors instead.
x, y = next(iter(DataLoader(test_data)))
x, y = x.to(device), y.to(device)
# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    pred = model(x)
    print(f'Predicted: "{pred[0]}", Actual: "{y}"')

Predicted: "tensor([0.7735])", Actual: "tensor([1])"
