In [1]:
import random
import torch
from torch import Tensor

## `sigmoid` and `train_perceptron` functions

In [2]:
def sigmoid(z: Tensor):
    """Element-wise logistic function ``1 / (1 + exp(-z))``.

    Delegates to ``torch.sigmoid`` rather than spelling the formula out by
    hand: the hand-written form overflows ``exp(-z)`` to ``inf`` for large
    negative ``z``, which still yields the right forward value (0) but makes
    autograd's backward pass produce NaN (``0 * inf``).

    Args:
        z: Tensor of pre-activations, any shape.

    Returns:
        Tensor of the same shape as ``z`` with values in ``[0, 1]``.
    """
    return torch.sigmoid(z)

def train_perceptron(X: Tensor, W: Tensor, b: Tensor, y_true: Tensor, mu: float,
                     verbose: bool = True):
    """Apply one full-batch update to a single-layer sigmoid perceptron.

    For each sample ``i`` the prediction is ``sigmoid(X[i] @ W + b)`` and the
    error is ``y_true[i] - prediction``. ``W`` is moved by the batch mean of
    the outer products ``outer(X[i], error[i])`` and ``b`` by the batch mean
    of the errors, both scaled by ``mu``.

    The computation runs on whatever device the input tensors live on; no
    device is hard-coded.

    Args:
        X: Inputs, one sample per row, shape ``(batch, n_inputs)``.
        W: Weights, shape ``(n_inputs, n_outputs)``. Updated in place.
        b: Biases, shape ``(n_outputs,)``. Updated in place.
        y_true: Targets, shape ``(batch, n_outputs)``.
        mu: Step size multiplied into both updates.
        verbose: When true (the default), print every sample's prediction.

    Returns:
        The tuple ``(W, b)`` — the same tensor objects that were passed in,
        after the in-place update.
    """
    batch_size = len(X)
    if batch_size == 0:
        # Nothing to average over; dividing 0 by 0 would overwrite W and b with NaN.
        return W, b

    # One matrix product for the whole batch instead of a per-sample loop.
    predictions = sigmoid(X @ W + b)
    if verbose:
        for y in predictions:
            print(f"Prediction = {y}")
    errors = y_true - predictions

    # X.T @ errors is the sum over samples of outer(X[i], errors[i]).
    delta = X.T @ errors
    beta = errors.sum(dim=0)

    # In-place on purpose: callers holding references to W / b see the update.
    W += delta / batch_size * mu
    b += beta / batch_size * mu
    return W, b

## Initialize tensors

In [3]:
# Seed both RNGs used below so that Restart & Run All reproduces the same data.
SEED = 0
random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

m = 3              # number of samples in the toy batch
n_features = 784   # inputs per sample
n_classes = 10     # outputs per sample

X = torch.rand((m, n_features), device=device)
# NOTE(review): weights drawn uniformly from [0, 1) over 784 inputs push the
# pre-activations deep into the sigmoid's saturated region (the first-epoch
# predictions recorded in this notebook are all 1.) — consider a smaller-scale
# initialisation.
W = torch.rand((n_features, n_classes), device=device)
b = torch.rand(n_classes, device=device)

# One-hot targets: exactly one randomly chosen class per sample.
y_true = torch.zeros((m, n_classes), device=device)
for index in range(m):
    y_true[index, random.randint(0, n_classes - 1)] = 1

mu = 0.9           # step size passed to train_perceptron
print(X.shape, W.shape, b.shape, y_true.shape)

torch.Size([3, 784]) torch.Size([784, 10]) torch.Size([10]) torch.Size([3, 10])


In [4]:
# Show the one-hot target rows so they can be compared with the predictions
# printed during training.
print(f"y_true: {y_true}")

y_true: tensor([[0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]], device='cuda:0')


## Start training

In [5]:
# Repeat the full-batch update a fixed number of times, announcing each epoch
# (numbered from 1) before its predictions are printed.
epochs = 10
for epoch in range(1, 1 + epochs):
    print(f"Epoch: {epoch}")
    # train_perceptron updates W and b in place and hands them back.
    W, b = train_perceptron(X, W, b, y_true, mu)

Epoch: 1
Prediction = tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0')
Prediction = tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0')
Prediction = tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0')
Epoch: 2
Prediction = tensor([0.9965, 0.9991, 1.0000, 0.3172, 0.5889, 1.0000, 0.9998, 0.9953, 1.0000,
        0.9214], device='cuda:0')
Prediction = tensor([9.9401e-01, 9.9925e-01, 1.0000e+00, 5.3488e-04, 1.4010e-02, 1.0000e+00,
        8.6455e-01, 9.8930e-01, 1.0000e+00, 5.7402e-02], device='cuda:0')
Prediction = tensor([0.9968, 0.9545, 1.0000, 0.0187, 0.2797, 1.0000, 0.9987, 0.7003, 1.0000,
        0.5937], device='cuda:0')
Epoch: 3
Prediction = tensor([0.0000e+00, 0.0000e+00, 9.1833e-17, 4.1070e-13, 6.3763e-30, 3.8512e-38,
        0.0000e+00, 0.0000e+00, 8.9757e-35, 0.0000e+00], device='cuda:0')
Prediction = tensor([0.0000e+00, 0.0000e+00, 1.1439e-33, 1.1776e-12, 2.0771e-25, 2.0115e-36,
        0.0000e+00, 0.0000e+00, 1.8843e-15, 0.0000e+00]