In [19]:
import torch
from torch import Tensor

# Number of rows (samples) in the randomly generated X and y_true below.
m = 1000

def sigmoid(z):
    """Apply the logistic function ``1 / (1 + exp(-z))`` element-wise.

    Args:
        z: Tensor of pre-activations (anything ``torch.exp`` accepts).

    Returns:
        The element-wise result of ``1 / (1 + exp(-z))``.
    """
    denominator = 1 + torch.exp(-z)
    return 1 / denominator


def train_perceptron(X: Tensor, W: Tensor, b: Tensor, y_true: Tensor, mu: float):
    """Perform one batch update step of a single-layer sigmoid perceptron.

    The prediction is ``sigmoid(X @ W + b)``. The parameters are then moved
    by ``mu`` times the error signal ``y_true - y_pred`` accumulated over the
    batch.

    Args:
        X: Input batch; used as the left operand of ``X @ W``.
        W: Weight matrix. Left unmodified.
        b: Bias added to ``X @ W``. Left unmodified.
        y_true: Target values compared against the prediction.
        mu: Learning rate (step size).

    Returns:
        A ``(new_W, new_b)`` tuple of newly allocated tensors holding the
        updated weights and biases.
    """
    # Forward propagation
    z = torch.matmul(X, W) + b
    y_pred = sigmoid(z)

    # Error signal of the prediction
    error = y_true - y_pred

    # Backward propagation. Both terms are summed over the batch dimension
    # (not averaged), so the effective step grows with the number of rows.
    dW = torch.matmul(X.T, error)
    db = torch.sum(error, dim=0)

    # Build the updated parameters out-of-place: an in-place `+=` would
    # silently overwrite the caller's W and b (making the "updated" tensors
    # aliases of the originals) and fails on leaf tensors that require grad.
    new_W = W + mu * dW
    new_b = b + mu * db

    return new_W, new_b


# Demo: run a single training step on randomly generated data.
# Inputs, parameters and targets are all drawn with torch.rand.
X = torch.rand(m, 784)
print("X: ", X.shape)
W = torch.rand(784, 10)
b = torch.rand(10)
y_true = torch.rand(m, 10)
mu = 0.01  # Learning rate

# Show the initial parameters, each followed by its shape.
for _label, _tensor in (("W: ", W), ("b: ", b)):
    print(_label, _tensor, '\n')
    print(_tensor.shape, '\n')

# One update step; the results are reported below.
updated_W, updated_b = train_perceptron(X, W, b, y_true, mu)

print("updated_W: ", updated_W, '\n')
print(updated_W.shape, '\n')
print("updated_b: ", updated_b, '\n')
print(updated_b.shape)

X:  torch.Size([1000, 784])
W:  tensor([[0.4334, 0.9714, 0.4797,  ..., 0.5436, 0.9603, 0.7045],
        [0.7524, 0.8429, 0.1382,  ..., 0.4014, 0.9139, 0.0792],
        [0.0927, 0.0469, 0.9074,  ..., 0.6663, 0.6543, 0.8936],
        ...,
        [0.3289, 0.6569, 0.7988,  ..., 0.1852, 0.7385, 0.3693],
        [0.5425, 0.3359, 0.4454,  ..., 0.4468, 0.1086, 0.9307],
        [0.5153, 0.2158, 0.1907,  ..., 0.7596, 0.0402, 0.3041]]) 

torch.Size([784, 10]) 

b:  tensor([0.7306, 0.3782, 0.5567, 0.6488, 0.7778, 0.7218, 0.1302, 0.8943, 0.7964,
        0.9970]) 

torch.Size([10]) 

updated_W:  tensor([[-2.0187, -1.5334, -2.0419,  ..., -1.9811, -1.4742, -1.7509],
        [-1.6922, -1.5757, -2.3691,  ..., -2.0934, -1.5549, -2.3458],
        [-2.4424, -2.4397, -1.6736,  ..., -1.8840, -1.9116, -1.5708],
        ...,
        [-2.2453, -1.9299, -1.8280,  ..., -2.3888, -1.8306, -2.1709],
        [-1.9870, -2.2021, -2.1529,  ..., -2.1522, -2.5006, -1.5931],
        [-1.9423, -2.2300, -2.3644,  ..., -1.73