In [2]:
import numpy as np

# Define a basic Tensor class
class Tensor:
    def __init__(self, data):
        self.data = np.array(data, dtype=float)
        self.grad = None
        self._backward = lambda: None

    def backward(self, grad=None):
        if grad is not None:
            self.grad = grad
        else:
            if self.grad is None:
                self.grad = np.ones_like(self.data)
        self._backward()

# Define a linear layer
class Linear:
    def __init__(self, input_size, output_size):
        self.weights = Tensor(np.random.randn(input_size, output_size) * np.sqrt(2. / input_size))
        self.bias = Tensor(np.zeros(output_size))

    def __call__(self, x):
        self.input = x
        self.output = Tensor(x.data @ self.weights.data + self.bias.data)
        self.output._backward = self.backward
        return self.output

    def backward(self):
        self.input.grad = self.output.grad @ self.weights.data.T
        self.weights.grad = self.input.data.T @ self.output.grad
        self.bias.grad = self.output.grad.mean(axis=0)

# Define sigmoid activation function and its backward pass
def sigmoid(x):
    output = Tensor(1 / (1 + np.exp(-x.data)))
    output._backward = lambda: sigmoid_backward(x, output)
    return output

def sigmoid_backward(input, output):
    sig = 1 / (1 + np.exp(-input.data))
    input.grad = output.grad * sig * (1 - sig)

# Define mean squared error loss
def mse_loss(y_pred, y_true):
    diff = y_pred.data - y_true.data
    loss = Tensor(np.mean(diff ** 2))
    loss._backward = lambda: mse_loss_backward(y_pred, y_true, loss)
    return loss

def mse_loss_backward(y_pred, y_true, output):
    diff = y_pred.data - y_true.data
    y_pred.grad = output.grad * 2 * diff / y_pred.data.size

# Define an MLP with a single hidden layer
class MLP:
    def __init__(self, input_size, hidden_size, output_size):
        self.layer1 = Linear(input_size, hidden_size)
        self.layer2 = Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.layer1(x)
        x = sigmoid(x)
        x = self.layer2(x)
        return x

# Example usage
model = MLP(input_size=3, hidden_size=5, output_size=1)
x = Tensor([[1, 2, 3]])
y_true = Tensor([[0]])

# Forward pass
y_pred = model.forward(x)

# Compute loss
loss = mse_loss(y_pred, y_true)

# Backward pass
loss.backward()

# Check gradients
model.layer1.weights.grad, model.layer1.bias.grad, model.layer2.weights.grad, model.layer2.bias.grad


(None, None, None, None)