In [1]:
import numpy as np

In [2]:
x = np.array([1.0, 2.0, -1.0])  # 3 input features
w = np.array([0.5, -1.0, 2.0])  # 3 weights
b = 0.1


In [3]:
def relu(x):
    return np.maximum(0, x)


def single_neuron(x, w, b):
    op = np.dot(x, w) + b
    return relu(op)


output = single_neuron(x, w, b)
print(output)

0.0


## Forward Pass for single input and single output


In [4]:
def forward_pass(x, w1, b1, w2, b2):
    h = relu(np.dot(x, w1) + b1)
    y = np.dot(h, w2) + b2
    return y


n_inputs = 3
n_hidden = 4
n_outputs = 1

w1 = np.random.rand(n_inputs, n_hidden)
b1 = np.random.rand(n_hidden)
w2 = np.random.rand(n_hidden, n_outputs)
b2 = np.random.rand(n_outputs)

output = forward_pass(x, w1, b1, w2, b2)
print(output)

[4.89642761]


In [5]:
import numpy as np


def relu(x):
    return np.maximum(0, x)


def forward_pass(x, W1, b1, W2, b2):
    # x:  (n_in,)
    # W1: (n_hidden, n_in)
    # b1: (n_hidden,)
    # W2: (n_out, n_hidden)
    # b2: (n_out,)

    # hidden layer
    z1 = np.dot(W1, x) + b1  # shape (n_hidden,)
    h = relu(z1)  # shape (n_hidden,)

    # output layer
    z2 = np.dot(W2, h) + b2  # shape (n_out,)
    y_hat = z2  # no activation on output for now

    return y_hat, h, z1, z2


# dimensions
n_in = 3
n_hidden = 4
n_out = 1

# random init
np.random.seed(0)
W1 = np.random.randn(n_hidden, n_in)
b1 = np.zeros(n_hidden)
W2 = np.random.randn(n_out, n_hidden)
b2 = np.zeros(n_out)

# example input
x = np.array([1.0, 2.0, -1.0])

y_hat, h, z1, z2 = forward_pass(x, W1, b1, W2, b2)
print("Hidden pre-activation z1:", z1)
print("Hidden activation h:", h)
print("Output y_hat:", y_hat)


Hidden pre-activation z1: [ 1.58562878  6.95328706  0.75059285 -0.75558786]
Hidden activation h: [1.58562878 6.95328706 0.75059285 0.        ]
Output y_hat: [2.38592521]


## Forward pass for multiple inputs


In [7]:
import numpy as np


def relu(x):
    return np.maximum(0, x)


def forward_pass(x, W1, b1, W2, b2):
    # x:  (N, n_in)     -- batch of N examples
    # W1: (n_in, n_hidden)
    # b1: (n_hidden,)
    # W2: (n_hidden, n_out)
    # b2: (n_out,)

    # hidden layer
    Z1 = x @ W1 + b1  # (N, n_hidden)
    H = relu(Z1)  # (N, n_hidden)
    Z2 = H @ W2 + b2  # (N, n_out)
    Y_hat = Z2
    return Y_hat, H, Z1, Z2


# dimensions
n_in = 3
n_hidden = 4
n_out = 1

# random init
np.random.seed(0)
# For batched input we want W1 shape (n_in, n_hidden) and W2 shape (n_hidden, n_out)
W1 = np.random.randn(n_in, n_hidden)
b1 = np.zeros(n_hidden)
W2 = np.random.randn(n_hidden, n_out)
b2 = np.zeros(n_out)

# example input: 4 examples (N=4), each with n_in features -> shape (4, n_in)
x = np.random.randn(4, n_in)  # shape (4, 3)

y_hat, h, z1, z2 = forward_pass(x, W1, b1, W2, b2)
print("Hidden pre-activation z1 shape:", z1.shape)
print("Hidden pre-activation z1:", z1)
print("Hidden activation h shape:", h.shape)

print("Hidden activation h:", h)

print("Output y_hat shape:", y_hat.shape)
print("Output y_hat:", y_hat)

Hidden pre-activation z1 shape: (4, 4)
Hidden pre-activation z1: [[ 2.22017425  0.92690827  1.31248884  3.83440988]
 [-6.34199188  2.42158272 -3.16735244 -0.57698372]
 [-0.09540698  2.00316968  0.46787771  5.35028524]
 [-2.46079959 -0.7035511  -1.40693092 -3.53822054]]
Hidden activation h shape: (4, 4)
Hidden activation h: [[2.22017425 0.92690827 1.31248884 3.83440988]
 [0.         2.42158272 0.         0.        ]
 [0.         2.00316968 0.46787771 5.35028524]
 [0.         0.         0.         0.        ]]
Output y_hat shape: (4, 1)
Output y_hat: [[3.66442762]
 [0.29464612]
 [2.23666225]
 [0.        ]]


## Pytorch implementation


In [8]:
import torch
import torch.nn as nn
import torch.optim as optim

# for reproducibility
torch.manual_seed(0)

# toy regression dataset: 100 samples, 3 features
N, n_in, n_hidden, n_out = 100, 3, 4, 1
X = torch.randn(N, n_in)  # inputs
y = torch.randn(N, n_out)  # targets

In [9]:
class SimpleNet(nn.Module):
    def __init__(self, n_in, n_hidden, n_out):
        super().__init__()
        self.fc1 = nn.Linear(n_in, n_hidden)  # W1, b1
        self.fc2 = nn.Linear(n_hidden, n_out)  # W2, b2

    def forward(self, x):
        z1 = self.fc1(x)  # linear
        h = torch.relu(z1)  # ReLU
        z2 = self.fc2(h)  # output linear
        return z2  # y_hat
        # if you want also h, z1, z2 like before, you can return them too
        # return z2, h, z1, z2


model = SimpleNet(n_in, n_hidden, n_out)
print(model)


SimpleNet(
  (fc1): Linear(in_features=3, out_features=4, bias=True)
  (fc2): Linear(in_features=4, out_features=1, bias=True)
)


In [13]:
loss_fn = nn.MSELoss()  # like your mse_loss
optimizer = optim.SGD(model.parameters(), lr=0.01)


In [14]:
num_epochs = 200

for epoch in range(num_epochs):
    # 1. Forward pass
    y_hat = model(X)  # shape (N, 1)

    # 2. Compute loss
    loss = loss_fn(y_hat, y)

    # 3. Zero old gradients
    optimizer.zero_grad()

    # 4. Backward pass (compute gradients)
    loss.backward()

    # 5. Gradient descent step
    optimizer.step()

    if (epoch + 1) % 20 == 0:
        print(f"Epoch {epoch + 1}, loss = {loss.item():.4f}")


Epoch 20, loss = 1.6561
Epoch 40, loss = 1.6249
Epoch 60, loss = 1.6073
Epoch 80, loss = 1.5950
Epoch 100, loss = 1.5857
Epoch 120, loss = 1.5781
Epoch 140, loss = 1.5722
Epoch 160, loss = 1.5674
Epoch 180, loss = 1.5633
Epoch 200, loss = 1.5597


In [15]:
x_test = torch.randn(1, n_in)  # one new sample
y_pred = model(x_test)
print("Predicted:", y_pred)


Predicted: tensor([[0.3311]], grad_fn=<AddmmBackward0>)
