In [7]:
from tensor import Tensor
from loss import MSELoss
import nn
import numpy as np
from helper import unbroadcast
import activations as act
from visualization import draw_dot

n = 1000
# Create (100, 3) input dataset with values from a uniform distribution
X_data = np.random.uniform(low=-10, high=10, size=(n, 3)).astype(np.float32)
Y_data = (X_data[:, 0] + X_data[:, 1]*X_data[:, 2]).reshape(-1, 1)

# Normalize data
X_mean, X_std = X_data.mean(axis=0), X_data.std(axis=0)
Y_mean, Y_std = Y_data.mean(),    Y_data.std()

X_data = (X_data - X_mean) / X_std
Y_data = (Y_data - Y_mean) / Y_std

# Wrap with your Tensor class
x_train = Tensor(X_data[:int(0.8*n)], requires_grad=False)
y_train = Tensor(Y_data[:int(0.8*n)], requires_grad=False)

x_test = Tensor(X_data[int(0.8*n):], requires_grad=False)
y_test = Tensor(Y_data[int(0.8*n):], requires_grad=False)

print(x_train.shape, y_train.shape)

(800, 3) (800, 1)


In [8]:
hidden_size_1 = 16
model = nn.Sequential(
    nn.Linear(3, hidden_size_1, requires_grad=True),  # first linear
    nn.ReLU(),                                         # activation
    nn.Linear(hidden_size_1, 1, requires_grad=True),
)

In [13]:
lr = 1e-3
epochs = 1000

for epoch in range(1, epochs+1):
    model.zero_grad()

    # forward
    y_pred = model(x_train)
    loss   = MSELoss(y_train, y_pred)
    loss.backward()

    # update
    for p in model.parameters():
        p.data -= lr * p.grad

    if epoch % 50 == 0:
        test_pred = model(x_test)
        test_loss = MSELoss(y_test, test_pred)
        print(f"Epoch {epoch:4d} | train={loss.data:.6f} | test={test_loss.data:.6f}")

# final
final_pred = model(x_test)
print("Final test loss:", MSELoss(y_test, final_pred).data)

Epoch   50 | train=0.134763 | test=0.114759
Epoch  100 | train=0.128887 | test=0.109293
Epoch  150 | train=0.123443 | test=0.104255
Epoch  200 | train=0.118408 | test=0.099634
Epoch  250 | train=0.113728 | test=0.095390
Epoch  300 | train=0.109396 | test=0.091456
Epoch  350 | train=0.105400 | test=0.087840
Epoch  400 | train=0.101714 | test=0.084534
Epoch  450 | train=0.098309 | test=0.081507
Epoch  500 | train=0.095129 | test=0.078717
Epoch  550 | train=0.092169 | test=0.076148
Epoch  600 | train=0.089401 | test=0.073776
Epoch  650 | train=0.086824 | test=0.071585
Epoch  700 | train=0.084433 | test=0.069570
Epoch  750 | train=0.082206 | test=0.067713
Epoch  800 | train=0.080130 | test=0.066000
Epoch  850 | train=0.078191 | test=0.064416
Epoch  900 | train=0.076381 | test=0.062950
Epoch  950 | train=0.074691 | test=0.061593
Epoch 1000 | train=0.073107 | test=0.060332
Final test loss: 0.060331915


In [14]:
a = Tensor([1.0, 3.0, 6.0], requires_grad=False)
a.data = (a.data - X_mean) / X_std
a = a.reshape(1, -1)  # reshape to match input shape
out = model(a)
out.data = out.data * Y_std + Y_mean  # denormalize output
print(out)

Tensor(data=[[24.698574]])
