## Learning a simple function

In this notebook, we'll learn the simple linear function f(x) = a @ x + b by training a single linear layer with SGD.

In [1]:
from minitorch import Tensor, Parameter, Module, Functions
from minitorch.optim import SGD
import numpy as np

In [2]:

# Seed NumPy's global generator so the randomly drawn inputs (and anything
# else drawn from that generator afterwards) are identical on every
# "Restart Kernel -> Run All".
np.random.seed(0)

# Setting up the 'a' coefs of the function to learn
coef = np.array([-1, 3, -2])

# Initializing some inputs randomly: 10 samples with 3 features each,
# one feature per coefficient in `coef`
inputs = Tensor(np.random.randn(10, 3))

# Setting 'b' to be 5 and computing f(x) = a @ x + b for every sample;
# the matrix-vector product yields one target value per input row
targets = Tensor(5 + inputs.data @ coef)

In [3]:
class Linear(Module):
    """
    Affine layer computing `inputs @ Weights + bias`.

    Both `Weights` (the 'a' of the target function) and `bias` (the 'b')
    are `Parameter` objects created in the constructor.

    Args:
        in_dim: first dimension handed to the `Weights` parameter.
        out_dim: second dimension of `Weights`, also handed to `bias`.
    """
    def __init__(self, in_dim, out_dim):
        super().__init__()

        # Weights is created before bias; keep this order in case Parameter
        # initialization consumes random numbers.
        self.Weights = Parameter(in_dim, out_dim)
        self.bias = Parameter(out_dim)

    def forward(self, inputs):
        """Return the matrix product of `inputs` and `Weights`, plus `bias`."""
        projected = inputs @ self.Weights
        return projected + self.bias

In [4]:
# 3 input features (one per coefficient of 'a') mapped to 1 output value
model = Linear(in_dim=3, out_dim=1)
# SGD over the model with a learning rate of 0.001
optimizer = SGD(model, lr=0.001)
# Loss function (MSE from minitorch's Functions), called later as
# criterion(predictions, targets)
criterion = Functions.MSE

In [7]:
# Sanity check: display the shape of the input tensor
inputs.shape

(10, 3)

In [9]:
# Sanity check: display the shape of the model's weight parameter
model.Weights.shape

(3, 1)

In [5]:
# Train for 1000 epochs; every epoch uses the whole `inputs` tensor at once
for epoch in range(1000):
    # Call zero_grad before each new forward/backward pass
    optimizer.zero_grad()

    # Forward pass: column 0 of the model output is compared to the targets
    outputs = model(inputs)
    loss = criterion(outputs[:, 0], targets)

    # Backward pass on the loss, then one optimizer step
    loss.backward()
    optimizer.step()

    # Only report the loss on epochs 0, 50, 100, ...
    if epoch % 50:
        continue
    print(loss.item())

376.3260377417569
27.066574161498373
4.007283346412114
0.8058400714417325
0.2513825295261604
0.11345933238249598
0.06018717426920231
0.03351951732700439
0.018907883443664654
0.010700179104790604
0.00606024533506043
0.0034330250442664006
0.001944847656429682
0.0011017920974995016
0.0006241875142361024
0.0003536151310067823
0.00020033032118141371
0.0001134912960804293
6.429518190692069e-05
3.6424559166157606e-05


In [6]:
# Printing the learned parameters of the model.
# Remember that we are trying to find a = [-1, 3, -2] and b = 5,
# so the two printed lines should be close to those values.
# The weights are flattened and converted to a plain list for display.
print(model.Weights.data.reshape(-1).tolist())
# The bias is printed exactly as stored in `.data`, without flattening.
print(model.bias.data)

[-1.0005635979741223, 3.0015627882827416, -2.001495748085129]
[4.99848795]
