In [1]:
import torch
import numpy 

# Training inputs: one row per sample, columns are (temp, rainfall, humidity).
inputs = torch.tensor(
    [
        [73, 67, 43],
        [91, 88, 64],
        [87, 134, 58],
        [102, 43, 37],
        [69, 96, 70],
    ],
    dtype=torch.float32,
)

# Target values the model should learn to predict: one row per sample,
# columns are (apples, oranges).
targets = torch.tensor(
    [
        [56, 70],
        [81, 101],
        [119, 133],
        [27, 37],
        [103, 119],
    ],
    dtype=torch.float32,
)

# Weights start as a matrix of random values: one row per target column and
# one column per input feature (2 x 3 here).
w = torch.randn(targets.shape[1], inputs.shape[1], requires_grad=True)
# Bias starts as a vector of random values: one entry per target column.
b = torch.randn(targets.shape[1], requires_grad=True)

# Linear model: multiply the input matrix by the transposed weight matrix, then add the bias.
# Shapes here: (5 x 3 inputs) @ (3 x 2 transposed weights) -> 5 x 2 matrix; the length-2
# bias vector is broadcast across all 5 rows, so the result is still 5 x 2.

def model(x):
    """Return the linear prediction ``x @ w.t() + b``.

    Uses the module-level weight matrix ``w`` and bias vector ``b``.
    ``w.t()`` transposes ``w`` (swaps rows and columns) so it can be
    matrix-multiplied with ``x``; ``b`` is broadcast over every row of
    the product.
    """
    weighted = torch.matmul(x, w.t())
    return weighted + b

# Forward pass with the initial random weights and bias (predictions before any training).
preds = model(inputs)

def mse(t1, t2):
    """Return the mean squared error between two tensors.

    The element-wise difference ``t1 - t2`` is squared, summed, and the sum
    is divided by the number of elements in the difference.
    """
    residual = t1 - t2
    squared_error_total = (residual * residual).sum()
    return squared_error_total / residual.numel()

# Loss of the untrained model. Because this is a mean of squared errors, its
# square root is roughly how far off each prediction is on average.
loss = mse(preds, targets)

# Backpropagate: compute the gradient of the loss with respect to every leaf
# tensor that requires grad (here w and b). Each gradient is stored in that
# tensor's .grad attribute.
loss.backward()

# How to read one element of a gradient:
#   positive slope -> slightly increasing the weight increases the loss,
#                     slightly decreasing the weight decreases the loss.
#   negative slope -> slightly increasing the weight decreases the loss,
#                     slightly decreasing the weight increases the loss.
# To reduce the loss, move each element in the direction opposite to its
# gradient: decrease it where the gradient is positive, increase it where the
# gradient is negative. This is gradient descent.

# Reset the weight and bias gradients to zero so the gradients computed above
# are not carried into the next backward pass.
for param in (w, b):
    param.grad.zero_()

# Train with plain gradient descent: on every epoch, subtract a small
# proportion of each gradient from the corresponding parameter.
learning_rate = 1e-5  # hyperparameter you can adjust
num_epochs = 20000

for epoch in range(num_epochs):
    preds = model(inputs)  # get predictions
    loss = mse(preds, targets)  # get loss
    loss.backward()  # get gradients
    # The updates themselves must not be tracked by autograd.
    with torch.no_grad():
        for param in (w, b):
            param -= learning_rate * param.grad
            # Reset the gradient to 0 so the next backward() starts from zero.
            param.grad.zero_()

# NOTE: loss and preds come from the forward pass of the last epoch, i.e. they
# were computed before that epoch's parameter update was applied.
print(loss)
print(preds)
print(targets)

tensor(1.8449, grad_fn=<DivBackward0>)
tensor([[ 58.2874,  70.3306],
        [ 83.2848, 100.6570],
        [118.4022, 132.9541],
        [ 25.2303,  37.0124],
        [100.9365, 119.1421]], grad_fn=<AddBackward0>)
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 27.,  37.],
        [103., 119.]])
