## Neural networks
### mental model of learning
![](../images/model_of_learning.png)

In [32]:
import torch
import torch.nn as nn
import torch.optim as optim

In [63]:
# Same example data as chapter 5.
t_c = torch.tensor([0.5,  14.0, 15.0, 28.0, 11.0,  8.0,  3.0, -4.0,  6.0, 13.0, 21.0]) # in Celsius
t_u = torch.tensor([35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]) # in unknown unit
# unsqueeze(1) appends a dimension of size 1, turning shape (11,) into (11, 1).
# PyTorch modules expect the batch dimension at position 0: here the batch
# size is 11 (11 data points) and each sample has a single feature (the
# temperature), so the tensors are reshaped to that layout before they are
# passed to the network.
# The tensors are unsqueezed directly: re-wrapping an existing tensor in
# torch.tensor(...) makes a redundant copy and triggers a UserWarning.
t_c = t_c.unsqueeze(1)
t_u = t_u.unsqueeze(1)

# Hold out 20% of the samples for validation, chosen by a random permutation.
# NOTE: the split differs on every run unless the RNG is seeded beforehand.
n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)
# Slice by the training count rather than by -n_val: with n_val == 0 the
# negative-index form would yield an empty training set.
n_train = n_samples - n_val
shuffled_indices = torch.randperm(n_samples)
train_indices = shuffled_indices[:n_train]
val_indices = shuffled_indices[n_train:]

train_indices, val_indices



(tensor([9, 8, 0, 3, 2, 5, 1, 4, 7]), tensor([10,  6]))

In [64]:
# Select the training and validation rows with the shuffled index tensors.
t_u_train, t_c_train = t_u[train_indices], t_c[train_indices]
t_u_val, t_c_val = t_u[val_indices], t_c[val_indices]

# Scale the inputs by 0.1 (presumably a crude normalisation -- the "n" in
# t_un -- so that the input magnitudes are closer to the targets).
t_un_train = t_u_train * 0.1
t_un_val = t_u_val * 0.1

In [65]:
# Validation inputs keep the (batch, feature) layout; .shape is the same torch.Size as .size().
t_un_val.shape

torch.Size([2, 1])

In [66]:
# nn.Module already defines __call__, which sets up "hooks" (the bookkeeping
# needed for the forward pass to work correctly) before running forward().
# Calling the instance is therefore used in place of calling forward()
# with the same arguments.
linear_model = nn.Linear(in_features=1, out_features=1)  # input and output feature sizes
linear_model(t_un_val)

tensor([[-5.6589],
        [-2.5575]], grad_fn=<AddmmBackward>)

In [67]:
# Learnable weight of the layer: a Parameter of shape (out_features, in_features), here (1, 1).
linear_model.weight

Parameter containing:
tensor([[-0.8990]], requires_grad=True)

In [68]:
# Learnable bias of the layer: a Parameter with one entry per output feature, here shape (1,).
linear_model.bias

Parameter containing:
tensor([0.4899], requires_grad=True)

In [69]:
# The layer can process a whole batch at once: the zeroth dimension is the
# batch size, so a (10, 1) input produces a (10, 1) output.
linear_model(torch.ones((10, 1)))

tensor([[-0.4090],
        [-0.4090],
        [-0.4090],
        [-0.4090],
        [-0.4090],
        [-0.4090],
        [-0.4090],
        [-0.4090],
        [-0.4090],
        [-0.4090]], grad_fn=<AddmmBackward>)

## Using the linear model
- `nn.Linear` lives in the `nn` module, but on its own it is not a neural network yet.
- The results we get are the same as before.

In [70]:
def training_loop(n_epochs, model, optimizer, loss_fn, t_u_train, t_c_train, t_u_val, t_c_val,
                  print_every=500):
    """Train ``model`` on the training split and report train/validation loss.

    Each epoch performs one full-batch optimisation step on the training
    data. The validation loss is computed for monitoring only and never
    contributes to the parameter update.

    Args:
        n_epochs: number of epochs (full-batch steps) to run.
        model: callable mapping an input tensor to predictions.
        optimizer: optimizer built over the parameters of ``model``.
        loss_fn: callable ``loss_fn(predicted, target)`` returning a scalar
            tensor that supports ``backward()``.
        t_u_train: training inputs.
        t_c_train: training targets.
        t_u_val: validation inputs.
        t_c_val: validation targets.
        print_every: print the losses every ``print_every`` epochs; a falsy
            value (``0`` or ``None``) disables printing.

    Returns:
        None. ``model`` is updated in place through ``optimizer``.
    """
    for i in range(1, n_epochs+1):
        optimizer.zero_grad()

        train_predicted = model(t_u_train)
        train_loss = loss_fn(train_predicted, t_c_train)

        # Validation is evaluation only: disable autograd so no graph is
        # built for a loss that is never back-propagated.
        with torch.no_grad():
            val_loss = loss_fn(model(t_u_val), t_c_val)

        train_loss.backward() # calc gradients
        optimizer.step() # update parameters

        if print_every and i % print_every == 0:
            print(f"epoch {i} training loss {train_loss}, validation loss {val_loss}")

In [71]:
# Start from a freshly initialised single-input, single-output linear layer.
linear_model = nn.Linear(in_features=1, out_features=1)
# The optimizer will optimize the parameters of the linear model.
optimizer = optim.SGD(linear_model.parameters(), lr=0.01)

# MSELoss is a class: it is instantiated here and the resulting callable is
# handed to the loop as the loss function.
training_loop(
    n_epochs=5000,
    model=linear_model,
    optimizer=optimizer,
    loss_fn=nn.MSELoss(),
    t_u_train=t_un_train,
    t_c_train=t_c_train,
    t_u_val=t_un_val,
    t_c_val=t_c_val,
)
# Show the learned weight and bias.
print([*linear_model.parameters()])

epoch 500 training loss 8.659355163574219, validation loss 7.074272155761719
epoch 1000 training loss 3.8009579181671143, validation loss 4.253093242645264
epoch 1500 training loss 2.8559889793395996, validation loss 4.459417343139648
epoch 2000 training loss 2.6721911430358887, validation loss 4.8325395584106445
epoch 2500 training loss 2.636443614959717, validation loss 5.051978588104248
epoch 3000 training loss 2.629490613937378, validation loss 5.159413814544678
epoch 3500 training loss 2.6281378269195557, validation loss 5.208893775939941
epoch 4000 training loss 2.627875328063965, validation loss 5.231110572814941
epoch 4500 training loss 2.627823829650879, validation loss 5.240985870361328
epoch 5000 training loss 2.6278128623962402, validation loss 5.2453765869140625
[Parameter containing:
tensor([[5.4199]], requires_grad=True), Parameter containing:
tensor([-17.9863], requires_grad=True)]
