# Chapter 6 - Using a Neural Network to Fit the Data

## Part I: Neural Networks

In [1]:
import numpy as np
import torch
torch.set_printoptions(edgeitems=2, linewidth=75)

In [2]:
# Target values the model is trained to reproduce (one per sample).
t_c = [0.5,  14.0, 15.0, 28.0, 11.0,  8.0,  3.0, -4.0,  6.0, 13.0, 21.0]

# Input measurements fed to the model; element i pairs with t_c[i].
t_u = [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]

In [3]:
# Column tensor with one row per sample and a single feature column.
# as_tensor + reshape (instead of torch.tensor + unsqueeze) keeps this cell
# idempotent: re-running it on the already-converted tensor neither triggers
# the copy-construct warning nor appends another trailing dimension.
t_c = torch.as_tensor(t_c, dtype=torch.float32).reshape(-1, 1)
t_c.shape

torch.Size([11, 1])

In [6]:
# Column tensor with one row per sample and a single feature column.
# as_tensor + reshape (instead of torch.tensor + unsqueeze) keeps this cell
# idempotent: re-running it on the already-converted tensor neither triggers
# the copy-construct warning nor appends another trailing dimension.
t_u = torch.as_tensor(t_u, dtype=torch.float32).reshape(-1, 1)
t_u.shape

torch.Size([11, 1])

In [8]:
n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)  # hold out 20% of the samples, rounded down
n_train = n_samples - n_val

shuffled_indices = torch.randperm(n_samples)

# Split at an explicit non-negative offset. Slicing with -n_val misbehaves
# when n_val is 0 (fewer than 5 samples): [:-0] is empty and [-0:] is the
# whole permutation, which would put every sample in the validation set.
train_indices = shuffled_indices[:n_train]
val_indices = shuffled_indices[n_train:]

train_indices, val_indices

(tensor([ 1,  5,  8,  7,  0,  3,  6, 10,  2]), tensor([9, 4]))

In [9]:
# Index inputs and targets with the same index tensors so that every
# input row stays paired with its target row.
t_u_train, t_c_train = t_u[train_indices], t_c[train_indices]
t_u_val, t_c_val = t_u[val_indices], t_c[val_indices]

# Rescaled copies of the inputs (constant factor 0.1); these are the
# tensors the models below are actually trained and validated on.
t_un_train, t_un_val = 0.1 * t_u_train, 0.1 * t_u_val

In [10]:
import torch.nn as nn

# Linear layer with one input feature and one output feature.
linear_model = nn.Linear(1, 1)
# Forward pass of the untrained module on the rescaled validation inputs,
# just to see it run; the values are meaningless at this point.
linear_model(t_un_val)

tensor([[-0.9223],
        [-0.8381]], grad_fn=<AddmmBackward>)

In [11]:
# The layer's weight: a 1x1 matrix (one output feature, one input feature).
linear_model.weight

Parameter containing:
tensor([[-0.2054]], requires_grad=True)

In [12]:
# The layer's bias: one entry per output feature.
linear_model.bias

Parameter containing:
tensor([0.3181], requires_grad=True)

In [13]:
# A single sample passed as a 1-D tensor holding its one feature.
x = torch.ones(1)
linear_model(x)

tensor([0.1127], grad_fn=<AddBackward0>)

In [14]:
# A batch of 10 samples with one feature each: the module is applied row by
# row, so identical input rows yield identical output rows.
x = torch.ones(10, 1)
linear_model(x)

tensor([[0.1127],
        [0.1127],
        [0.1127],
        [0.1127],
        [0.1127],
        [0.1127],
        [0.1127],
        [0.1127],
        [0.1127],
        [0.1127]], grad_fn=<AddmmBackward>)

In [15]:
# Fresh model; the optimizer is handed this instance's parameters, so it
# must be rebuilt whenever the model is re-created.
linear_model = nn.Linear(1, 1)
# Plain SGD with learning rate 1e-2.
optimizer = torch.optim.SGD(linear_model.parameters(), lr=1e-2)

In [16]:
# parameters() returns a generator, so its repr shows no values.
linear_model.parameters()

<generator object Module.parameters at 0x7fbdd4163048>

In [17]:
# Materialise the generator to inspect the tensors: weight first, then bias.
list(linear_model.parameters())

[Parameter containing:
 tensor([[-0.9232]], requires_grad=True), Parameter containing:
 tensor([0.0063], requires_grad=True)]

In [18]:
def training_loop(n_epochs, optimizer, model, loss_fn,
                  t_u_train, t_u_val, t_c_train, t_c_val):
    """Run full-batch training for ``n_epochs`` steps and report both losses.

    Each epoch performs one forward/backward pass over the whole training
    set and one optimizer step. The validation loss is computed purely for
    monitoring and never contributes to the gradients.

    Args:
        n_epochs: Number of optimisation steps to run.
        optimizer: Optimizer exposing ``zero_grad()`` and ``step()``, built
            over ``model``'s parameters.
        model: Callable mapping an input tensor to predictions.
        loss_fn: Callable ``(predictions, targets) -> scalar loss tensor``.
        t_u_train: Training inputs.
        t_u_val: Validation inputs.
        t_c_train: Training targets.
        t_c_val: Validation targets.

    Returns:
        None. Progress is printed for the first 5 epochs and then every
        1000th epoch.
    """
    for epoch in range(1, n_epochs + 1):
        t_p_train = model(t_u_train)
        loss_train = loss_fn(t_p_train, t_c_train)

        # Validation is monitoring only: disable autograd so no graph is
        # recorded for a loss that backward() is never called on.
        with torch.no_grad():
            t_p_val = model(t_u_val)
            loss_val = loss_fn(t_p_val, t_c_val)

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()

        if epoch <= 5 or epoch % 1000 == 0:
            print('Epoch: {}, Training Loss: {}, Validation Loss: {}'.format(epoch, loss_train, loss_val))

In [19]:
def loss_fn(t_p, t_c):
    """Return the mean squared error between predictions and targets.

    Args:
        t_p: Predicted values.
        t_c: Target values, broadcast-compatible with ``t_p``.

    Returns:
        The mean of the element-wise squared differences.
    """
    residuals = t_p - t_c
    return (residuals ** 2).mean()

In [20]:
# Start from a freshly initialised model, with an optimizer built over
# that new instance's parameters.
linear_model = nn.Linear(1, 1)
optimizer = torch.optim.SGD(linear_model.parameters(), lr=1e-2)

In [21]:
# Train the linear model with the hand-written loss_fn, feeding the
# rescaled inputs (t_un_*) rather than the raw t_u_* tensors.
training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    model=linear_model,
    loss_fn=loss_fn,
    t_u_train=t_un_train,
    t_u_val=t_un_val,
    t_c_train=t_c_train,
    t_c_val=t_c_val)

Epoch: 1, Training Loss: 148.83392333984375, Validation Loss: 96.12772369384766
Epoch: 2, Training Loss: 55.2896728515625, Validation Loss: 9.215256690979004
Epoch: 3, Training Loss: 39.087181091308594, Validation Loss: 0.36485016345977783
Epoch: 4, Training Loss: 36.18683624267578, Validation Loss: 1.3752779960632324
Epoch: 5, Training Loss: 35.57473373413086, Validation Loss: 2.6021058559417725
Epoch: 1000, Training Loss: 2.968538522720337, Validation Loss: 6.110190391540527
Epoch: 2000, Training Loss: 2.4372715950012207, Validation Loss: 6.509706497192383
Epoch: 3000, Training Loss: 2.428769588470459, Validation Loss: 6.56184196472168
Epoch: 4000, Training Loss: 2.428632974624634, Validation Loss: 6.568458557128906
Epoch: 5000, Training Loss: 2.4286303520202637, Validation Loss: 6.5692949295043945


In [22]:
# Fitted parameters: with one input and one output feature, the weight is
# the slope and the bias the intercept of the learned line.
print(linear_model.weight)
print(linear_model.bias)

Parameter containing:
tensor([[5.4878]], requires_grad=True)
Parameter containing:
tensor([-17.4613], requires_grad=True)


In [24]:
# Same experiment as the previous run, but with the built-in nn.MSELoss
# module in place of the hand-written loss_fn.
linear_model = nn.Linear(1, 1)
optimizer = torch.optim.SGD(linear_model.parameters(), lr=1e-2)

training_loop(
    n_epochs=3000,
    optimizer=optimizer,
    model=linear_model,
    loss_fn=nn.MSELoss(),
    t_u_train=t_un_train,
    t_u_val=t_un_val,
    t_c_train=t_c_train,
    t_c_val=t_c_val,
)

# Blank line separates the training log from the fitted parameters.
print()
print(linear_model.weight)
print(linear_model.bias)

Epoch: 1, Training Loss: 121.92767333984375, Validation Loss: 70.64339447021484
Epoch: 2, Training Loss: 49.28875732421875, Validation Loss: 6.022229194641113
Epoch: 3, Training Loss: 36.68735885620117, Validation Loss: 0.3333136737346649
Epoch: 4, Training Loss: 34.4119873046875, Validation Loss: 1.608373999595642
Epoch: 5, Training Loss: 33.912994384765625, Validation Loss: 2.76485013961792
Epoch: 1000, Training Loss: 2.9417526721954346, Validation Loss: 6.1213555335998535
Epoch: 2000, Training Loss: 2.436842203140259, Validation Loss: 6.511210918426514
Epoch: 3000, Training Loss: 2.428762435913086, Validation Loss: 6.562020778656006

Parameter containing:
tensor([[5.4817]], requires_grad=True)
Parameter containing:
tensor([-17.4268], requires_grad=True)


In [25]:
# Two linear layers with a tanh non-linearity in between: 1 -> 13 -> 1.
seq_model = nn.Sequential(
    nn.Linear(1, 13),
    nn.Tanh(),
    nn.Linear(13, 1),
)

seq_model

Sequential(
  (0): Linear(in_features=1, out_features=13, bias=True)
  (1): Tanh()
  (2): Linear(in_features=13, out_features=1, bias=True)
)

In [26]:
# Shapes come as a (weight, bias) pair per linear layer; Tanh contributes
# no parameters.
[param.shape for param in seq_model.parameters()]

[torch.Size([13, 1]), torch.Size([13]), torch.Size([1, 13]), torch.Size([1])]

In [28]:
# Sequential prefixes each parameter name with its module's position
# index (0 and 2 here; index 1 is the parameter-free Tanh).
for name, param in seq_model.named_parameters():
    print(name, ': ', param.shape)

0.weight :  torch.Size([13, 1])
0.bias :  torch.Size([13])
2.weight :  torch.Size([1, 13])
2.bias :  torch.Size([1])


In [29]:
from collections import OrderedDict

# Passing an OrderedDict gives every submodule a readable name instead of
# a positional index. Note that this variant uses 12 hidden units, whereas
# seq_model uses 13.
named_seq_model = nn.Sequential(OrderedDict([
    ('hidden_linear', nn.Linear(1, 12)),
    ('hidden_activation', nn.Tanh()),
    ('output_linear', nn.Linear(12, 1))
]))

named_seq_model

Sequential(
  (hidden_linear): Linear(in_features=1, out_features=12, bias=True)
  (hidden_activation): Tanh()
  (output_linear): Linear(in_features=12, out_features=1, bias=True)
)

In [30]:
class SubclassModel(nn.Module):
    """One-hidden-layer regression network: Linear -> Tanh -> Linear.

    Maps inputs with a single feature to a single output value.

    Args:
        n_hidden: Width of the hidden layer. Defaults to 13, the size that
            was previously hard-coded, so ``SubclassModel()`` is unchanged.
    """

    def __init__(self, n_hidden=13):
        super().__init__()
        # Assigning modules as attributes registers them, so their
        # parameters are reported under these attribute names.
        self.hidden_linear = nn.Linear(1, n_hidden)
        self.hidden_activation = nn.Tanh()
        self.output_linear = nn.Linear(n_hidden, 1)

    def forward(self, input):
        """Compute predictions for ``input``.

        Args:
            input: Tensor whose last dimension has size 1.

        Returns:
            Tensor of predictions whose last dimension has size 1.
        """
        hidden_t = self.hidden_linear(input)
        activated_t = self.hidden_activation(hidden_t)
        output_t = self.output_linear(activated_t)
        return output_t

In [31]:
# The repr lists the submodules under the attribute names assigned in
# __init__.
subclass_model = SubclassModel()
subclass_model

SubclassModel(
  (hidden_linear): Linear(in_features=1, out_features=13, bias=True)
  (hidden_activation): Tanh()
  (output_linear): Linear(in_features=13, out_features=1, bias=True)
)

In [32]:
# Print, for each of the three model variants, every parameter's name,
# shape and element count in aligned columns.
for type_str, model in [('seq', seq_model),
                        ('named_seq_model', named_seq_model),
                        ('subclass_model', subclass_model)]:
    print(type_str)
    
    for name_str, param in model.named_parameters():
        # Fixed column widths (21 and 19) keep the three listings aligned;
        # the shape is converted to str so the width spec applies to it.
        print("{:21} {:19} {}".format(
            name_str, str(param.shape), param.numel()))
        
    print()

seq
0.weight              torch.Size([13, 1]) 13
0.bias                torch.Size([13])    13
2.weight              torch.Size([1, 13]) 13
2.bias                torch.Size([1])     1

named_seq_model
hidden_linear.weight  torch.Size([12, 1]) 12
hidden_linear.bias    torch.Size([12])    12
output_linear.weight  torch.Size([1, 12]) 12
output_linear.bias    torch.Size([1])     1

subclass_model
hidden_linear.weight  torch.Size([13, 1]) 13
hidden_linear.bias    torch.Size([13])    13
output_linear.weight  torch.Size([1, 13]) 13
output_linear.bias    torch.Size([1])     1

