In [70]:
%matplotlib inline
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn

torch.set_printoptions(edgeitems=2, linewidth=75)

url = 'https://raw.githubusercontent.com/deep-learning-with-pytorch/dlwpt-code/master/data/p1ch4/tabular-wine/winequality-white.csv'
wine = pd.read_csv(url, error_bad_lines=False, delimiter = ';', dtype = np.float32)

In [71]:
wineq = torch.tensor(wine.values)

In [72]:
data = wineq[:,:-1]
target = wineq[:,-1]

In [79]:
n_samples = data.shape[0]
n_val = int(0.2 * n_samples)

shuffled_indices = torch.randperm(n_samples)

train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]



In [85]:
data_train = data[train_indices]
target_train = target[train_indices]

data_val = data[val_indices]
target_val = target[val_indices]

datan_train = 0.1 * data_train
datan_val = 0.1 * data_val

In [105]:
data_val.shape, target_val.shape, datan_train.shape, datan_val.shape

(torch.Size([979, 11]),
 torch.Size([979]),
 torch.Size([3919, 11]),
 torch.Size([979, 11]))

In [90]:
import torch.nn as nn

In [123]:
linear_model = nn.Linear(11, 1)

In [113]:
optimizer = optim.SGD(
    linear_model.parameters(),
    lr=1e-2)

In [98]:
list(linear_model.parameters())

[Parameter containing:
 tensor([[-0.0541, -0.1397,  0.1324,  0.1919, -0.2868, -0.2292, -0.0616,
           0.0114, -0.1283, -0.0231, -0.2933]], requires_grad=True),
 Parameter containing:
 tensor([0.0996], requires_grad=True)]

In [115]:
def training_loop(n_epochs, optimizer, model, loss_fn, t_u_train, t_u_val,
                  t_c_train, t_c_val):
    for epoch in range(1, n_epochs + 1):
        t_p_train = model(t_u_train) # <1>
        loss_train = loss_fn(t_p_train, t_c_train)

        t_p_val = model(t_u_val) # <1>
        loss_val = loss_fn(t_p_val, t_c_val)
        
        optimizer.zero_grad()
        loss_train.backward() # <2>
        optimizer.step()

        if epoch == 1 or epoch % 1000 == 0:
            print(f"Epoch {epoch}, Training loss {loss_train.item():.4f},"
                  f" Validation loss {loss_val.item():.4f}")

In [125]:
def loss_fn(t_p, t_c):
    squared_diffs = (t_p - t_c)**2
    return squared_diffs.mean()

In [124]:
linear_model = nn.Linear(11, 1)
optimizer = optim.SGD(linear_model.parameters(), lr=1e-4)

training_loop(
    n_epochs = 3000, 
    optimizer = optimizer,
    model = linear_model,
    loss_fn = nn.MSELoss(), # <1>
    t_u_train = datan_train,
    t_u_val = datan_val, 
    t_c_train = target_train,
    t_c_val = target_val)

print()
print(linear_model.weight)
print(linear_model.bias)

Epoch 1, Training loss 28.6960, Validation loss 28.4262
Epoch 1000, Training loss 3.6136, Validation loss 3.7112
Epoch 2000, Training loss 3.3340, Validation loss 3.4227
Epoch 3000, Training loss 3.0834, Validation loss 3.1645

Parameter containing:
tensor([[-0.0420,  0.1936, -0.2568,  0.0399,  0.2088,  0.0496,  0.3357,
         -0.1348, -0.1320,  0.1783,  0.5646]], requires_grad=True)
Parameter containing:
tensor([0.0815], requires_grad=True)


In [127]:
seq_model = nn.Sequential(
            nn.Linear(11, 1024), # <1>
            nn.Tanh(),
            nn.Linear(1024, 1)) # <2>

Sequential(
  (0): Linear(in_features=11, out_features=1024, bias=True)
  (1): Tanh()
  (2): Linear(in_features=1024, out_features=1, bias=True)
)

In [128]:
[param.shape for param in seq_model.parameters()]

[torch.Size([1024, 11]),
 torch.Size([1024]),
 torch.Size([1, 1024]),
 torch.Size([1])]

In [129]:
for name, param in seq_model.named_parameters():
    print(name, param.shape)

0.weight torch.Size([1024, 11])
0.bias torch.Size([1024])
2.weight torch.Size([1, 1024])
2.bias torch.Size([1])


In [131]:
from collections import OrderedDict

seq_model = nn.Sequential(OrderedDict([
    ('hidden_linear', nn.Linear(11, 1024)),
    ('hidden_activation', nn.Tanh()),
    ('output_linear', nn.Linear(1024, 1))
]))

seq_model

Sequential(
  (hidden_linear): Linear(in_features=11, out_features=1024, bias=True)
  (hidden_activation): Tanh()
  (output_linear): Linear(in_features=1024, out_features=1, bias=True)
)

In [132]:
for name, param in seq_model.named_parameters():
    print(name, param.shape)

hidden_linear.weight torch.Size([1024, 11])
hidden_linear.bias torch.Size([1024])
output_linear.weight torch.Size([1, 1024])
output_linear.bias torch.Size([1])


In [133]:
seq_model.output_linear.bias

Parameter containing:
tensor([-0.0133], requires_grad=True)

In [135]:
optimizer = optim.SGD(seq_model.parameters(), lr=1e-3) 

training_loop(
    n_epochs = 5000, 
    optimizer = optimizer,
    model = seq_model,
    loss_fn = nn.MSELoss(),
    t_u_train = datan_train,
    t_u_val = datan_val, 
    t_c_train = target_train,
    t_c_val = target_val)

print('output', seq_model(datan_val))
print('answer', target_val)
print('hidden', seq_model.hidden_linear.weight.grad)

Epoch 1, Training loss 0.7810, Validation loss 0.8140
Epoch 1000, Training loss 0.7805, Validation loss 0.8135
Epoch 2000, Training loss 0.7802, Validation loss 0.8130
Epoch 3000, Training loss 0.7799, Validation loss 0.8127
Epoch 4000, Training loss 0.7797, Validation loss 0.8125
Epoch 5000, Training loss 0.7795, Validation loss 0.8122
output tensor([[5.9006],
        [5.8953],
        [5.9405],
        [5.8663],
        [5.8521],
        [5.8640],
        [5.9047],
        [5.8634],
        [5.9190],
        [5.9110],
        [5.8889],
        [5.9026],
        [5.8850],
        [5.8650],
        [5.8649],
        [5.8501],
        [5.8640],
        [5.8709],
        [5.8631],
        [5.8677],
        [5.8485],
        [5.8637],
        [5.9663],
        [5.8668],
        [5.8848],
        [5.8649],
        [5.8441],
        [5.8768],
        [5.8593],
        [5.8646],
        [5.8784],
        [5.8866],
        [5.8345],
        [5.8788],
        [5.8888],
        [5.8626],
      