In [195]:
import os
%matplotlib inline
from matplotlib import pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

import numpy as np
import pandas as pd
import torch
import torch.optim as optim
import torch.nn as nn

### The mechanics of learning

In [196]:
# Paired measurements: t_c holds the targets and t_u the raw readings
# (the plots in this notebook label them as °Celsius and °Fahrenheit).
t_c = torch.tensor(
    [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0]
)
t_u = torch.tensor(
    [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]
)

# Two rescaled copies of the input:
#   t_un    - the raw readings multiplied by 0.1
#   t_u_std - the raw readings shifted by their mean and divided by Tensor.std()
t_un = t_u * 0.1
t_u_std = (t_u - t_u.mean()) / t_u.std()

In [197]:
def model(t_u, w, b):
    """Linear model: scale the input ``t_u`` by ``w`` and shift it by ``b``."""
    scaled = w * t_u
    return scaled + b

def loss_fn(t_p, t_c):
    """Mean squared error between predictions ``t_p`` and targets ``t_c``."""
    residuals = t_p - t_c
    return (residuals ** 2).mean()

In [198]:
# Scatter the raw readings against the targets; larger markers for legibility.
fig = px.scatter(x=t_u.numpy(), y=t_c.numpy()).update_traces(marker={"size": 14})
fig.update_layout(
    xaxis_title="Temperature (°Fahrenheit)",
    yaxis_title="Temperature (°Celsius)",
)

In [199]:
# Start from w = 1, b = 0 and ask autograd to track the parameter tensor,
# then evaluate the loss of the untrained model on the raw inputs.
params = torch.tensor([1.0, 0.0], requires_grad=True)
loss = loss_fn(t_p=model(t_u, *params), t_c=t_c)
loss

tensor(1763.8848, grad_fn=<MeanBackward0>)

When we compute our loss while the parameters w and b require gradients, in addition to performing the actual computation, PyTorch creates the autograd graph with the operations as nodes. When we call loss.backward(), PyTorch traverses this graph in the reverse direction to compute the gradients.

In [200]:
# Backpropagate from the scalar loss; this fills in params.grad.
loss.backward()
# Show the gradient of the loss with respect to [w, b].
params.grad

tensor([4517.2969,   82.6000])

In [201]:
def training_loop(n_epochs, learning_rate, params, t_u, t_c):
    """Fit ``params`` with hand-written gradient descent.

    Each epoch evaluates ``model`` and ``loss_fn`` on ``(t_u, t_c)``,
    backpropagates, and moves ``params`` against its gradient scaled by
    ``learning_rate``. The loss is printed every 500 epochs.

    ``params`` must be a tensor with ``requires_grad=True``; it is updated
    in place and the same tensor is returned.
    """
    for step in range(n_epochs):
        epoch = step + 1

        # backward() accumulates into .grad on leaf tensors, so the previous
        # epoch's gradient has to be cleared before computing a new one.
        if params.grad is not None:
            params.grad.zero_()

        loss = loss_fn(model(t_u, *params), t_c)
        loss.backward()

        # The update itself must not be recorded in the autograd graph.
        with torch.no_grad():
            params.sub_(learning_rate * params.grad)

        if epoch % 500 != 0:
            continue
        print('Epoch %d, Loss %f' % (epoch, float(loss)))

    return params

# Run the manual loop on the rescaled inputs (t_un = 0.1 * t_u); the returned
# parameter tensor unpacks into the fitted weight and bias.
w, b = training_loop(
    t_u=t_un,
    t_c=t_c,
    params=torch.tensor([1.0, 0.0], requires_grad=True),
    learning_rate=1e-2,
    n_epochs=5000,
)

# Reference values are 0.555 and -17.7 for unscaled inputs; because the input
# was multiplied by 0.1, the fitted w comes out about ten times larger.
w, b

Epoch 500, Loss 7.860115
Epoch 1000, Loss 3.828538
Epoch 1500, Loss 3.092191
Epoch 2000, Loss 2.957698
Epoch 2500, Loss 2.933134
Epoch 3000, Loss 2.928648
Epoch 3500, Loss 2.927830
Epoch 4000, Loss 2.927679
Epoch 4500, Loss 2.927652
Epoch 5000, Loss 2.927647


(tensor(5.3671, grad_fn=<UnbindBackward0>),
 tensor(-17.3012, grad_fn=<UnbindBackward0>))

##### Using a gradient descent optimizer

In [202]:
# Plain SGD over the single parameter tensor [w, b].
learning_rate = 1e-5
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.SGD(params=[params], lr=learning_rate)

In [203]:
def training_loop(n_epochs, optimizer, params, t_u, t_c):
    """Fit ``params`` by letting ``optimizer`` apply the updates.

    Replaces the earlier hand-written ``training_loop`` in this notebook.
    Each epoch evaluates ``model`` and ``loss_fn`` on ``(t_u, t_c)``, clears
    the stored gradients, backpropagates and takes one optimizer step.
    The loss is printed every 500 epochs and ``params`` is returned.
    """
    for step in range(n_epochs):
        epoch = step + 1
        loss = loss_fn(model(t_u, *params), t_c)

        # Gradients accumulate across backward() calls, so reset them first.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch % 500 != 0:
            continue
        print('Epoch %d, Loss %f' % (epoch, float(loss)))
    return params

# Fresh parameters and optimizer, then the same fit as before on the
# rescaled inputs, this time driven by optim.SGD.
learning_rate = 1e-2
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.SGD(params=[params], lr=learning_rate)

training_loop(
    t_u=t_un,
    t_c=t_c,
    params=params,
    optimizer=optimizer,
    n_epochs=5000,
)

Epoch 500, Loss 7.860115
Epoch 1000, Loss 3.828538
Epoch 1500, Loss 3.092191
Epoch 2000, Loss 2.957698
Epoch 2500, Loss 2.933134
Epoch 3000, Loss 2.928648
Epoch 3500, Loss 2.927830
Epoch 4000, Loss 2.927679
Epoch 4500, Loss 2.927652
Epoch 5000, Loss 2.927647


tensor([  5.3671, -17.3012], requires_grad=True)

#### Validation set

In [204]:
# Hold out roughly 20% of the samples for validation.
n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)
n_train = n_samples - n_val

# Shuffle with a dedicated, seeded generator so the split is the same on
# every "Restart & Run All" without disturbing the global RNG stream.
split_rng = torch.Generator().manual_seed(0)
shuffled_indices = torch.randperm(n_samples, generator=split_rng)

# Slice at the positive boundary: the negative form `[:-n_val]` / `[-n_val:]`
# yields an empty training set (and an all-sample validation set) when
# n_val == 0.
train_indices = shuffled_indices[:n_train]
val_indices = shuffled_indices[n_train:]

# Apply the same index split to both rescaled inputs and to the targets.
train_t_un = t_un[train_indices]
train_t_c = t_c[train_indices]
train_t_u_std = t_u_std[train_indices]

val_t_un = t_un[val_indices]
val_t_c = t_c[val_indices]
val_t_u_std = t_u_std[val_indices]

val_indices

tensor([5, 2])

In [205]:
def training_loop(model, n_epochs, optimizer, params, train_t_u, val_t_u,
                  train_t_c, val_t_c):
    """Train ``params`` on the training split while monitoring a validation split.

    Replaces the earlier ``training_loop`` definitions in this notebook.
    ``model`` is called as ``model(inputs, *params)`` and scored with the
    notebook's ``loss_fn``. Only the training loss is backpropagated; the
    validation loss is computed under ``torch.no_grad()`` purely for
    reporting. Losses are printed for the first three epochs and then every
    100 epochs. Returns ``params``.
    """
    for epoch in range(1, n_epochs + 1):
        train_loss = loss_fn(model(train_t_u, *params), train_t_c)

        # No graph is needed for validation: it never drives an update.
        with torch.no_grad():
            val_loss = loss_fn(model(val_t_u, *params), val_t_c)

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        should_report = epoch <= 3 or epoch % 100 == 0
        if should_report:
            print(f"Epoch {epoch}, Training loss {train_loss.item():.4f},"
                f" Validation loss {val_loss.item():.4f}")
    return params

In [206]:
# Linear fit on the rescaled training inputs, reporting validation loss as it goes.
learning_rate = 1e-2
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.SGD(params=[params], lr=learning_rate)

training_loop(
    model=model,
    params=params,
    optimizer=optimizer,
    n_epochs=5000,
    train_t_u=train_t_un,
    train_t_c=train_t_c,
    val_t_u=val_t_un,
    val_t_c=val_t_c,
)

Epoch 1, Training loss 87.7848, Validation loss 46.9723
Epoch 2, Training loss 42.9149, Validation loss 10.2668
Epoch 3, Training loss 35.9266, Validation loss 6.3460
Epoch 100, Training loss 24.4651, Validation loss 5.4157
Epoch 200, Training loss 17.4062, Validation loss 4.2865
Epoch 300, Training loss 12.7070, Validation loss 3.4738
Epoch 400, Training loss 9.5788, Validation loss 2.8831
Epoch 500, Training loss 7.4964, Validation loss 2.4492
Epoch 600, Training loss 6.1101, Validation loss 2.1273
Epoch 700, Training loss 5.1872, Validation loss 1.8860
Epoch 800, Training loss 4.5729, Validation loss 1.7032
Epoch 900, Training loss 4.1639, Validation loss 1.5636
Epoch 1000, Training loss 3.8917, Validation loss 1.4560
Epoch 1100, Training loss 3.7104, Validation loss 1.3724
Epoch 1200, Training loss 3.5898, Validation loss 1.3070
Epoch 1300, Training loss 3.5095, Validation loss 1.2554
Epoch 1400, Training loss 3.4560, Validation loss 1.2146
Epoch 1500, Training loss 3.4204, Validat

tensor([  5.3331, -17.1403], requires_grad=True)

In [207]:
# Build a 50x50 grid of candidate (w, b) pairs and convert it to tensors so
# that `model` broadcasts over the whole grid at once.
# NOTE(review): both axes span [5, 15], but the fits on t_un printed earlier in
# this notebook end near b = -17, which lies outside this grid - confirm the
# intended range / input scaling.
w, b = np.meshgrid(np.linspace(5, 15, 50), np.linspace(5, 15, 50))
w, b = torch.tensor(w), torch.tensor(b)


def total_loss(w, b):
    """Mean squared error of ``model`` over every (t_un, t_c) pair.

    ``w`` and ``b`` may be scalars or same-shaped tensors; the result has the
    broadcast shape of the two, with one loss value per (w, b) combination.
    """
    # `sample` rather than `input`, so the builtin is not shadowed; a
    # generator avoids materialising a throwaway list for sum().
    squared_errors = (
        (model(sample, w, b) - target) ** 2 for sample, target in zip(t_un, t_c)
    )
    return sum(squared_errors) / len(t_un)


loss_surface = total_loss(w, b)

In [208]:
# Draw the loss surface and overlay a single marker on it.
# NOTE(review): the marker coordinates (8.9014, 10.3667) are hard-coded and
# their origin is not shown in this notebook - confirm which fit they come from.
surface_trace = go.Surface(x=w, y=b, z=loss_surface)
marker_trace = go.Scatter3d(
    x=[8.9014],
    y=[10.3667],
    z=[total_loss(8.9014, 10.3667)],
)
go.Figure(data=[surface_trace, marker_trace])

In [209]:
def q_model(t_u, w2, w1, b):
    """Quadratic model: ``w2 * t_u**2 + w1 * t_u + b``."""
    quadratic_term = w2 * t_u ** 2
    linear_term = w1 * t_u
    return quadratic_term + linear_term + b

# Fit the quadratic model on the standardized inputs, starting from
# [w2, w1, b] = [0.1, 1.0, 0.0].
learning_rate = 1e-2
q_params = torch.tensor([0.1, 1.0, 0.0], requires_grad=True)
q_optimizer = optim.SGD(params=[q_params], lr=learning_rate)

training_loop(
    model=q_model,
    params=q_params,
    optimizer=q_optimizer,
    n_epochs=300,
    train_t_u=train_t_u_std,
    train_t_c=train_t_c,
    val_t_u=val_t_u_std,
    val_t_c=val_t_c,
)

Epoch 1, Training loss 176.5343, Validation loss 140.0265
Epoch 2, Training loss 164.3779, Validation loss 134.1946
Epoch 3, Training loss 153.3866, Validation loss 128.7344
Epoch 100, Training loss 7.8182, Validation loss 16.1005
Epoch 200, Training loss 2.9762, Validation loss 5.2998
Epoch 300, Training loss 2.3283, Validation loss 2.9280


tensor([1.1406, 9.0476, 9.0308], requires_grad=True)

### Training neural networks

In [210]:
# One input feature, one output feature; show the randomly initialised
# weight and bias.
linear_model = nn.Linear(in_features=1, out_features=1)
linear_model.weight, linear_model.bias  # or list(linear_model.parameters())

(Parameter containing:
 tensor([[-0.4508]], requires_grad=True),
 Parameter containing:
 tensor([-0.7260], requires_grad=True))

In [211]:
# Add a trailing axis so each 1-D split becomes a column before it is fed to
# linear_model.
t_un_train = train_t_un[:, None]
t_un_val = val_t_un[:, None]
t_c_train = train_t_c[:, None]
t_c_val = val_t_c[:, None]

In [213]:
def training_loop(n_epochs, optimizer, model, loss_fn, t_u_train, t_u_val,
                  t_c_train, t_c_val):
    """Train a callable ``model`` with ``optimizer`` and report both losses.

    Replaces the earlier ``training_loop`` definitions in this notebook.
    ``model`` is called with the inputs only and ``loss_fn`` with
    ``(predictions, targets)``. Only the training loss is backpropagated;
    the validation loss is evaluated under ``torch.no_grad()``. Losses are
    printed at epoch 1 and every 1000 epochs. Returns ``None``.
    """
    for epoch in range(1, n_epochs + 1):
        loss_train = loss_fn(model(t_u_train), t_c_train)

        # Validation never drives an update, so skip graph construction.
        with torch.no_grad():
            loss_val = loss_fn(model(t_u_val), t_c_val)

        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()

        if epoch != 1 and epoch % 1000 != 0:
            continue
        print(f"Epoch {epoch}, Training loss {loss_train.item():.4f},"
            f" Validation loss {loss_val.item():.4f}")

In [214]:
# Re-create the linear layer and train it with SGD and the built-in MSE loss
# on the column-shaped training / validation tensors.
linear_model = nn.Linear(in_features=1, out_features=1)
optimizer = optim.SGD(linear_model.parameters(), lr=1e-2)
training_loop(
    model=linear_model,
    optimizer=optimizer,
    loss_fn=nn.MSELoss(),
    n_epochs=3000,
    t_u_train=t_un_train,
    t_c_train=t_c_train,
    t_u_val=t_un_val,
    t_c_val=t_c_val,
)

Epoch 1, Training loss 281.4315, Validation loss 226.7162
Epoch 1000, Training loss 3.8470, Validation loss 1.4366
Epoch 2000, Training loss 3.3581, Validation loss 1.0923
Epoch 3000, Training loss 3.3497, Validation loss 1.0529


In [215]:
# Inspect the trained weight and bias of linear_model.
list(linear_model.parameters())

[Parameter containing:
 tensor([[5.3267]], requires_grad=True),
 Parameter containing:
 tensor([-17.1038], requires_grad=True)]

In [216]:
# Plot the rescaled inputs against the targets together with the fitted line.
fig = px.scatter(x=t_un.numpy(), y=t_c.numpy())
fig.update_traces(marker=dict(size=14))
# The x data is t_un (0.1 * t_u), so the axis label states the scaled unit
# rather than plain °Fahrenheit.
fig.update_layout(
    xaxis_title="Temperature (0.1 × °Fahrenheit)",
    yaxis_title="Temperature (°Celsius)")

# Evaluate the line's endpoints as plain Python floats without recording
# autograd history; at x = 0 the prediction equals the bias.
with torch.no_grad():
    line_y0 = linear_model.bias.item()
    line_y1 = linear_model(torch.tensor([10.0])).item()

fig.add_shape(type="line",
            x0=0,
            y0=line_y0,
            x1=10,
            y1=line_y1,)

In [217]:
# Predict on the validation inputs, reshaped into a column for the linear layer.
linear_model(val_t_un.unsqueeze(1))

tensor([[ 8.9440],
        [13.8978]], grad_fn=<AddmmBackward0>)

In [None]:
# Run the trained model on a (10, 1) tensor of uniform random inputs.
# NOTE(review): the saved output below shows ten identical predictions, which
# random inputs would not produce, and the cell has no execution count - the
# output looks stale; re-run the cell.
x = torch.rand(10, 1)
linear_model(x)

tensor([[-11.7770],
        [-11.7770],
        [-11.7770],
        [-11.7770],
        [-11.7770],
        [-11.7770],
        [-11.7770],
        [-11.7770],
        [-11.7770],
        [-11.7770]], grad_fn=<AddmmBackward0>)