# PyTorch hands-on

In [0]:
# Start from a clean slate: remove TensorBoard logs left over from earlier runs.
# -f keeps this cell from failing on a fresh checkout where ./log does not exist yet.
!rm -rf ./log

In [0]:
# Colab line magic that selects TensorFlow 2.x.
# NOTE(review): presumably here so the TensorBoard extension loaded in the next
# cell comes from TF 2.x; this magic looks Colab-specific — confirm it is still
# needed (and available) in the target environment.
%tensorflow_version 2.x

In [0]:
# Enable the TensorBoard notebook extension, then open the dashboard inline,
# pointed at ./log (the parent of the ./log/<n> run directories written by the
# SummaryWriter calls in this notebook).
%load_ext tensorboard
%tensorboard --logdir ./log

In [0]:
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch import nn, optim, Tensor
from sklearn import datasets
from torch.utils.tensorboard import SummaryWriter

## Fitting Linear model

### Linear model module

In [0]:
class LinearModel(nn.Module):
    """A single fully connected layer, ``y = x W^T + b``, as a PyTorch module.

    Args:
        dim_input: Number of features in each input row.
        dim_output: Number of values produced for each input row.
    """

    def __init__(self, dim_input: int, dim_output: int):
        # nn.Module.__init__ has to run before any sub-module is assigned:
        # it sets up the internal registries through which `self.linear`
        # becomes visible to parameters() and state_dict().
        super().__init__()

        # The only layer of the model; PyTorch's default initialisation is kept
        self.linear = nn.Linear(in_features=dim_input, out_features=dim_output)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the linear layer applied to ``x``."""
        y = self.linear(x)
        return y

In [0]:
# Trace the model once with a dummy 1x1 input so TensorBoard can draw its graph
model = LinearModel(dim_input=1, dim_output=1)
example_input = Tensor([[1.0]])

writer = SummaryWriter(log_dir="./log/1")
writer.add_graph(model, input_to_model=example_input)
writer.close()

### Loading dataset

Adapted from [scikit-learn example](https://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html)

* Reshape input tensor (`np.newaxis`)
  * The model expects two-dimensional inputs: the first dimension indexes the samples of a minibatch and the second is the input size, i.e., the number of elements of each input
* Normalize both inputs and outputs so that their mean is 0 and their standard deviation is 1
  * NOTE: This dataset seems to be normalized beforehand

In [0]:
def load_data(ix_feature: int = 2, n_test: int = 20):
    """Load one feature of the scikit-learn diabetes dataset, standardised.

    As a side effect the dataset description, the name of the selected
    feature and the shapes of the training arrays are printed.

    Args:
        ix_feature: Column index of the explanatory variable to keep.
        n_test: Number of trailing samples to hold out. They are not
            returned; the leading samples form the training split.

    Returns:
        Tuple ``(xs, ys)``: training inputs of shape ``(n_train, 1)`` and
        training targets of shape ``(n_train,)``, each shifted and scaled
        with its own mean and standard deviation.

    Raises:
        ValueError: If ``n_test`` is negative or leaves no training sample.
    """
    # Load the dataset once; the returned bunch carries the description,
    # the feature names and the data/target arrays.
    data = datasets.load_diabetes()
    print(data.DESCR)
    print()

    # Feature name
    print("Explanatory variable: {}".format(data.feature_names[ix_feature]))

    # Keep a single feature column but stay two-dimensional:
    # (n_samples, n_features) -> (n_samples, 1)
    diabetes_X = data.data[:, np.newaxis, ix_feature]
    diabetes_y = data.target

    n_samples = len(diabetes_X)
    if not 0 <= n_test < n_samples:
        raise ValueError(
            "n_test must be in [0, {}), got {}".format(n_samples, n_test))

    # Training split = everything except the last n_test samples.
    # Slicing by an explicit count keeps n_test=0 meaning "hold out nothing"
    # (a [:-0] slice would be empty).
    n_train = n_samples - n_test
    diabetes_X_train = diabetes_X[:n_train]
    diabetes_y_train = diabetes_y[:n_train]

    print(diabetes_X_train.shape)
    print(diabetes_y_train.shape)

    # Normalization: zero mean and unit standard deviation for inputs and targets
    x_m = np.mean(diabetes_X_train)
    x_s = np.std(diabetes_X_train)
    xs = (diabetes_X_train - x_m) / x_s

    y_m = np.mean(diabetes_y_train)
    y_s = np.std(diabetes_y_train)
    ys = (diabetes_y_train - y_m) / y_s

    return xs, ys

# Quick visual check of the standardised training split before fitting anything
xs, ys = load_data()
plt.scatter(x=xs, y=ys)

### Fitting linear model by gradient-based optimization (Adam)

In [0]:
def train(model: nn.Module, xs: np.ndarray, ys: np.ndarray, n_train_steps: int,
          lr: float = 1e-2, log_every: int = 200):
    """Fit ``model`` to ``(xs, ys)`` with Adam on the mean-squared-error loss.

    Every optimisation step uses the whole dataset. The model's parameters
    are updated in place; nothing is returned.

    Args:
        model: Module producing one prediction per input row.
        xs: Inputs, one row per sample.
        ys: Targets holding one value per sample. They are flattened, so
            both ``(n_samples,)`` and ``(n_samples, 1)`` are accepted.
        n_train_steps: Number of optimisation steps.
        lr: Learning rate passed to Adam.
        log_every: Print the loss every this many steps; ``0`` disables
            the progress output.
    """
    # From numpy array to float32 torch tensor
    xs = torch.as_tensor(xs, dtype=torch.float32)
    # Flatten the targets to match the flattened predictions below: a
    # (n, 1) target against an (n,) prediction would broadcast to (n, n)
    # inside MSELoss and silently optimise the wrong objective.
    ys = torch.as_tensor(ys, dtype=torch.float32).reshape(-1)

    # Loss function: MSE loss
    # https://pytorch.org/docs/stable/nn.html#mseloss
    loss = nn.MSELoss()

    # Optimizer: Adam
    # https://pytorch.org/docs/stable/optim.html#torch.optim.Adam
    optimizer = optim.Adam(model.parameters(), lr=lr, amsgrad=True)

    for i in range(n_train_steps):
        # Reset gradient
        optimizer.zero_grad()

        # model.forward() is called
        # reshape(-1) converting shape from [n_samples, 1] to [n_samples]
        ys_pred = model(xs).reshape(-1)

        # Loss value
        loss_value = loss(ys_pred, ys)

        # Taking an optimization step
        # https://pytorch.org/docs/stable/optim.html#taking-an-optimization-step
        loss_value.backward()
        optimizer.step()

        if log_every and i % log_every == 0:
            # .item() prints a plain float instead of a tensor repr with grad_fn
            print("Step={}, Loss = {}".format(i, loss_value.item()))

### Prediction and plotting

In [0]:
def pred_and_plot(model: nn.Module, xs: np.ndarray, ys: np.ndarray,
                  n_points: int = 100):
    """Scatter the data and overlay the model's predictions as a line.

    Args:
        model: Module taking a ``(n, 1)`` float tensor of inputs.
        xs: Observed inputs (x coordinates of the scatter plot).
        ys: Observed targets (y coordinates of the scatter plot).
        n_points: Number of evenly spaced inputs, from ``min(xs)`` to
            ``max(xs)`` inclusive, at which the model is evaluated.
    """
    x_min = np.min(xs)
    x_max = np.max(xs)
    # linspace rather than arange with a float step: the right end point is
    # included, and a degenerate range (x_min == x_max) still yields points.
    xs_ = np.linspace(x_min, x_max, n_points)[:, np.newaxis]

    # Prediction only, so no autograd graph is recorded
    with torch.no_grad():
        ys_ = model(Tensor(xs_))

    # From torch Tensor to numpy array
    # https://pytorch.org/tutorials/beginner/former_torchies/tensor_tutorial.html#converting-torch-tensor-to-numpy-array
    ys_ = ys_.numpy()

    plt.scatter(xs, ys)
    plt.plot(xs_, ys_)

### Putting together

In [0]:
# Set random seed for reproducibility (the model's initial weights are random)
# https://pytorch.org/docs/stable/notes/randomness.html
torch.manual_seed(0)

# Training
model = LinearModel(dim_input=1, dim_output=1)
xs, ys = load_data()
train(model, xs, ys, n_train_steps=1000)

# Prediction
pred_and_plot(model, xs, ys)

## Monitoring loss value during training with Tensorboard

In [0]:
def train_with_tensorboard(model: nn.Module, optimizer: optim.Optimizer,
                           xs: np.ndarray, ys: np.ndarray,
                           n_train_steps: int, init_train_steps: int=0,
                           log_dir: str="./log/1"):
    """Train ``model`` on the MSE loss and log the loss to TensorBoard.

    Steps are numbered ``init_train_steps, ..., n_train_steps - 1``, so
    ``n_train_steps`` is the exclusive end of the step range (not a count)
    and a resumed run keeps logging on the same step axis.

    Args:
        model: Module producing one prediction per input row.
        optimizer: Optimizer already bound to ``model``'s parameters.
        xs: Inputs, one row per sample.
        ys: Targets holding one value per sample. They are flattened, so
            both ``(n_samples,)`` and ``(n_samples, 1)`` are accepted.
        n_train_steps: Step index at which training stops (exclusive).
        init_train_steps: Index of the first step.
        log_dir: Directory the TensorBoard event file is written to.

    Returns:
        The ``(model, optimizer)`` pair that was passed in, updated in place.
    """
    # From numpy array to float32 torch tensor
    xs = torch.as_tensor(xs, dtype=torch.float32)
    # Flatten the targets to match the flattened predictions below: a
    # (n, 1) target against an (n,) prediction would broadcast to (n, n)
    # inside MSELoss and silently optimise the wrong objective.
    ys = torch.as_tensor(ys, dtype=torch.float32).reshape(-1)

    # Loss function: MSE loss
    # https://pytorch.org/docs/stable/nn.html#mseloss
    loss = nn.MSELoss()

    # Tensorboard
    writer = SummaryWriter(log_dir)
    try:
        for i in range(init_train_steps, n_train_steps):
            # Reset gradient
            # https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch
            # https://discuss.pytorch.org/t/model-zero-grad-or-optimizer-zero-grad/28426/6
            optimizer.zero_grad()

            # model.forward() is called
            # reshape(-1) converting shape from [n_samples, 1] to [n_samples]
            ys_pred = model(xs).reshape(-1)

            # Loss value, logged and printed as a plain Python float
            loss_value = loss(ys_pred, ys)
            loss_scalar = loss_value.item()
            writer.add_scalar("loss_value", loss_scalar, i)

            # Taking an optimization step
            # https://pytorch.org/docs/stable/optim.html#taking-an-optimization-step
            loss_value.backward()
            optimizer.step()

            if i % 20 == 0:
                print("Step={}, Loss = {}".format(i, loss_scalar))
    finally:
        # Flush and release the event file even if a training step raised
        writer.close()

    return model, optimizer

In [0]:
# Fix the RNG so the initial weights, and hence the whole run, are repeatable
# https://pytorch.org/docs/stable/notes/randomness.html
torch.manual_seed(0)

# Build the model and its Adam optimizer, then train for steps 0..99
# https://pytorch.org/docs/stable/optim.html#torch.optim.Adam
model = LinearModel(dim_input=1, dim_output=1)
opt = optim.Adam(params=model.parameters(), lr=1e-2, amsgrad=True)
xs, ys = load_data()
train_with_tensorboard(model=model, optimizer=opt, xs=xs, ys=ys,
                       n_train_steps=100)

# Plot the fitted line over the training data
pred_and_plot(model, xs, ys)

## Saving/loading trained model

In [0]:
def train_and_save(model: nn.Module, opt: optim.Adam, xs: np.ndarray,
                   ys: np.ndarray, n_train_steps: int,
                   checkpoint_path: str = "./checkpoint.pt",
                   log_dir: str = "./log/2"):
    """Train from step 0 and write a checkpoint of model and optimizer.

    Args:
        model: Module to train.
        opt: Optimizer bound to ``model``'s parameters.
        xs: Training inputs.
        ys: Training targets.
        n_train_steps: Step index at which training stops (exclusive).
        checkpoint_path: File the checkpoint is written to.
        log_dir: TensorBoard log directory for this run.
    """
    train_with_tensorboard(model, opt, xs, ys, n_train_steps,
                           log_dir=log_dir)

    # Both state dicts are saved: the optimizer's internal state is needed
    # as well to continue training from where this run stopped.
    torch.save({
        "model": model.state_dict(),
        "optimizer": opt.state_dict(),
    }, checkpoint_path)


def load_and_train(model: nn.Module, opt: optim.Adam, xs: np.ndarray,
                   ys: np.ndarray, n_train_steps: int, init_train_steps: int,
                   checkpoint_path: str = "./checkpoint.pt",
                   log_dir: str = "./log/3"):
    """Restore model and optimizer from a checkpoint, then keep training.

    Args:
        model: Module whose parameters are overwritten by the checkpoint.
        opt: Optimizer bound to ``model``'s parameters; its state is
            overwritten by the checkpoint.
        xs: Training inputs.
        ys: Training targets.
        n_train_steps: Step index at which training stops (exclusive).
        init_train_steps: Index of the first step of the resumed run.
        checkpoint_path: File to read; expected to hold a dict with the
            keys ``"model"`` and ``"optimizer"``.
        log_dir: TensorBoard log directory for the resumed run.
    """
    # NOTE: torch.load unpickles the file, so only load checkpoints from a
    # trusted source.
    checkpoint = torch.load(checkpoint_path)

    model.load_state_dict(checkpoint["model"])
    opt.load_state_dict(checkpoint["optimizer"])

    train_with_tensorboard(model, opt, xs, ys, n_train_steps, init_train_steps,
                           log_dir=log_dir)

In [0]:
# Seed the RNG so the freshly built model starts from repeatable weights
# https://pytorch.org/docs/stable/notes/randomness.html
torch.manual_seed(0)

# Phase 1: train for steps 0..19, then checkpoint model and optimizer state
model = LinearModel(dim_input=1, dim_output=1)
opt = optim.Adam(params=model.parameters(), lr=1e-2, amsgrad=True)
xs, ys = load_data()
train_and_save(model, opt, xs, ys, n_train_steps=20)

# Phase 2: restore the checkpoint into brand-new objects and continue with
# steps 20..99, to check that checkpointing round-trips
model_ = LinearModel(dim_input=1, dim_output=1)
opt_ = optim.Adam(params=model_.parameters(), lr=1e-2, amsgrad=True)
load_and_train(model_, opt_, xs, ys, n_train_steps=100, init_train_steps=20)