In [1]:
import torch
import torch.nn as nn
import numpy as np
from torchsummary import summary
from torch.optim import SGD
import time
from torch.utils.data import Dataset, DataLoader

In [2]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Toy regression data: every target is the sum of its two input features.
x = torch.tensor([[1, 2], [3, 4], [5, 6], [7, 8]], dtype=torch.float32, device=device)
y = torch.tensor([[3], [7], [11], [15]], dtype=torch.float32, device=device)

In [3]:
class MyDataset(Dataset):
    """Map-style dataset pairing each row of ``x`` with the matching row of ``y``.

    Both tensors are detached and cloned on construction, so the dataset keeps
    its own copies instead of sharing storage with the caller's tensors.
    """

    def __init__(self, x, y):
        # Private, autograd-free copies of the inputs.
        self.x = x.detach().clone()
        self.y = y.detach().clone()

    def __len__(self):
        """Return the number of samples (length of ``x`` along its first dim)."""
        return len(self.x)

    def __getitem__(self, ix):
        """Return the ``(features, target)`` pair stored at index ``ix``."""
        features = self.x[ix]
        target = self.y[ix]
        return features, target

In [4]:
# Wrap the tensors in a Dataset and serve them in shuffled mini-batches of two.
dataset = MyDataset(x, y)
dataloader = DataLoader(
    dataset=dataset,
    batch_size=2,
    shuffle=True,
)

## Sequential Model

In [5]:
# Seed PyTorch's global RNG before any weights are created so that a fresh
# "Restart & Run All" starts from the same initial parameters every time.
# DataLoader shuffling without an explicit generator draws from this RNG too.
torch.manual_seed(0)

# Small MLP: 2 input features -> 8 hidden units (ReLU) -> 1 output.
model = nn.Sequential(
            nn.Linear(2, 8),
            nn.ReLU(),
            nn.Linear(8, 1)
        ).to(device)

## Summary

In [6]:
# One dummy sample with two features is enough for summary() to report the
# per-layer output shapes; the trailing ';' keeps the call's return value from
# being echoed underneath the printed table.
dummy_input = torch.zeros(1, 2)
summary(model, dummy_input);

Layer (type:depth-idx)                   Output Shape              Param #
├─Linear: 1-1                            [-1, 8]                   24
├─ReLU: 1-2                              [-1, 8]                   --
├─Linear: 1-3                            [-1, 1]                   9
Total params: 33
Trainable params: 33
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00


**-1** in the output shape stands for the batch dimension: any number of data points can be passed through the model at once.  
**8** in the first layer's output shape means that the layer produces an eight-dimensional output for each data point.

In [7]:
# Mean-squared-error objective, optimised with plain SGD.
loss_function = nn.MSELoss()
opt = SGD(model.parameters(), lr=0.001)
loss_history = []  # one entry per mini-batch update

start = time.time()
for epoch in range(50):
    for batch_x, batch_y in dataloader:
        opt.zero_grad()  # drop gradients left over from the previous step
        prediction = model(batch_x)
        loss_value = loss_function(prediction, batch_y)
        loss_value.backward()
        opt.step()
        loss_history.append(loss_value.item())
end = time.time()

# Wall-clock seconds spent in the training loop (shown as the cell's output).
end - start

0.08975982666015625

## Predict the validation dataset

In [8]:
# Inputs that were not part of the training data. Since the training targets
# were the sum of the two features, outputs near 17, 21 and 4 are expected.
val = torch.tensor([[8, 9], [10, 11], [1.5, 2.5]], dtype=torch.float32, device=device)
model(val)

tensor([[17.2221],
        [21.3720],
        [ 3.7211]], device='cuda:0', grad_fn=<AddmmBackward>)

# Saving and loading a PyTorch model

In [9]:
# Inspect the learned parameters: an ordered mapping from parameter name
# (e.g. '0.weight') to its tensor. This mapping is what gets written to disk
# when the model is saved.
model.state_dict()

OrderedDict([('0.weight',
              tensor([[-0.0368,  0.4641],
                      [-0.2258, -0.0473],
                      [-0.1077,  0.0541],
                      [ 0.5743,  0.7625],
                      [-0.5881, -0.2500],
                      [-0.2503,  0.1629],
                      [ 0.9699,  0.0856],
                      [-0.0517, -0.2644]], device='cuda:0')),
             ('0.bias',
              tensor([ 0.5240,  0.2746, -0.0632, -0.5095, -0.0185,  0.0709,  0.1256, -0.2187],
                     device='cuda:0')),
             ('2.weight',
              tensor([[ 0.3101, -0.0764, -0.2196,  0.8634,  0.2478, -0.1344,  0.7468, -0.1779]],
                     device='cuda:0')),
             ('2.bias', tensor([-0.0600], device='cuda:0'))])

## Saving

In [10]:
# Save a CPU copy of the weights so the checkpoint loads on machines without a
# GPU. The tensors are copied one by one instead of calling `model.to('cpu')`,
# because that call moves `model` itself off `device` and would break any later
# `model(val)` call on a CUDA machine.
cpu_state_dict = {name: tensor.cpu() for name, tensor in model.state_dict().items()}
torch.save(cpu_state_dict, 'mymodel.pth')

## Loading

In [11]:
# Rebuild the same architecture that produced the checkpoint: the saved
# state_dict holds only parameter tensors, not the layers themselves.
new_model = nn.Sequential(
                nn.Linear(2, 8),
                nn.ReLU(),
                nn.Linear(8, 1)
            ).to(device)

# map_location places the loaded tensors on the current device regardless of
# the device they were saved from.
state_dict = torch.load('mymodel.pth', map_location=device)

# load_state_dict copies the values into the existing parameters, so the model
# stays on `device`; no second `.to(device)` is needed.
new_model.load_state_dict(state_dict)

# Should reproduce the original model's predictions on the same inputs.
new_model(val)

tensor([[17.2221],
        [21.3720],
        [ 3.7211]], device='cuda:0', grad_fn=<AddmmBackward>)