# PyTorch Basics

Modules

In [1]:
import torch
import torchvision
import torch.nn as nn
import numpy as np
import torchvision.transforms as transforms

## Basic autograd example 1

Create tensors

In [2]:
# Three scalar tensors; requires_grad=True asks autograd to track operations
# on them so that gradients can be computed later.
x, w, b = (torch.tensor(value, requires_grad=True) for value in (1., 2., 3.))

Build a computational graph

In [3]:
# Forward computation on the tracked tensors; with w = 2 and b = 3 this is
# the line y = 2 * x + 3.
y = w * x + b    # y = 2 * x + 3

Compute gradients

In [4]:
# Backpropagate from y; afterwards x.grad, w.grad and b.grad hold the
# derivatives of y with respect to each tensor.
y.backward()

Print out the gradients

In [5]:
# Expected values, one per line:
#   x.grad = dy/dx = w = 2
#   w.grad = dy/dw = x = 1
#   b.grad = dy/db     = 1
print(x.grad, w.grad, b.grad, sep='\n')

tensor(2.)
tensor(1.)
tensor(1.)


## Basic autograd example 2 

Create tensors of shape (10, 3) and (10, 2).

In [6]:
# Random inputs of shape (10, 3) and random targets of shape (10, 2).
# x is drawn before y, so the random-number stream is consumed in the same order.
x, y = torch.randn(10, 3), torch.randn(10, 2)
print('x: ', x)
print('y: ', y)

x:  tensor([[ 2.1669,  0.5003,  0.0804],
        [ 1.3743, -0.9538, -0.3715],
        [-2.0673, -0.4940, -1.1851],
        [-0.1229, -0.7061, -1.2551],
        [-0.9564,  1.4436, -0.0866],
        [-1.1118,  2.2743, -0.7404],
        [-0.6295,  0.0563,  0.3117],
        [-1.0451,  0.0816,  0.1476],
        [ 0.4311, -1.1811,  0.9732],
        [-1.8541,  0.1990,  0.7300]])
y:  tensor([[ 0.2557, -2.7736],
        [-0.8977,  0.3874],
        [-0.0161, -0.0375],
        [ 0.7784,  0.2388],
        [-0.9064, -0.5864],
        [ 0.4103, -0.8227],
        [ 0.7677, -2.0712],
        [-0.2699, -0.2790],
        [ 0.8492, -0.4041],
        [ 1.4301,  0.1800]])


Build a fully connected layer.

In [7]:
# Fully connected layer mapping 3 input features to 2 outputs.
# Its weight has shape (2, 3) and its bias has shape (2,).
linear = nn.Linear(in_features=3, out_features=2)
print('w: ', linear.weight)
print('b: ', linear.bias)

w:  Parameter containing:
tensor([[ 0.0812, -0.2913,  0.0858],
        [ 0.2917, -0.4463,  0.1069]], requires_grad=True)
b:  Parameter containing:
tensor([-0.2974, -0.4959], requires_grad=True)


Build loss function and optimizer.

In [8]:
# Mean-squared-error criterion, plus an SGD optimizer that updates the linear
# layer's parameters with a learning rate of 0.01.
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(params=linear.parameters(), lr=0.01)

Forward pass.

In [9]:
# Run x through the linear layer; printing shows the predictions along with
# the grad_fn recorded by autograd.
pred = linear(x)
print(pred)

tensor([[-0.2603, -0.0784],
        [ 0.0601,  0.2910],
        [-0.4231, -1.0053],
        [-0.2094, -0.3508],
        [-0.8030, -1.4285],
        [-1.1137, -1.9145],
        [-0.3382, -0.6714],
        [-0.3934, -0.8215],
        [ 0.1651,  0.2611],
        [-0.4434, -1.0476]], grad_fn=<ThAddmmBackward>)


Compute loss.

In [10]:
# Compare the predictions with the targets using the criterion; .item()
# extracts the loss value as a plain number for printing.
loss = criterion(pred, y)
print('loss: ', loss.item())

loss:  1.2268400192260742


Backward pass

In [11]:
# Backpropagate the loss; the gradients end up in linear.weight.grad and
# linear.bias.grad.
loss.backward()

Print out the gradients

In [12]:
# Gradients of the loss with respect to the layer's weight and bias.
print('dL/dw: ', linear.weight.grad)
print('dL/db: ', linear.bias.grad)

dL/dw:  tensor([[ 0.6760, -0.3227,  0.0048],
        [ 1.2049, -0.2360,  0.3128]])
dL/db:  tensor([-0.6161, -0.0598])


1-step gradient descent.

In [13]:
# Apply a single optimizer update using the gradients computed above,
# then show the updated weight and bias.
optimizer.step()
print('w: ', linear.weight)
print('b: ', linear.bias)

w:  Parameter containing:
tensor([[ 0.0745, -0.2881,  0.0857],
        [ 0.2797, -0.4440,  0.1038]], requires_grad=True)
b:  Parameter containing:
tensor([-0.2913, -0.4953], requires_grad=True)


You can also perform gradient descent at the low level.

`linear.weight.data.sub_(0.01 * linear.weight.grad.data)`

`linear.bias.data.sub_(0.01 * linear.bias.grad.data)`

Print out the loss after 1-step gradient descent.

In [14]:
# Repeat the forward pass with the updated parameters and recompute the loss
# to see the effect of the single optimization step.
pred = linear(x)
loss = criterion(pred, y)
print('loss after 1 step optimization: ', loss.item())

loss after 1 step optimization:  1.2015973329544067


## Loading data from numpy

Create a numpy array

In [15]:
# A 2x2 integer array; the bare `x` on the last line makes the notebook display it.
x = np.array([[1, 2], [3, 4]])
x

array([[1, 2],
       [3, 4]])

Convert the numpy array to a torch tensor.

In [16]:
# Turn the NumPy array into a torch tensor and display it.
y = torch.from_numpy(x)
y

tensor([[1, 2],
        [3, 4]], dtype=torch.int32)

Convert the torch tensor to a numpy array.

In [17]:
# Go back the other way: tensor -> NumPy array, then display it.
z = y.numpy()
z

array([[1, 2],
       [3, 4]])

## Input pipeline

Download and construct CIFAR-10 dataset.

In [18]:
# CIFAR-10 training split stored under ../../data/. download=True fetches the
# archive, and ToTensor() is applied to every image that is read.
train_dataset = torchvision.datasets.CIFAR10(
    root='../../data/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../../data/cifar-10-python.tar.gz


Fetch one data pair (read data from disk).

In [19]:
# Indexing the dataset returns one (image, label) pair.
image, label = train_dataset[0]
print(image.shape)  # same torch.Size that image.size() returns
print(label)

torch.Size([3, 32, 32])
6


Data loader (this provides queues and threads in a very simple way).

In [20]:
# Wrap the dataset in a loader that yields shuffled mini-batches of 64 samples.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
)

When iteration starts, queue and thread start to load data from files.

In [21]:
# iter() turns the loader into an iterator that hands out one mini-batch per step.
data_iter = iter(train_loader)

Mini-batch images and labels.

In [22]:
# Pull one mini-batch from the iterator. The built-in next() works with any
# Python iterator; the iterator's own `.next()` method was a Python 2 style
# alias that is no longer available on current PyTorch releases.
images, labels = next(data_iter)

Actual usage of the data loader is as below.

In [23]:
# Each pass through the loop receives one mini-batch of images and their labels.
for images, labels in train_loader:
    # Training code should be written here.
    pass

## Input pipeline for a custom dataset

You should build your custom dataset as below.

In [24]:
class CustomDataset(torch.utils.data.Dataset):
    """Template for a user-defined dataset; every method is a stub to fill in."""

    def __init__(self):
        """Set up the dataset.

        TODO:
            1. Initialize file paths or a list of file names.
        """

    def __getitem__(self, index):
        """Return the sample at ``index`` (the stub currently returns None).

        TODO:
            1. Read one data item from file (e.g. using numpy.fromfile, PIL.Image.open).
            2. Preprocess the data (e.g. torchvision.transforms).
            3. Return a data pair (e.g. image and label).
        """
        return None

    def __len__(self):
        """Return the number of samples; change 0 to the total size of your dataset."""
        dataset_size = 0
        return dataset_size


You can then use the prebuilt data loader.

In [25]:
# Instantiate the custom dataset and hand it to the standard DataLoader,
# using the same batch size and shuffling as for CIFAR-10.
custom_dataset = CustomDataset()
train_loader = torch.utils.data.DataLoader(
    dataset=custom_dataset,
    batch_size=64,
    shuffle=True,
)

## Pretrained model

Download and load the pretrained ResNet-18

In [26]:
# Build ResNet-18 with pretrained weights; the bare `resnet` on the last line
# displays the module structure.
# NOTE(review): newer torchvision releases prefer a `weights=` argument over
# `pretrained=` — confirm against the installed version.
resnet = torchvision.models.resnet18(pretrained=True)
resnet

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

If you want to finetune only the top layer of the model, set as below.

In [27]:
# Freeze every existing parameter so that no gradients are computed for it.
for param in resnet.parameters():
    param.requires_grad_(False)

Replace the top layer for finetuning.

In [28]:
# Swap the final fully connected layer for a new one that keeps the same
# input width and produces 100 outputs (100 is an example).
resnet.fc = nn.Linear(in_features=resnet.fc.in_features, out_features=100)
resnet

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

Forward pass.

In [29]:
# Feed a random batch of 64 three-channel 224x224 inputs through the network.
images = torch.randn(64, 3, 224, 224)
outputs = resnet(images)
print(outputs.shape)     # (64, 100)

torch.Size([64, 100])


## Save and load the model

Save and load the entire model.

In [30]:
# Serialize the whole module object to disk, then read it back as `model`.
# NOTE(review): newer PyTorch releases may require an explicit
# `weights_only=False` to load a full pickled module — confirm for the
# installed version.
torch.save(resnet, 'model.ckpt')
model = torch.load('model.ckpt')

Save and load only the model parameters (recommended).

In [31]:
# Store only the parameter dictionary, then restore it into the existing module.
torch.save(resnet.state_dict(), 'params.ckpt')
resnet.load_state_dict(torch.load('params.ckpt'))