In [1]:
import torch
import numpy as np

## Tensors

In [3]:
data = [[1,2], [3,4]]
x_data = torch.tensor(data)
x_data.shape

torch.Size([2, 2])

In [4]:
x_scalar = torch.tensor(3.14159)
x_empty = torch.tensor([])

In [5]:
# 1. tensor creation from numpy arrays
np_array = np.array(data)
x_np = torch.from_numpy(np_array)

In [6]:
# 2. tensor creation using APIs
shape=(2,3)
torch.rand(shape)

tensor([[0.4707, 0.6988, 0.2527],
        [0.4743, 0.1849, 0.3477]])

In [7]:
torch.ones(shape)

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [9]:
torch.zeros(shape)

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [10]:
# 3. tensor creation from other tensors
x_ones = torch.ones_like(x_data)
x_ones

tensor([[1, 1],
        [1, 1]])

In [11]:
x_ones = torch.rand_like(x_data, dtype=torch.float)
x_ones

tensor([[0.2120, 0.0302],
        [0.9931, 0.3155]])

In [12]:
# Tensor attributes
tensor = torch.rand(3,4)
print(tensor.shape, tensor.dtype, tensor.device)

torch.Size([3, 4]) torch.float32 cpu


In [13]:
# tensor = tensor.to('cuda')
tensor = tensor.cuda(1)  # cpu - cpu, gpu-gpu 간 연산만 가능(cuda 0 - cuda 0, cuda 1 - cuda 1)
tensor.device

AssertionError: Torch not compiled with CUDA enabled

In [14]:
# tensor operations
tensor = torch.ones(4,4)
tensor[:,1] = 0
tensor

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])

In [16]:
torch.cat([tensor,tensor], dim=1)

tensor([[1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1.]])

In [17]:
tensor + tensor * tensor - tensor  # element-wise arithmetic operations

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])

In [18]:
tensor @ tensor                   # matrix multiplication

tensor([[3., 0., 3., 3.],
        [3., 0., 3., 3.],
        [3., 0., 3., 3.],
        [3., 0., 3., 3.]])

## Loading Data
- custom dataset을 loading할 때는 __init__(self,[]), __len__(self), __getitem__(self, idx) methods를 define해야 함  
1. init : dataset root directory / transform / image path 등 정의
2. len : dataset의 length(크기)를 반환 -> DataLoader의 batch_size 등을 정할 때 필요
3. getitem : (idx)^th data를 return -> DataLoader call this method

In [19]:
# all datasets are subsets of torch.utils.data.Dataset
# you can make your own datasets by inheriting Dataset class
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt

In [None]:
# Some famous datasets is provided in torchvision.datasets
training_data = datasets.FashionMNIST(
    root = "data",
    train = True,
    download = True,
    transform = ToTensor())

test_data = datasets.FashionMNIST(
    root = "data",
    train = False,
    download = True,
    transform = ToTensor())

In [None]:
# DataLoader reads datasets and handles multi-process mini-batch data loading
# Multi-process mini-batch loader can avoid bottleneck on reading and transferring data
train_loader = DataLoader(dataset=training_data,
                         batch_size = 32,
                         shuffle = True,   # 모델이 이미지 순서를 cheating하여 학습하지 않도록 trainig은 shuffle해줘야 함(test는 shuffle하지 않는다)
                         num_workers = 2)  # 멀티 프로세스의 workers 수

## Build the Neural Network
You should define two method in network class: "__init__" and "__forward__"
- init : You need to implement the network architecture using torch.nn namespace.
- forward : define how do you compute the forward path of the neural network.

In [22]:
from torch import nn
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(nn.Linear(28*28, 512),
                                              nn.ReLU(),
                                              nn.Linear(512,512),
                                              nn.ReLU(),
                                              nn.Linear(512,10),
                                              )
        
        
    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

## Training
1. define Model
2. define DataLoader
3. define Optimizer
4. Iterate the below lines until reaching the max_epoch.(but, you can set stop iteration when model is overfitted)  
   (1) Sample a batch from the data loader  
   (2) Predict the output using the neural network  
   (3) Compute the error between the answer and prediction  
   (4) Compute the gradient using optimizer  
   (5) Backpropagate the gradient to optimize the NN parameters  

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X,y) in enumerate(dataloader):  # epoch만큼 iterate 한다
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)
        
        # Backpropagation : 세 줄로 이루어진 optimization step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()  # gradient 전부 계산되면 정해준 optimization 알고리즘에 따라 model weight update
        
        # logging
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f} [{current:>5d} / {size:>5d}]")

## Save and Load Model Weights
PyTorch library also provide the functions(torch.save, torch.load) to save and store model weights.

In [None]:
model = models.vgg16(pretrained = True)
torch.save(model.state_dict(), 'model_weights.pth')

In [None]:
model = models.vgg16() # We do not specify pretrained = True, i.e. do not load default weights
model.load_state_dict(torch.load('model_weights.pth'))  # vgg16 pretrained weights를 dict에 박아주고
model.eval()                                           # training을 통한 model update하지 않고 test만 바로 한다면 eval

## Transfer Learning(with torchvision model)
Example of image classification model fine-tuning(transfer learning)

In [None]:
model_ft = models.resnet18(pretrained = True)
num_ftrs = model_ft.fc.in_features

model_ft.fc = nn.Linear(num_ftrs, 2) # pretrained resnet 모델은 1000개의 classes를 분류하지만, 
                                     # FC Layer에서 2개의 class만 분류하도록 FC Layer fine-tuning
model_ft = model_ft.to(device)
criterion = nn.CrossEnrtopyLoss()    # linear -> MSE, linear + activation function -> CrossEntropy
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)   # Adam, SGD 중 SGD를 optimizer로 선택 + momentum(관성)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)  # lr은 학습을 진행할수록 점차 줄여나가는 게 효과적