<a href="https://colab.research.google.com/github/ga642381/ML2021-Spring/blob/main/Pytorch/Pytorch_Tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Pytorch Tutorial**


In [1]:
import torch

**1. Pytorch Documentation Explanation with torch.max**



In [2]:
# Draw three independent 4x5 tensors from a standard normal distribution
# (x first, then y, then z) and print them one after another.
x, y, z = (torch.randn(4, 5) for _ in range(3))
print(x, y, z, sep="\n")

tensor([[ 0.5333, -2.1289,  0.0134,  0.2944,  1.3323],
        [-0.3073,  0.7287, -0.3138, -0.5779, -0.4857],
        [ 2.1097,  0.7444, -0.6332, -2.2748, -0.6700],
        [-0.6998,  0.4827, -0.1669,  0.3563,  0.1973]])
tensor([[-0.3793,  0.2745, -0.3627,  0.7330, -1.1814],
        [ 0.7172,  0.3911, -0.8705, -0.6460, -1.2681],
        [-1.2629,  0.6069,  0.7654,  0.6288, -1.1020],
        [-0.8329, -1.8591,  0.6841,  0.0319,  0.4117]])
tensor([[ 1.2418,  0.0421, -0.1323,  0.4641,  1.5034],
        [-1.6918,  2.5436,  0.4511, -0.7256, -1.2441],
        [-0.7143, -0.4768, -0.0450, -0.7399,  0.5821],
        [-0.3500, -0.1162,  0.5992,  0.4575,  1.7788]])


In [5]:
# 1. max of entire tensor (torch.max(input) → Tensor)
# With only `input` given, the reduction runs over every element of x and the
# result is a single 0-dim tensor (see the printed value below).
m = torch.max(x)
print(m)

tensor(2.1097)


In [6]:
# 2. max along a dimension (torch.max(input, dim, keepdim=False, *, out=None) → (Tensor, LongTensor))
# dim=0 is passed positionally. The call returns a (values, indices) pair:
# for the 4x5 tensor x this gives one maximum per column (5 values) together
# with the row index at which each maximum was found.
m, idx = torch.max(x,0)
print(m)
print(idx)

tensor([2.1097, 0.7444, 0.0134, 0.3563, 1.3323])
tensor([2, 2, 0, 3, 0])


In [7]:
# 2-2
# Same reduction as the previous cell, but `input` and `dim` are passed by
# keyword instead of by position; the printed result is identical.
m, idx = torch.max(input=x,dim=0)
print(m)
print(idx)

tensor([2.1097, 0.7444, 0.0134, 0.3563, 1.3323])
tensor([2, 2, 0, 3, 0])


In [None]:
# 2-3
# keepdim = False represents that the output tensor will not retain the reduced dimension.
# Here keepdim is passed positionally as the third argument; False is also the
# default, so the output matches the two previous cells.
m, idx = torch.max(x,0,False)
print(m)
print(idx)

tensor([2.1097, 0.7444, 0.0134, 0.3563, 1.3323])
tensor([2, 2, 0, 3, 0])


In [None]:
# 2-4
# keepdim = True represents that the output tensor will retain the reduced dimension.
# The reduced dimension is kept with size 1, so both outputs are printed with
# an extra pair of brackets (shape 1x5 instead of 5).
m, idx = torch.max(x,dim=0,keepdim=True)
print(m)
print(idx)

tensor([[2.1097, 0.7444, 0.0134, 0.3563, 1.3323]])
tensor([[2, 2, 0, 3, 0]])


In [10]:
# 2-5
# `out` is keyword-only in the signature (it comes after `*`). Passing a tuple
# of two tensors makes torch.max write the values and indices into them.
# NOTE(review): m and idx are whatever an earlier cell left behind; if that was
# the keepdim=True cell their shape differs from this result, which is
# presumably what triggers the warning echoed in the output below — confirm.
p = (m,idx)
torch.max(x,0,False,out=p)
print(p[0])
print(p[1])


tensor([2.1097, 0.7444, 0.0134, 0.3563, 1.3323])
tensor([2, 2, 0, 3, 0])


  torch.max(x,0,False,out=p)


In [11]:
# 2-6
# Intentional error: `out` is keyword-only, so passing the tuple p as a fourth
# positional argument matches none of the overloads and raises the TypeError
# shown below. The print calls are never reached.
p = (m,idx)
torch.max(x,0,False,p)
print(p[0])
print(p[1])

TypeError: max() received an invalid combination of arguments - got (Tensor, int, bool, tuple), but expected one of:
 * (Tensor input, *, Tensor out = None)
 * (Tensor input, Tensor other, *, Tensor out = None)
      didn't match because some of the arguments have invalid types: (Tensor, !int!, !bool!, !tuple of (Tensor, Tensor)!)
 * (Tensor input, int dim, bool keepdim = False, *, tuple of Tensors out = None)
 * (Tensor input, name dim, bool keepdim = False, *, tuple of Tensors out = None)


In [14]:
# 2-7
# Intentional error: the second positional argument must be a Tensor (`other`)
# or an int/name (`dim`). A bool is accepted by none of the overloads listed in
# the TypeError below, so keepdim cannot be given without dim.
m, idx = torch.max(x,True)

TypeError: max() received an invalid combination of arguments - got (Tensor, bool), but expected one of:
 * (Tensor input, *, Tensor out = None)
 * (Tensor input, Tensor other, *, Tensor out = None)
 * (Tensor input, int dim, bool keepdim = False, *, tuple of Tensors out = None)
 * (Tensor input, name dim, bool keepdim = False, *, tuple of Tensors out = None)


In [None]:
# 3. max(choose max) operators on two tensors (torch.max(input, other, *, out=None) → Tensor)
# Pick the larger value at every position.
# choose max value between two tensors
t = torch.max(x,y)
print(t)

tensor([[ 0.5333,  0.2745,  0.0134,  0.7330,  1.3323],
        [ 0.7172,  0.7287, -0.3138, -0.5779, -0.4857],
        [ 2.1097,  0.7444,  0.7654,  0.6288, -0.6700],
        [-0.6998,  0.4827,  0.6841,  0.3563,  0.4117]])


**2. Common errors**



The following code blocks show some common errors encountered when using the torch library. For each example, first run the cell that raises the error, then run the cell right after it, which contains the fix. These examples need a GPU, so change the runtime type to GPU before running them.


In [20]:
import torch

In [None]:
# 1. different device error
# Intentional error: the model's parameters are moved to the GPU ("cuda:0")
# while the input tensor is explicitly placed on the CPU, so the forward pass
# is expected to fail because the two live on different devices.
model = torch.nn.Linear(5,1).to("cuda:0")
x = torch.Tensor([1,2,3,4,5]).to("cpu")
y = model(x)

In [22]:
# 1. different device error (fixed)
# Fix: create the input on the same device as the model ("cuda:0").
# `model` is the Linear(5, 1) layer built in the previous cell, so the output
# for one 5-element input has a single element.
x = torch.Tensor([1,2,3,4,5]).to("cuda:0")
y = model(x)
print(y.shape)

torch.Size([1])


In [None]:
# 2. mismatched dimensions error
# Intentional error: x is 4x5 and y is 5x4. The shapes are neither equal nor
# broadcastable, so the element-wise addition raises a RuntimeError.
x = torch.randn(4,5)
y= torch.randn(5,4)
z = x + y

RuntimeError: ignored

In [None]:
# 2. mismatched dimensions error (fixed)
# Fix: swap y's two dimensions so it becomes 4x5 and matches x.
# NOTE: this cell rebinds y, so it is not idempotent — running it a second time
# transposes y back to 5x4 and the addition fails again. Re-run the previous
# cell first if you need to repeat it.
y= y.transpose(0,1)
z = x + y
print(z.shape)

torch.Size([4, 5])


In [3]:
# 3. cuda out of memory error
# Intentional error: the whole batch of 2048 images is sent through the network
# in a single forward pass, which needs more GPU memory than is available
# (see the OutOfMemoryError below).
import torch
import torchvision.models as models
resnet18 = models.resnet18().to("cuda:0") # Neural Networks for Image Recognition
data = torch.randn(2048,3,244,244) # Create fake data (2048 images, each 3x244x244), kept on the CPU
out = resnet18(data.to("cuda:0")) # Move ALL the data to the GPU at once and feed it to the model
print(out.shape)


OutOfMemoryError: CUDA out of memory. Tried to allocate 7.27 GiB. GPU 0 has a total capacity of 23.52 GiB of which 355.81 MiB is free. Process 3151197 has 21.31 GiB memory in use. Including non-PyTorch memory, this process has 1.80 GiB memory in use. Of the allocated memory 1.41 GiB is allocated by PyTorch, and 19.31 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# 3. cuda out of memory error (fixed)
# Fix: iterate over the first dimension of `data` and run one image at a time,
# so only a single image has to be on the GPU per forward pass.
# unsqueeze(0) restores the batch dimension the model expects (3x244x244 -> 1x3x244x244).
# `out` is overwritten on every iteration, so after the loop it only holds the
# result for the last image — hence the printed shape [1, 1000].
for d in data:
  out = resnet18(d.to("cuda:0").unsqueeze(0))
print(out.shape)


torch.Size([1, 1000])


In [5]:
# Show the raw model output left in `out` by the loop above (one row of 1000 values).
print(out)

tensor([[ 1.7820e-01, -6.7507e-01, -2.4757e-01,  2.5081e-02, -1.6184e-01,
          8.0630e-02, -1.0595e-01,  1.3599e-02, -2.0529e-01,  2.1550e-01,
         -5.9498e-01,  5.5215e-02, -1.3171e-01,  2.6458e-01, -5.4338e-01,
          2.1331e-01,  3.8900e-01, -3.2000e-01, -4.9684e-01,  1.2532e+00,
          6.1842e-01, -4.4870e-01, -4.4976e-02, -3.2155e-01, -4.9260e-01,
          2.0059e-01,  3.1546e-01,  1.1004e+00,  3.8112e-01, -7.8669e-01,
         -7.4259e-01,  3.8865e-01,  6.2221e-01, -2.2290e-01, -6.9785e-02,
         -1.2549e-01,  3.9365e-01,  4.2896e-01,  2.9827e-01,  6.2066e-01,
          1.6700e-01,  8.3207e-01,  2.1306e-01, -1.1544e-01, -1.8049e-01,
         -4.3651e-01,  5.8072e-01,  3.7959e-01, -8.2450e-02,  4.7491e-01,
          5.1069e-02,  5.3747e-01,  5.8149e-01, -1.3893e-01, -7.0281e-01,
         -3.7909e-01, -9.0739e-01,  1.9776e-01,  3.1438e-01, -3.2686e-03,
         -1.4422e-01, -7.7786e-02,  4.7470e-01, -1.8298e-01, -8.6349e-02,
         -5.8126e-01,  1.7504e-01,  3.

In [6]:
# 4. mismatched tensor type
# Intentional error: torch.Tensor([...]) builds a float tensor, but the loss
# expects the class labels as Long (int64), hence the RuntimeError shown below.
import torch.nn as nn
L = nn.CrossEntropyLoss()
outs = torch.randn(5,5) # 5 samples, 5 scores per sample
labels = torch.Tensor([1,2,3,4,0]) # one class index per sample, stored as float
lossval = L(outs,labels) # Calculate CrossEntropyLoss between outs and labels

RuntimeError: expected scalar type Long but found Float

In [7]:
# 4. mismatched tensor type (fixed)
# Fix: cast the labels to Long (int64) before passing them to the loss.
# `L` and `outs` come from the previous cell.
labels = labels.long()
lossval = L(outs,labels)
print(lossval)

tensor(1.6936)


**3. More on dataset and dataloader**


A dataset is a collection of data organized in a structured way. A dataloader is an object that iterates through the dataset.

As an example, let the dataset be the lowercase English alphabet "abcdefghijklmnopqrstuvwxyz".

In [16]:
# Toy dataset: a plain string in which every character is one data point.
dataset = "abcdefghijklmnopqrstuvwxyz"

A simple dataloader can be implemented with a Python `for` loop.

In [17]:
# The simplest possible "dataloader": visit the data points one by one, in
# their stored order, and print each on its own line.
for datapoint in dataset:
  print(datapoint)

a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z


When using a dataloader, we often want to shuffle the data. This is where torch.utils.data.DataLoader comes in handy. If, from the point of view of torch.utils.data.DataLoader, every data point is identified by an index (0, 1, 2, ...), then shuffling can be done simply by shuffling an array of indices.

(shuffle: to put into random order)

torch.utils.data.DataLoader needs two pieces of information to fulfill its role. First, it needs to know the length of the dataset. Second, once torch.utils.data.DataLoader has produced the shuffled indices, the dataset needs to return the data corresponding to each index.

Therefore, torch.utils.data.Dataset provides this information through two methods, `__len__()` and `__getitem__()`, to support torch.utils.data.DataLoader.

In [18]:
import torch
import torch.utils.data 
class ExampleDataset(torch.utils.data.Dataset):
  """Map-style dataset whose samples are the 26 lowercase English letters."""

  def __init__(self):
    # All samples live in one string; each character is one sample.
    self.data = "abcdefghijklmnopqrstuvwxyz"

  def __len__(self):
    """Return how many samples the dataset holds."""
    n_samples = len(self.data)
    return n_samples

  def __getitem__(self,idx):
    """Return the sample stored at position ``idx``."""
    letter = self.data[idx]
    return letter

# Instantiate the dataset and wrap it in a DataLoader that visits every index
# once, in random order, yielding batches that contain a single item.
dataset1 = ExampleDataset()
dataloader = torch.utils.data.DataLoader(
  dataset1,
  batch_size=1,
  shuffle=True,
)
for datapoint in dataloader:
  print(datapoint)

['g']
['p']
['h']
['x']
['i']
['c']
['s']
['k']
['v']
['a']
['d']
['z']
['u']
['y']
['o']
['w']
['n']
['b']
['r']
['m']
['e']
['t']
['l']
['q']
['f']
['j']


A simple data augmentation technique can be implemented by changing the code in `__len__()` and `__getitem__()`. Suppose we want to double the length of the dataset by adding the uppercase letters while still storing only the lowercase letters; the dataset can be changed as follows.

In [19]:
import torch.utils.data 
class ExampleDataset(torch.utils.data.Dataset):
  """Alphabet dataset "augmented" to twice its size without storing extra data.

  Only the lowercase letters are stored. The first half of the index range
  returns them unchanged; the second half returns the same letters converted
  to upper case on the fly.
  """

  def __init__(self):
    self.data = "abcdefghijklmnopqrstuvwxyz"

  def __getitem__(self,idx): # if the index is idx, what will be the data?
    """Return the letter that belongs to index ``idx``.

    Raises:
      IndexError: if ``idx`` is greater than or equal to ``len(self)``.
        Without this, plain iteration over the dataset (``for x in dataset``,
        ``list(dataset)``), which stops only on IndexError, would never end.

    Negative indices are passed straight to the stored string.
    """
    base_len = len(self.data)
    total_len = 2 * base_len
    if idx >= total_len:
      raise IndexError(f"index {idx} is out of range for dataset of length {total_len}")
    if idx >= base_len: # second half: upper-case version of the matching letter
      return self.data[idx - base_len].upper()
    return self.data[idx] # first half: the stored lowercase letter

  def __len__(self): # What is the length of the dataset
    """Return the augmented length: twice the number of stored letters."""
    return 2 * len(self.data) # The length is now twice as large

# Same loading code as for the plain alphabet dataset: the DataLoader asks the
# dataset for its length, shuffles the indices and fetches one item per batch,
# so both lowercase and uppercase letters show up in random order.
dataset1 = ExampleDataset()
dataloader = torch.utils.data.DataLoader(
  dataset1,
  batch_size=1,
  shuffle=True,
)
for datapoint in dataloader:
  print(datapoint)

['K']
['r']
['A']
['u']
['j']
['D']
['Y']
['o']
['T']
['x']
['i']
['w']
['e']
['G']
['p']
['O']
['X']
['y']
['I']
['L']
['q']
['R']
['B']
['f']
['k']
['Z']
['l']
['W']
['n']
['s']
['g']
['z']
['m']
['H']
['t']
['P']
['C']
['N']
['S']
['V']
['a']
['J']
['v']
['Q']
['U']
['M']
['h']
['E']
['c']
['d']
['F']
['b']
