<a href="https://colab.research.google.com/github/Aska-zhang/O50/blob/master/PyTorchIntro.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PyTorch Introduction
#### Yutao ZHU, University of Montreal

PyTorch is a Python-based scientific computing package that serves two broad purposes:
- A replacement for NumPy to use the power of GPUs and other accelerators
- An automatic differentiation library that is useful to implement neural networks

#### Tensors
- Similar to arrays and matrices
- Comparable to NumPy's ndarrays, but can also run on GPUs and other accelerators

In [None]:
import torch 
import torch.nn as nn

In [None]:
# Create Tensor
# Build a tensor directly from a rectangular nested Python list; the dtype is
# inferred from the values (integers here).
data = [
    [1, 2],
    [3, 4],
]
x_data = torch.tensor(data)
print(x_data)

tensor([[1, 2],
        [3, 4]])


In [None]:
# A ragged nested list (rows of different lengths) cannot form a rectangular
# tensor, so torch.tensor raises ValueError. The error is the point of this
# cell, so it is caught and displayed instead of aborting a "Run All".
data = [[1, 2, 3], [4, 5]]
try:
    x_data = torch.tensor(data)
    print(x_data)
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: ignored

In [None]:
# Factory functions: each one takes the desired shape.
rand_tensor = torch.rand(2, 2)    # uniform random values in [0, 1)
ones_tensor = torch.ones(2, 2)    # all ones
zeros_tensor = torch.zeros(2, 2)  # all zeros
print(rand_tensor)
print(ones_tensor)
print(zeros_tensor)

tensor([[0.4614, 0.0777],
        [0.3027, 0.1466]])
tensor([[1., 1.],
        [1., 1.]])
tensor([[0., 0.],
        [0., 0.]])


In [None]:
# Attributes
t = torch.rand(3, 4)
print(t.shape)
print(t.size())
print(t.size(1))
print(t.dtype)
print(t.device)
print(t.type())

torch.Size([3, 4])
torch.Size([3, 4])
4
torch.float32
cpu
torch.FloatTensor


In [None]:
# Type
t = torch.rand(3, 4)
t1 = t.int()
print(t1)

tensor([[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]], dtype=torch.int32)


In [None]:
# To GPU
t = torch.rand(3, 4)
t.to("cuda")
print(t)

tensor([[0.5781, 0.8331, 0.0728, 0.5034],
        [0.1112, 0.3051, 0.8690, 0.2731],
        [0.6267, 0.2233, 0.1759, 0.4045]])


In [None]:
# Indexing and Slicing
t = torch.ones(4, 4)
print(t[0, 0])
t[:, 1] = 0
print(t)

tensor(1.)
tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])


In [None]:
# Concatenation
t = torch.ones(4, 4)
t2 = torch.cat([t, t, t], dim=0)
t3 = torch.cat([t, t, t], dim=1)
print(t2.size())
print(t3.size())
# t4 = torch.cat([t, t, t], dim=2) 
t = t.unsqueeze(-1)
print(t.size())
t4 = torch.cat([t, t, t], dim=2)
print(t4.size())

torch.Size([12, 4])
torch.Size([4, 12])
torch.Size([4, 4, 1])
torch.Size([4, 4, 3])


In [None]:
# Basic Operation
t = torch.ones(4, 4)
t2 = t * t
t3 = t + t
t4 = t2 - t
t5 = t4 / t
print(t)
print(t2)
print(t3)
print(t4)
print(t5)

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])
tensor([[2., 2., 2., 2.],
        [2., 2., 2., 2.],
        [2., 2., 2., 2.],
        [2., 2., 2., 2.]])
tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])
tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])


In [None]:
# Matrix multiplication: (2, 4) @ (4, 2) -> (2, 2), and (4, 2) @ (2, 4) -> (4, 4).
# The `@` operator is equivalent to torch.matmul / Tensor.matmul.
t = torch.ones(2, 4)
t_transposed = t.T
t2 = t @ t_transposed
print(t2.size())
t3 = t_transposed @ t
print(t3.size())
t4 = torch.matmul(t, t_transposed)
print(t4.size())

torch.Size([2, 2])
torch.Size([4, 4])
torch.Size([2, 2])


In [None]:
# view / reshape keep the element order and only change the shape;
# -1 asks PyTorch to infer that dimension from the element count.
t = torch.ones(2, 4).view(4, 2).reshape(4, -1).view(-1)
print(t.size())

# permute swaps axes (a transpose here), whereas reshape just re-chunks the
# same row-major sequence: both results are (4, 2) but hold different layouts.
t = torch.randn(2, 4)
t2 = t.transpose(0, 1)
t3 = t.reshape(4, 2)
for shown in (t, t2, t3):
    print(shown)

# Swapping the first two axes of a 3-D tensor: (2, 3, 4) -> (3, 2, 4).
t = torch.randn(2, 3, 4)
t4 = t.transpose(0, 1)
print(t4.size())

torch.Size([8])
tensor([[ 1.9267,  0.8563,  0.9197, -1.2746],
        [ 1.0293,  0.2902, -0.0114,  0.0909]])
tensor([[ 1.9267,  1.0293],
        [ 0.8563,  0.2902],
        [ 0.9197, -0.0114],
        [-1.2746,  0.0909]])
tensor([[ 1.9267,  0.8563],
        [ 0.9197, -1.2746],
        [ 1.0293,  0.2902],
        [-0.0114,  0.0909]])
torch.Size([3, 2, 4])


In [None]:
# Squeeze & Unsqueeze
t = torch.randn(5, 1, 1, 6)
print(t.squeeze(1).size())
print(t.squeeze().size())
print(t.unsqueeze(0).size())

torch.Size([5, 1, 6])
torch.Size([5, 6])
torch.Size([1, 5, 1, 1, 6])


In [None]:
# Einsum
A = torch.randn(3, 2, 5)
B = torch.randn(3, 5, 4)
C = torch.einsum('bij,bjk->bik', A, B)
print(C.size())

A = torch.randn(3, 5, 4)
l = torch.randn(2, 5)
r = torch.randn(2, 4)
B = torch.einsum('bn,anm,bm->ba', l, A, r)
print(B.size())

torch.Size([3, 2, 4])
torch.Size([2, 3])


In [None]:
# Pairwise dot products between the rows of A and the rows of B:
# C[b, c] = sum over d of A[b, d] * B[c, d]
A = torch.randn(5, 10)
B = torch.randn(5, 10)
C = torch.einsum('bd,cd->bc', A, B)
print(C.size())

# Batched bilinear form:
# D[b, l, k] = sum over d, e of A[b, l, d] * C[d, e] * B[b, k, e]
# C needs two DISTINCT subscripts: repeating a letter on a single operand
# ('dd') makes einsum take only its diagonal, which would silently ignore
# every off-diagonal entry of the weight matrix.
A = torch.randn(5, 10, 15)
B = torch.randn(5, 20, 15)
C = torch.randn(15, 15)
D = torch.einsum('bld,de,bke->blk', A, C, B)
print(D.size())

torch.Size([5, 5])
torch.Size([5, 10, 20])


In [None]:
# Broadcasting
x = torch.randn(5, 3)
y = torch.randn(5, 1)
z = x + y
a = x + 5
print(z.size())
print(a.size())

torch.Size([5, 3])
torch.Size([5, 3])


In [None]:
# Maximum, Minimum, and Mean
A = torch.randn(5, 10)
m1 = A.max(dim=1)[0]
m2 = A.max(dim=0)[0]
m3 = A.mean(dim=1)
m4 = A.mean(dim=1, keepdim=True)
print(m1.size(), m2.size(), m3.size(), m4.size())

torch.Size([5]) torch.Size([10]) torch.Size([5]) torch.Size([5, 1])


#### Neural Networks

In [None]:
# Linear
m = nn.Linear(20, 30, bias=True)
a = torch.randn(128, 20)
b = m(a)
print(b.size())

torch.Size([128, 30])


In [None]:
# Embedding
# nn.Embedding(num_embeddings, embedding_dim, padding_idx=None)
# nn.Embedding.from_pretrained(embeddings, freeze=True, padding_idx=None)
weight = torch.rand(10, 400)
embedding = nn.Embedding.from_pretrained(weight)
a = torch.LongTensor([1])
print(embedding(a).size())

torch.Size([1, 400])


In [None]:
# Conv1d
# nn.Conv1d(in, out, kernel_size, stride=1, padding=0)
m = nn.Conv1d(16, 32, 3, stride=2)
a = torch.randn(20, 16, 50)
b = m(a)
print(b.size())
# bsz = 20, seq_len = 50, hidden = 16

torch.Size([20, 32, 24])


In [None]:
# Conv2d
# nn.Conv2d(in, out, kernel_size, stride=1, padding=0)
m = nn.Conv2d(16, 33, 3, stride=2)
a = torch.randn(20, 16, 50, 100)
b = m(a)
print(b.size())

torch.Size([20, 33, 24, 49])


In [None]:
# MaxPool1d
# nn.MaxPool1d(kernel_size, stride=None, padding=0)
m = nn.MaxPool1d(3, stride=2)
a = torch.randn(20, 16, 50)
b = m(a)
print(b.size())

torch.Size([20, 16, 24])


In [None]:
# MaxPool2d
# nn.MaxPool2d(kernel_size, stride=None, padding=0)
m = nn.MaxPool2d((3, 2), stride=(2, 1))
a = torch.randn(20, 16, 50, 32)
b = m(a)
print(b.size())

torch.Size([20, 16, 24, 31])


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


In [None]:
# GRU
# nn.GRU(input_size, hidden_size, num_layers=1, batch_first=False, bidirectional=False)
gru = nn.GRU(10, 20, 2, batch_first=True)
a = torch.randn(3, 10, 10)
output, h = gru(a)
print(output.size())  # (only last layer)
print(h.size())  # (not affected by batch_first)

torch.Size([3, 10, 20])
torch.Size([2, 3, 20])


In [None]:
# LSTM
# nn.LSTM(input_size, hidden_size, num_layers=1, batch_first=False)
lstm = nn.LSTM(10, 20, 2, batch_first=True)
a = torch.randn(3, 10, 10)
output, (hn, cn) = lstm(a)
print(output.size())
print(hn.size())
print(cn.size())

torch.Size([3, 10, 20])
torch.Size([2, 3, 20])
torch.Size([2, 3, 20])


In [None]:
# Sequential
a = nn.Sequential(
	nn.Linear(10, 10), 
	nn.ReLU(), 
	nn.Linear(10, 1)
)
b = torch.randn(10, 10)
c = a(b)
print(c.size())

torch.Size([10, 1])


In [None]:
# Dropout
p = nn.Dropout(p=0.1)

In [None]:
# Non-linear activation
nn.Softmax(dim=None)
nn.Sigmoid()
nn.ReLU()
nn.Tanh()

In [None]:
# Transformer
# nn.TransformerEncoder(encoder_layer, num_layers, norm=None)
# nn.TransformerEncoderLayer(d_model, nhead, dim_ff, dropout=0.1, activation=‘relu’)
encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)
src = torch.rand(10, 32, 512)
out = transformer_encoder(src)
print(out.size())

torch.Size([10, 32, 512])


In [None]:
# Loss Function
nn.MSELoss()
(N, *), (N, *)

nn.CrossEntropyLoss()
(N, C), (N) (Target is a LongTensor, no Softmax)

nn.NLLLoss()
(N, C), (N)

nn.KLDivLoss()
(N, *), (N, *)

nn.BCELoss()
(N, *), (N, *) (Target is a FloatTensor, need Sigmoid)

nn.BCEWithLogitsLoss()
(N, *), (N, *) (no Sigmoid, more stable)
