### Element-wise multiply

In [1]:
import torch
import torch.nn as nn
# Two (1, 3) tensors: the product is taken position by position.
a = torch.tensor([[1, 2, 3]])
b = torch.tensor([[1, 2, 3]])
torch.mul(a, b)  # equivalent to a * b

tensor([[1, 4, 9]])

In [2]:
# A (1, 1) operand is broadcast across every element of `a`.
b = torch.tensor([[4]])
torch.mul(a, b)

tensor([[ 4,  8, 12]])

In [3]:
# A 1-D tensor of length 1 broadcasts the same way; the result keeps `a`'s 2-D shape.
b = torch.tensor([4])
torch.mul(a, b)

tensor([[ 4,  8, 12]])

### Transpose

In [4]:
# Nested lists give a 3-D tensor: 2 blocks, each holding 1 row of 3 values.
a = torch.tensor([[[1, 2, 3]], [[11, 12, 13]]])
a.size()

torch.Size([2, 1, 3])

In [5]:
a.transpose(0, 1).shape  # swap dims 0 and 1

torch.Size([1, 2, 3])

In [6]:
a.transpose(1, 2).shape  # swap dims 1 and 2

torch.Size([2, 3, 1])

In [7]:
a.transpose(-1, -2).shape  # negative dims count from the end; transpose(-2, -1) gives the same result

torch.Size([2, 3, 1])

### squeeze / unsqueeze / repeat

In [8]:
# Base (3, 2) tensor for the unsqueeze / repeat / squeeze examples.
a = torch.tensor([[1, 3], [2, 1], [1, 1]])
print(a.size())

torch.Size([3, 2])


In [9]:
torch.unsqueeze(a, 0).shape  # new size-1 dim at the front

torch.Size([1, 3, 2])

In [10]:
# Insert the size-1 dim in the middle this time.
b = torch.unsqueeze(a, 1)
b.size()

torch.Size([3, 1, 2])

In [11]:
# repeat takes one tile count per dim: dim 1 is tiled 3 times, the others once.
c = b.repeat((1, 3, 1))
c.size()

torch.Size([3, 3, 2])

In [12]:
torch.squeeze(c, -1).shape  # the last dim has size 2, not 1, so squeeze leaves the shape unchanged

torch.Size([3, 3, 2])

In [13]:
# Display the repeated tensor: each row of `a` appears three times along dim 1.
c

tensor([[[1, 3],
         [1, 3],
         [1, 3]],

        [[2, 1],
         [2, 1],
         [2, 1]],

        [[1, 1],
         [1, 1],
         [1, 1]]])

In [14]:
print(a.size())
# With more tile counts than dims, `a` is first treated as (1, 1, 3, 2):
# (1, 1, 3, 2) tiled by (1, 3, 1, 2) -> (1, 3, 3, 4)
a.repeat((1, 3, 1, 2)).size()

torch.Size([3, 2])


torch.Size([1, 3, 3, 4])

### Matrix multiply

In [11]:
# If the first argument is 2-dimensional and the second argument is 1-dimensional,
# the matrix-vector product is returned.

# Define `a` in this cell instead of relying on kernel state: the `a` left by the
# preceding section is (3, 2), which cannot be multiplied with a length-3 vector.
a = torch.tensor([[1, 2, 3]])
print(a.shape)
b = torch.tensor([1, 1, 2])
print(b.shape)
a @ b  # (1x3) @ (3x{1}) -> 1; the implicit {1} is dropped from the result

torch.Size([1, 3])
torch.Size([3])


tensor([9])

In [13]:
# Same matrix-vector rule with two rows: one dot product per row of `a`.
a = torch.tensor([[1, 2, 3], [1, 2, 3]])
print(a.size())
b = torch.tensor([1, 1, 2])
print(b.size())
torch.matmul(a, b)  # (2x3) @ (3x{1}) -> 2; the implicit {1} is dropped from the result

torch.Size([2, 3])
torch.Size([3])


tensor([9, 9])

In [12]:
# If the first argument is 1-dimensional and the second argument is 2-dimensional,
# a 1 is prepended to the first argument's shape for the matrix multiply and
# removed again afterwards.

c = torch.tensor([[4], [4], [3]])
print(c.size())
torch.matmul(b, c)  # ({1}x3) @ (3x1) -> 1

torch.Size([3, 1])


tensor([14])

In [15]:
# Same rule with a two-column matrix: one dot product per column of `c`.
c = torch.tensor([[4, 1], [4, 1], [3, 2]])
print(c.size())
torch.matmul(b, c)  # ({1}x3) @ (3x2) -> 2; the prepended {1} is dropped from the result

torch.Size([3, 2])


tensor([14,  6])

If both arguments are at least 1-dimensional and at least one argument is N-dimensional (where N > 2), then a batched matrix multiply is returned. If the first argument is 1-dimensional, a 1 is prepended to its dimension for the purpose of the batched matrix multiply and removed after. If the second argument is 1-dimensional, a 1 is appended to its dimension for the purpose of the batched matrix multiply and removed after. The non-matrix (i.e. batch) dimensions are broadcasted (and thus must be broadcastable). For example, if input is a $(j \times 1 \times n \times n)$ tensor and other is a $(k \times n \times n)$ tensor, out will be a $(j \times k \times n \times n)$ tensor.

Note that the broadcasting logic only looks at the batch dimensions when determining if the inputs are broadcastable, and not the matrix dimensions. For example, if input is a $(j \times 1 \times n \times m)$ tensor and other is a $(k \times m \times p)$ tensor, these inputs are valid for broadcasting even though the final two dimensions (i.e. the matrix dimensions) are different. out will be a $(j \times k \times n \times p)$ tensor.

### Linear

In [49]:
import torch.nn as nn
# nn.Linear acts on the last dim only: (128, 10, 20) -> (128, 10, 30).
# The pasted `>>>` doctest prompts are removed so the cell is plain Python, and the
# batch is called `x` so the builtin `input` is not shadowed.
m = nn.Linear(20, 30)
x = torch.randn(128, 10, 20)
output = m(x)
print(output.size())

torch.Size([128, 10, 30])


### Split and Concat

In [6]:
# Base (3, 2) tensor for the split / cat examples.
a = torch.tensor([[1, 2], [3, 4], [5, 6]])
print(a.size())

torch.Size([3, 2])


In [8]:
torch.cat(a.split(3, dim=0), dim=0)  # a single chunk of 3 rows, so cat returns the original values

tensor([[1, 2],
        [3, 4],
        [5, 6]])

In [3]:
a.split(1, dim=0)  # three (1, 2) row chunks

(tensor([[1, 2]]), tensor([[3, 4]]), tensor([[5, 6]]))

In [4]:
a.split(1, dim=1)  # two (3, 1) column chunks

(tensor([[1],
         [3],
         [5]]), tensor([[2],
         [4],
         [6]]))

In [62]:
# Show `a` again as a reference for the split + cat combinations that follow.
a

tensor([[1, 2],
        [3, 4],
        [5, 6]])

In [63]:
torch.cat(a.split(1, dim=1), dim=0)  # columns stacked on top of each other -> (6, 1)

tensor([[1],
        [3],
        [5],
        [2],
        [4],
        [6]])

In [64]:
torch.cat(a.split(1, dim=0), dim=1)  # rows laid side by side -> (1, 6)

tensor([[1, 2, 3, 4, 5, 6]])

In [65]:
torch.cat(a.split(1, dim=0), dim=0)  # rows re-stacked -> the original (3, 2)

tensor([[1, 2],
        [3, 4],
        [5, 6]])

In [66]:
torch.cat(a.split(1, dim=1), dim=1)  # columns re-joined -> the original (3, 2)

tensor([[1, 2],
        [3, 4],
        [5, 6]])

### LayerNormalization

In [5]:
# LayerNorm over the last two dims: normalized_shape is the trailing (10, 10).
input = torch.randn(20, 5, 10, 10)
m = nn.LayerNorm(input.shape[-2:])
output = m(input)
output.size()

torch.Size([20, 5, 10, 10])

In [68]:
# Normalize over the last dim only.
m = nn.LayerNorm(10)
output = m(input)
output.size()

torch.Size([20, 5, 10, 10])

In [70]:
# Normalize over the last three dims.
m = nn.LayerNorm((5, 10, 10))
output = m(input)
output.size()

torch.Size([20, 5, 10, 10])

In [72]:
# normalized_shape may cover every dim of the input, batch dim included.
m = nn.LayerNorm((20, 5, 10, 10))
output = m(input)
output.size()

torch.Size([20, 5, 10, 10])

In [71]:
# normalized_shape must equal the trailing dims of the input. [3, 10, 10] does not
# match an input ending in [5, 10, 10], so the forward pass raises RuntimeError.
# The error is the point of this cell, so catch and print it rather than letting it
# abort a top-to-bottom run of the notebook.
m = nn.LayerNorm([3, 10, 10])
try:
    output = m(input)
except RuntimeError as err:
    print("RuntimeError:", err)

RuntimeError: Given normalized_shape=[3, 10, 10], expected input with shape [*, 3, 10, 10], but got input of size[20, 5, 10, 10]

In [23]:
# LayerNorm(3) on a (2, 3) input standardizes each row on its own
# (zero mean, unit variance across the 3 values of that row).
input = torch.tensor([[1., 2., 3.], [4., 4., 1.]])
m = nn.LayerNorm(3)
output = m(input)
print(input)
output

tensor([[1., 2., 3.],
        [4., 4., 1.]])


tensor([[-1.2247,  0.0000,  1.2247],
        [ 0.7071,  0.7071, -1.4142]], grad_fn=<NativeLayerNormBackward>)

In [20]:
import numpy as np
from sklearn.preprocessing import StandardScaler
# Same numbers as the LayerNorm input, transposed so each original row becomes a
# column (the scaler used next standardizes column by column).
data = np.transpose(np.array([[1, 2, 3], [4, 4, 1]]))
data

array([[1, 4],
       [2, 4],
       [3, 1]])

In [21]:
# Fit and apply the scaler in one step, then transpose back so the rows line up
# with the LayerNorm output for comparison.
scaler = StandardScaler()
scaler.fit_transform(data).T

array([[-1.22474487,  0.        ,  1.22474487],
       [ 0.70710678,  0.70710678, -1.41421356]])

In [22]:
# Per-column mean and variance learned by the scaler.
print(scaler.mean_, scaler.var_, sep="\n")

[2. 3.]
[0.66666667 2.        ]


###  BatchNorm

In [9]:
# The pasted `>>>` doctest prompts are removed so the cell is plain Python.
# With learnable parameters
m = nn.BatchNorm1d(100)
# Without learnable parameters (this second assignment is the module actually used below)
m = nn.BatchNorm1d(100, affine=False)
input = torch.randn(20, 100)
output = m(input)
output.shape

torch.Size([20, 100])

In [15]:
# BatchNorm1d(2) on a (batch=2, channels=2, length=3) input: each channel is
# standardized with statistics pooled over the batch and length dims. Both batch
# entries are identical here, so the numbers coincide with the row-wise LayerNorm result.
sample = [[1., 2., 3.], [4., 4., 1.]]
input = torch.tensor([sample, sample])
print(input.size())
m = nn.BatchNorm1d(2, affine=False)
output = m(input)
print(output.size())
output

torch.Size([2, 2, 3])
torch.Size([2, 2, 3])


tensor([[[-1.2247,  0.0000,  1.2247],
         [ 0.7071,  0.7071, -1.4142]],

        [[-1.2247,  0.0000,  1.2247],
         [ 0.7071,  0.7071, -1.4142]]])

In [16]:
# First batch element of the BatchNorm input, for comparison with the normalized output.
input[0]

tensor([[1., 2., 3.],
        [4., 4., 1.]])

### pack_padded_sequence

In [20]:
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
# Three zero-padded sequences, one per row, with a single feature per step -> (3, 3, 1).
seq = torch.tensor([[1., 2., 0.], [3., 0., 0.], [4., 5., 6.]])[..., None]
lens = [2, 1, 3]  # unpadded length of each row
# enforce_sorted=False lets the rows come in any length order.
packed = pack_padded_sequence(input=seq, lengths=lens, batch_first=True, enforce_sorted=False)
packed

PackedSequence(data=tensor([[4.],
        [1.],
        [3.],
        [5.],
        [2.],
        [6.]]), batch_sizes=tensor([3, 2, 1]), sorted_indices=tensor([2, 0, 1]), unsorted_indices=tensor([1, 2, 0]))

In [26]:
# nn.RNN's positional arguments are (input_size, hidden_size, num_layers).
# `seq` is laid out batch-first (it was packed with batch_first=True), so the RNN is
# built with batch_first=True as well; otherwise dim 0 would be read as the time axis.
# PackedSequence inputs are unaffected by this flag.
rnn = nn.RNN(1, 1, 1, batch_first=True)
output, hn = rnn(seq)

In [29]:
# Initial hidden state, shaped (num_layers, batch, hidden_size) = (1, 3, 1).
h0 = torch.zeros((1, 3, 1))
output, hn = rnn(packed, h0)

In [30]:
# For a packed input the RNN output is itself a PackedSequence, with the same
# batch_sizes and index tensors as the input.
output

PackedSequence(data=tensor([[-0.9544],
        [-0.4937],
        [-0.8923],
        [-0.9501],
        [-0.6244],
        [-0.9793]], grad_fn=<CatBackward>), batch_sizes=tensor([3, 2, 1]), sorted_indices=tensor([2, 0, 1]), unsorted_indices=tensor([1, 2, 0]))

In [31]:
# Unpack back to a padded batch-first tensor; the call returns (padded tensor, lengths).
out = pad_packed_sequence(output, batch_first=True)

In [34]:
# Element 0 of the returned tuple is the padded output tensor.
out[0].shape

torch.Size([3, 3, 1])

In [35]:
# The original padded input has the same shape as the unpacked RNN output.
seq.shape

torch.Size([3, 3, 1])

### Masked Fill

In [112]:
# Lower-triangular boolean mask: upper triangle of ones, compared to 1, then transposed.
mask = torch.ones(3, 3).triu().eq(1).t()
mask

tensor([[ True, False, False],
        [ True,  True, False],
        [ True,  True,  True]])

In [113]:
# Cast to float32 so the mask can hold -inf.
mask = mask.to(torch.float32)
mask

tensor([[1., 0., 0.],
        [1., 1., 0.],
        [1., 1., 1.]])

In [114]:
# Additive mask: -inf where the mask is 0, 0.0 where it is 1.
(
    mask
    .masked_fill(mask.eq(0), float('-inf'))
    .masked_fill(mask.eq(1), 0.0)
)

tensor([[0., -inf, -inf],
        [0., 0., -inf],
        [0., 0., 0.]])

In [121]:
mask = mask.to(torch.float64)  # float64 copy used by the torch.where example

In [124]:
torch.where(mask.eq(0.), float('-inf'), mask)  # zeros become -inf; other entries keep their value (1.0)

tensor([[1., -inf, -inf],
        [1., 1., -inf],
        [1., 1., 1.]], dtype=torch.float64)

### Parameter weight access

In [104]:
# The weight of a Linear layer is an nn.Parameter shaped (out_features, in_features).
l = nn.Linear(in_features=1, out_features=2)
l.weight

Parameter containing:
tensor([[0.8645],
        [0.0497]], requires_grad=True)

### Type conversion

In [109]:
seq.to(torch.int32).type()  # cast to 32-bit int, then report the tensor type string

'torch.IntTensor'

### clip_grad_norm_

In [96]:
# Toy model used only to have some parameters for the clip_grad_norm_ example.
# NOTE(review): as written the stack cannot run a forward pass: nn.Linear(1,1)
# produces a last dim of size 1, while nn.LayerNorm(256) expects a last dim of 256.
# It is only constructed and inspected here; confirm the intended sizes before reuse.
model = nn.Sequential(
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1,1),
            nn.LayerNorm(256),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(1,1),
            )

In [98]:
# Referencing the bound method without calling it: the repr shows the module summary.
# Call model.parameters() to get the actual parameter iterator.
model.parameters

<bound method Module.parameters of Sequential(
  (0): ReLU()
  (1): Dropout(p=0.5, inplace=False)
  (2): Linear(in_features=1, out_features=1, bias=True)
  (3): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
  (4): ReLU()
  (5): Dropout(p=0.25, inplace=False)
  (6): Linear(in_features=1, out_features=1, bias=True)
)>

In [None]:
# One training step with gradient-norm clipping.
# `optimizer`, `data`, `hidden` and `targets` come from the surrounding training code.
optimizer.zero_grad()
loss, hidden = model(data, hidden, targets)
loss.backward()
# Clip after backward() and before step(). The second argument is max_norm, a cap on
# the total norm of all gradients taken together, not a per-element value
# (clip_grad_value_ is the per-element variant).
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
optimizer.step()

### Avoid padding in loss

In [None]:
# Exclude padded positions (token id 0) from the loss.
# Index with the boolean mask directly: np.where would need a NumPy conversion of the
# comparison result, which fails for tensors living on the GPU.
# assumes inp is a torch tensor whose shape matches the leading dims of y_pred / y_true
# (TODO confirm)
non_pad_mask = inp != 0
loss = bce_criterion(y_pred[non_pad_mask], y_true[non_pad_mask])

In [None]:
# inside forward
att = att.masked_fill(inp == 0, float('-inf')) #Large negative value
att = torch.softmax(att)
out =  torch.bmm(att, inp)

In [130]:
# Softmax along dim 0: each column sums to 1.
a = torch.tensor([[1., 2., 3.], [1., 1., 1.]])
a.softmax(dim=0)

tensor([[0.5000, 0.7311, 0.8808],
        [0.5000, 0.2689, 0.1192]])

In [131]:
a.softmax(dim=1)  # along dim 1: each row sums to 1

tensor([[0.0900, 0.2447, 0.6652],
        [0.3333, 0.3333, 0.3333]])

In [134]:
# A very large negative logit gets a softmax weight of 0, which is why padded
# positions are filled with a large negative value before the softmax.
a = torch.tensor([[1., 2., -30000.], [1., 1., 1.]])
a.softmax(dim=1)

tensor([[0.2689, 0.7311, 0.0000],
        [0.3333, 0.3333, 0.3333]])

### view

In [2]:
# Define the (1, 3) tensor used in this section here; the softmax section above
# leaves a different (2, 3) float `a` in the kernel.
a = torch.tensor([[1, 2, 3]])
a

tensor([[1, 2, 3]])

In [7]:
# view needs compatible (in practice contiguous) memory, so contiguous() goes before
# view; calling it after view does nothing useful. -1 lets view infer the remaining dim.
a.contiguous().view(3, -1)

tensor([[1],
        [2],
        [3]])

In [8]:
# view returned a new tensor object; `a` itself still has its original shape.
a

tensor([[1, 2, 3]])