In [2]:
import torch
from torch import nn

In [3]:
sents = [["hello", "world"], ['I', "am", "muqi"]]
max(len(sent) for sent in sents)

3

In [4]:
type(len(sent) for sent in sents)

generator

## Embedding

In [6]:
embedding = nn.Embedding(num_embeddings=10, embedding_dim=3)

In [7]:
# Two sequences of four token indices each.
# Named `token_ids` rather than `input` so the builtin `input()` is not shadowed
# for the rest of the kernel session.
token_ids = torch.LongTensor([[1, 2, 4, 5], [4, 3, 2, 9]])
embedding(token_ids)

tensor([[[-0.0502,  0.9442,  0.1887],
         [ 0.4046,  0.5226,  1.3767],
         [ 0.9331,  0.8740, -0.3424],
         [-0.0202, -1.5035, -0.9274]],

        [[ 0.9331,  0.8740, -0.3424],
         [ 0.0793,  1.2880,  1.1666],
         [ 0.4046,  0.5226,  1.3767],
         [ 0.6238,  0.3612,  0.6272]]], grad_fn=<EmbeddingBackward0>)

In [8]:
# Reserve index 0 for padding: the embedding row at `padding_idx` is
# initialised to zeros, as the weight matrix displayed below shows.
padding_idx = 0
embedding = nn.Embedding(num_embeddings=3, embedding_dim=3, padding_idx=padding_idx)
embedding.weight

Parameter containing:
tensor([[ 0.0000,  0.0000,  0.0000],
        [-1.2526,  2.5538, -0.6651],
        [ 0.2548,  0.1436,  1.3404]], requires_grad=True)

## nn.Linear

In [9]:
X = torch.rand((2, 3))
X

tensor([[0.8821, 0.3777, 0.8630],
        [0.9137, 0.9933, 0.9808]])

In [10]:
linear = nn.Linear(3, 4, bias=False)
linear(X)

tensor([[ 2.4153e-02, -5.0768e-01,  4.0820e-04, -3.7730e-01],
        [ 2.0532e-01, -8.1654e-01, -2.3987e-01, -5.6550e-01]],
       grad_fn=<MmBackward0>)

In [11]:
print(linear.weight.detach())
weight = linear.weight.detach()


tensor([[ 0.0325,  0.3205, -0.1455],
        [-0.1602, -0.4499, -0.2276],
        [-0.0704, -0.4372,  0.2638],
        [-0.0312, -0.2472, -0.2971]])


In [12]:
weight

tensor([[ 0.0325,  0.3205, -0.1455],
        [-0.1602, -0.4499, -0.2276],
        [-0.0704, -0.4372,  0.2638],
        [-0.0312, -0.2472, -0.2971]])

In [13]:
# Manual check: with bias=False the layer is just X @ W^T, so this reproduces
# the values returned by linear(X) above.
X @ weight.T 

tensor([[ 2.4153e-02, -5.0768e-01,  4.0820e-04, -3.7730e-01],
        [ 2.0532e-01, -8.1654e-01, -2.3987e-01, -5.6550e-01]])

## nn.LSTM

In [14]:
X = torch.randn(2, 4, 3)  # (src_len=2, batch_size=4, embedding_size=3)
X

tensor([[[ 1.5063, -1.1010, -0.7137],
         [-1.2072, -0.8427,  0.3872],
         [ 0.4957, -0.0050, -1.0101],
         [ 0.8605,  0.1032, -1.0073]],

        [[-0.5147, -1.6275,  1.5635],
         [-0.3027, -0.2740,  0.2348],
         [-0.2201,  0.1086, -0.9181],
         [-1.1419,  0.3980,  2.7141]]])

In [16]:
# Single-layer, unidirectional LSTM: 3 input features -> 5 hidden features.
# `lstm` replaces the ambiguous single-letter name `l`.
lstm = nn.LSTM(input_size=3, hidden_size=5)
# output holds the hidden state at every time step; h_n / c_n are the final
# hidden and cell states.
output, (h_n, c_n) = lstm(X)

In [17]:
output.size()

torch.Size([2, 4, 5])

In [18]:
h_n.size()

torch.Size([1, 4, 5])

In [19]:
c_n.size()

torch.Size([1, 4, 5])

In [32]:
# Two-layer LSTM driven with explicit initial hidden and cell states.
rnn = nn.LSTM(10, 20, 2)  # input_size=10, hidden_size=20, num_layers=2
# Named `inputs` rather than `input` so the builtin `input()` is not shadowed.
inputs = torch.randn(5, 3, 10)  # (src_len=5, batch_size=3, embedding_size=10)
h0 = torch.randn(2, 3, 20)  # initial hidden state: (num_layers=2, batch_size=3, hidden_size=20)
c0 = torch.randn(2, 3, 20)  # initial cell state:   (num_layers=2, batch_size=3, hidden_size=20)
output, (hn, cn) = rnn(inputs, (h0, c0))

In [31]:
hn.shape

torch.Size([2, 3, 20])

In [23]:
hn

tensor([[[-0.0062,  0.0136, -0.0759, -0.1136,  0.1852, -0.1495, -0.0032,
          -0.0247, -0.0405,  0.1370, -0.2255,  0.0016, -0.0061, -0.0519,
           0.0878,  0.0337, -0.1171, -0.1153, -0.1839,  0.0550],
         [-0.1818,  0.0722,  0.1015, -0.0086,  0.0442, -0.0272, -0.1977,
           0.2624, -0.2884,  0.0169, -0.0990,  0.1516,  0.1033, -0.0417,
           0.0384, -0.1157, -0.1025,  0.0223, -0.1434, -0.0367],
         [-0.1348,  0.0634,  0.0910,  0.1521,  0.0169,  0.0596, -0.1282,
           0.0133, -0.1792,  0.1265, -0.1825, -0.1965,  0.1333,  0.0229,
          -0.0358, -0.0789,  0.0693, -0.1640, -0.1281,  0.1161]],

        [[-0.0244, -0.0013,  0.0169, -0.1830,  0.0003, -0.0704,  0.0325,
           0.0555,  0.0314, -0.1814,  0.0027, -0.0783, -0.0754, -0.0080,
           0.1075,  0.0710, -0.0468, -0.0831,  0.0903,  0.0456],
         [ 0.0059,  0.0487,  0.0209, -0.2704,  0.0442, -0.1275, -0.0824,
          -0.0486,  0.0267, -0.2158,  0.0730, -0.0536, -0.0588, -0.0502,
        

In [24]:
output.shape

torch.Size([5, 3, 20])

In [35]:
output[-1].shape

torch.Size([3, 20])

## torch.nn.utils.rnn

In [39]:
import torch
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence,pack_padded_sequence,pack_sequence,pad_packed_sequence

class MyData(Dataset):
    """Map-style dataset that serves items straight from an in-memory container."""

    def __init__(self, data):
        """Keep a reference to ``data``; no copy is made."""
        self.data = data

    def __len__(self):
        """Return the number of items held by the dataset."""
        samples = self.data
        return len(samples)

    def __getitem__(self, idx):
        """Return the item stored at position ``idx``."""
        samples = self.data
        return samples[idx]

def collate_fn(data):
    """Pad a batch of variable-length sequences into a single tensor.

    Args:
        data: list of tensors, one per sample, possibly of different lengths.

    Returns:
        Tensor of shape (batch_size, max_len, ...) whose rows are ordered
        longest first and right-padded with 0.
    """
    # sorted() builds a new list, so the caller's batch is left untouched
    # (list.sort() would reorder it in place).
    ordered = sorted(data, key=len, reverse=True)
    return pad_sequence(ordered, batch_first=True, padding_value=0)

# Four toy sequences of different lengths (4, 3, 2 and 1 elements).
a = torch.tensor([1,2,3,4])
b = torch.tensor([5,6,7])
c = torch.tensor([7,8])
d = torch.tensor([9])
train_x = [a, b, c, d]

data = MyData(train_x)
# shuffle=True: which sequences end up in the same batch varies between runs.
data_loader = DataLoader(data, batch_size=2, shuffle=True, collate_fn=collate_fn)
# Using the default collate_fn raises an error here (the sequences differ in length)
#data_loader = DataLoader(data, batch_size=2, shuffle=True) 
# batch_x = next(iter(data_loader))

In [40]:
for x in data_loader:
    print(x)

tensor([[1, 2, 3, 4],
        [7, 8, 0, 0]])
tensor([[5, 6, 7],
        [9, 0, 0]])


In [41]:
def collate_fn(data):
    """Pad a batch of variable-length sequences and pack it for an RNN.

    Args:
        data: list of tensors, one per sample, possibly of different lengths.

    Returns:
        PackedSequence built from the batch ordered longest first.
    """
    # sorted() returns a new list, leaving the caller's batch order intact.
    # Longest-first order is what pack_padded_sequence expects while
    # enforce_sorted keeps its default value.
    ordered = sorted(data, key=len, reverse=True)
    seq_len = [s.size(0) for s in ordered]  # true (unpadded) length of each sequence
    padded = pad_sequence(ordered, batch_first=True)
    return pack_padded_sequence(padded, seq_len, batch_first=True)

In [53]:
X = torch.randn((7, 2, 3)) # (src_len=7, batch_size=2, embedding_size=3)
X

tensor([[[ 5.3764e-02, -1.9899e+00,  5.2987e-01],
         [ 1.8881e+00,  3.5205e-01, -1.8764e-01]],

        [[-1.4849e+00,  1.0719e-01, -3.0231e+00],
         [-1.8021e+00, -3.2364e-01, -4.2887e-01]],

        [[ 1.6430e+00, -4.6612e-01, -1.2152e-01],
         [ 6.5978e-01,  6.2439e-01, -1.3417e+00]],

        [[ 1.0185e+00,  1.0280e+00,  1.4772e+00],
         [-1.5829e+00,  1.0082e+00,  7.3375e-01]],

        [[ 1.1040e+00, -8.3155e-01, -1.2767e-01],
         [-8.6767e-01, -4.5987e-01, -2.2826e+00]],

        [[ 9.5114e-01,  1.5636e+00, -4.6500e-01],
         [ 1.9530e-01,  7.6213e-01,  9.1524e-01]],

        [[-7.2296e-01,  2.5964e-01, -2.7762e+00],
         [-2.4087e-03, -2.6460e-01,  2.2495e-01]]])

In [54]:
# [5, 2] are the true (unpadded) lengths of the two sequences in the batch;
# time steps beyond each length are treated as padding and left out of the
# packed data (5 + 2 = 7 rows in the output below).
pack_X = pack_padded_sequence(X, [5, 2], batch_first=False)
pack_X

PackedSequence(data=tensor([[ 0.0538, -1.9899,  0.5299],
        [ 1.8881,  0.3520, -0.1876],
        [-1.4849,  0.1072, -3.0231],
        [-1.8021, -0.3236, -0.4289],
        [ 1.6430, -0.4661, -0.1215],
        [ 1.0185,  1.0280,  1.4772],
        [ 1.1040, -0.8316, -0.1277]]), batch_sizes=tensor([2, 2, 1, 1, 1]), sorted_indices=None, unsorted_indices=None)

In [86]:
# Bidirectional LSTM run directly on the packed batch.
# `lstm` replaces the ambiguous single-letter name `l`.
lstm = nn.LSTM(input_size=3, hidden_size=4, bidirectional=True)
# encoded_X stays a PackedSequence; each row concatenates both directions
# (2 * hidden_size = 8 features). h and c are the final hidden / cell states.
encoded_X, (h, c) = lstm(pack_X)
encoded_X, h, c

(PackedSequence(data=tensor([[ 0.0513, -0.1216, -0.1206, -0.0626,  0.2191,  0.0462, -0.1620, -0.1192],
         [-0.1388, -0.0696, -0.1174,  0.3243,  0.1639, -0.2015, -0.1552, -0.0157],
         [-0.0276, -0.1672, -0.5100, -0.1163,  0.3693, -0.1052, -0.2718, -0.2799],
         [-0.0499, -0.1091, -0.2752,  0.1658,  0.2080, -0.0085,  0.0015, -0.2020],
         [-0.0718, -0.2435, -0.1977,  0.1779,  0.1624, -0.2799, -0.1635,  0.0482],
         [-0.2580,  0.0138, -0.1369,  0.2754,  0.1702, -0.2720, -0.0945,  0.0573],
         [-0.1224, -0.0945, -0.1727,  0.3547,  0.0462, -0.0785, -0.0934,  0.0103]],
        grad_fn=<CatBackward0>), batch_sizes=tensor([2, 2, 1, 1, 1]), sorted_indices=None, unsorted_indices=None),
 tensor([[[-0.1224, -0.0945, -0.1727,  0.3547],
          [-0.0499, -0.1091, -0.2752,  0.1658]],
 
         [[ 0.2191,  0.0462, -0.1620, -0.1192],
          [ 0.1639, -0.2015, -0.1552, -0.0157]]], grad_fn=<StackBackward0>),
 tensor([[[-0.3987, -0.1752, -0.5811,  0.6445],
          [

In [91]:
h

tensor([[[-0.1224, -0.0945, -0.1727,  0.3547],
         [-0.0499, -0.1091, -0.2752,  0.1658]],

        [[ 0.2191,  0.0462, -0.1620, -0.1192],
         [ 0.1639, -0.2015, -0.1552, -0.0157]]], grad_fn=<StackBackward0>)

In [77]:
all_hidden_states = pad_packed_sequence(encoded_X)[0]
all_hidden_states.size()

torch.Size([5, 2, 8])

In [78]:
all_hidden_states.permute(1, 0, 2).size()

torch.Size([2, 5, 8])

In [79]:
all_hidden_states[-1]

tensor([[-0.1230, -0.0275, -0.1606, -0.1191, -0.1678, -0.0920,  0.0081, -0.1939],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000]],
       grad_fn=<SelectBackward0>)

In [80]:
h.shape

torch.Size([2, 2, 4])

torch.Tensor

In [46]:
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
# Batch of three right-padded sequences, deliberately NOT sorted by length.
seq = torch.tensor([[1, 2, 0], [3, 0, 0], [4, 5, 6]])
# seq = nn.Embedding(7, 3)(seq)
print(seq)
lens = [2, 1, 3]  # true (unpadded) length of each row of `seq`

tensor([[1, 2, 0],
        [3, 0, 0],
        [4, 5, 6]])


In [68]:
# enforce_sorted=False accepts `lens` in any order; the resulting PackedSequence
# carries sorted_indices / unsorted_indices (see the output below).
packed = pack_padded_sequence(seq, lens, batch_first=True, enforce_sorted=False)
packed

PackedSequence(data=tensor([4, 1, 3, 5, 2, 6]), batch_sizes=tensor([3, 2, 1]), sorted_indices=tensor([2, 0, 1]), unsorted_indices=tensor([1, 2, 0]))

In [59]:
seq_unpacked, lens_unpacked = pad_packed_sequence(packed, batch_first=True)

In [60]:
seq_unpacked

tensor([[1, 2, 0],
        [3, 0, 0],
        [4, 5, 6]])

In [61]:
lens_unpacked

tensor([2, 1, 3])

## torch.permute

In [69]:
X = torch.arange(24).reshape((2, 3, 4))
X

tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],

        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]])

In [70]:
X = X.permute(1, 0, 2)
X

tensor([[[ 0,  1,  2,  3],
         [12, 13, 14, 15]],

        [[ 4,  5,  6,  7],
         [16, 17, 18, 19]],

        [[ 8,  9, 10, 11],
         [20, 21, 22, 23]]])

## Encode

In [1]:
"""
src_len: 3
batch_size: 2
embedding_size: 5
"""
# Toy dimensions used by the encoder walkthrough.
src_len, batch_size = 3, 2
embedding_size, hidden_size = 5, 4

# True (unpadded) length of each sequence in the batch, longest first.
src_length = [3, 2]

# Fixed seed so the random batch is identical on every run.
torch.manual_seed(0)
X = torch.randn(src_len, batch_size, embedding_size)  # (src_len, batch_size, embedding_size)
X

NameError: name 'torch' is not defined

In [191]:
packed_X = pack_padded_sequence(X, src_length)
packed_X

PackedSequence(data=tensor([[-1.1258, -1.1524, -0.2506, -0.4339,  0.8487],
        [ 0.6920, -0.3160, -2.1152,  0.3223, -1.2633],
        [ 0.3500,  0.3081,  0.1198,  1.2377, -0.1435],
        [-0.1116, -0.6136,  0.0316, -0.4927,  0.2484],
        [ 0.4397,  0.1124, -0.8411, -2.3160, -0.1023]]), batch_sizes=tensor([2, 2, 1]), sorted_indices=None, unsorted_indices=None)

In [192]:
# Bidirectional LSTM encoder applied directly to the packed batch.
Encoder = nn.LSTM(input_size=embedding_size, hidden_size=hidden_size, bidirectional=True)
# encoded_X is a PackedSequence with 2 * hidden_size features per row;
# h_n and c_n are the final hidden and cell states.
encoded_X, (h_n, c_n) = Encoder(packed_X)
encoded_X

PackedSequence(data=tensor([[ 0.0872, -0.0503, -0.0445,  0.0584, -0.2065, -0.4339,  0.2128,  0.0675],
        [-0.2401, -0.0077, -0.0171,  0.1581, -0.2012, -0.2448,  0.0195,  0.0094],
        [-0.1306, -0.1658, -0.1099, -0.1172, -0.1101, -0.0958, -0.0525, -0.0151],
        [-0.0590,  0.0121, -0.0462,  0.1905, -0.1235, -0.2117,  0.1316,  0.0339],
        [ 0.0397,  0.0602,  0.0749,  0.1196, -0.0026,  0.0642,  0.3751, -0.0356]],
       grad_fn=<CatBackward0>), batch_sizes=tensor([2, 2, 1]), sorted_indices=None, unsorted_indices=None)

In [193]:
# hidden state
h_n

tensor([[[ 0.0397,  0.0602,  0.0749,  0.1196],
         [-0.0590,  0.0121, -0.0462,  0.1905]],

        [[-0.2065, -0.4339,  0.2128,  0.0675],
         [-0.2012, -0.2448,  0.0195,  0.0094]]], grad_fn=<StackBackward0>)

In [194]:
# cell state
c_n

tensor([[[ 0.0922,  0.4373,  0.2559,  0.1399],
         [-0.1037,  0.0438, -0.1153,  0.2507]],

        [[-0.5648, -0.6973,  0.2884,  0.1231],
         [-0.5228, -0.4113,  0.0305,  0.0225]]], grad_fn=<StackBackward0>)

In [187]:
# Join c_n[0] and c_n[1] (one final cell state per direction of the
# bidirectional LSTM) along the feature axis -> (batch_size, 2 * hidden_size).
torch.cat((c_n[0], c_n[1]), dim=1)

tensor([[ 0.0550, -0.4747,  0.0579, -0.2680,  0.4750, -0.5831,  0.6619, -0.1194],
        [ 0.0046, -0.3216,  0.2925, -0.1548,  0.1405, -0.6188,  0.2519, -0.1294]],
       grad_fn=<CatBackward0>)

In [188]:
# pad_packed_sequence returns a tuple (padded_tensor, lengths), so enc_hiddens[0]
# is the tensor of shape (src_len, batch_size, 2 * hidden_size); positions past a
# sequence's true length are filled with zeros (see the last row below).
enc_hiddens = pad_packed_sequence(encoded_X)
enc_hiddens

(tensor([[[-0.0350, -0.0484,  0.2423,  0.0347,  0.2130, -0.2706,  0.1727,
           -0.0421],
          [ 0.1740, -0.2226, -0.0306, -0.2359,  0.1024, -0.2922,  0.1648,
           -0.0897]],
 
         [[-0.2315, -0.2480,  0.2309,  0.0017,  0.1913, -0.1477,  0.2213,
           -0.0290],
          [ 0.0033, -0.1685,  0.1233, -0.0630,  0.1705, -0.2066,  0.1172,
           -0.0117]],
 
         [[ 0.0413, -0.1580,  0.0086, -0.1854,  0.3150, -0.2815,  0.0363,
           -0.0552],
          [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
            0.0000]]], grad_fn=<CopySlices>),
 tensor([3, 2]))

In [189]:
# Swap the first two axes to obtain a batch-first layout:
# (src_len, batch_size, 2 * hidden_size) -> (batch_size, src_len, 2 * hidden_size).
enc_hiddens[0].permute(1, 0, 2)

tensor([[[-0.0350, -0.0484,  0.2423,  0.0347,  0.2130, -0.2706,  0.1727,
          -0.0421],
         [-0.2315, -0.2480,  0.2309,  0.0017,  0.1913, -0.1477,  0.2213,
          -0.0290],
         [ 0.0413, -0.1580,  0.0086, -0.1854,  0.3150, -0.2815,  0.0363,
          -0.0552]],

        [[ 0.1740, -0.2226, -0.0306, -0.2359,  0.1024, -0.2922,  0.1648,
          -0.0897],
         [ 0.0033, -0.1685,  0.1233, -0.0630,  0.1705, -0.2066,  0.1172,
          -0.0117],
         [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
           0.0000]]], grad_fn=<PermuteBackward0>)

## torch.split

In [97]:
X = torch.randn(4, 3)
X

tensor([[ 0.4802, -0.0287,  1.3481],
        [-1.8695,  0.2533,  1.4783],
        [ 0.9611, -1.1104, -0.2056],
        [-0.4674,  0.3604,  0.2584]])

In [99]:
torch.split(X, 1)

(tensor([[ 0.4802, -0.0287,  1.3481]]),
 tensor([[-1.8695,  0.2533,  1.4783]]),
 tensor([[ 0.9611, -1.1104, -0.2056]]),
 tensor([[-0.4674,  0.3604,  0.2584]]))