In [1]:
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

In [32]:
seqs = ['gigantic_string','tiny_str','medium_str']

# make <pad> idx 0
vocab = ['<pad>'] + sorted(set(''.join(seqs)))

vocab

['<pad>', '_', 'a', 'c', 'd', 'e', 'g', 'i', 'm', 'n', 'r', 's', 't', 'u', 'y']

In [69]:
# make model
embed = nn.Embedding(len(vocab), 10).cpu()
lstm = nn.LSTM(10, 5).cpu()
lstm.hidden_size

5

In [34]:
vectorized_seqs = [[vocab.index(tok) for tok in seq] for seq in seqs]

In [35]:
vectorized_seqs

[[6, 7, 6, 2, 9, 12, 7, 3, 1, 11, 12, 10, 7, 9, 6],
 [12, 7, 9, 14, 1, 11, 12, 10],
 [8, 5, 4, 7, 13, 8, 1, 11, 12, 10]]

In [36]:
# get the length of each seq in your batch
seq_lengths = torch.LongTensor([len(seq) for seq in vectorized_seqs]).cpu()
seq_lengths

tensor([15,  8, 10])

In [37]:
# dump padding everywhere, and place seqs on the left.
# NOTE: you only need a tensor as big as your longest sequence
seq_tensor = torch.zeros((len(vectorized_seqs), seq_lengths.max())).long().cpu()
for idx, (seq, seqlen) in enumerate(zip(vectorized_seqs, seq_lengths)):
    seq_tensor[idx, :seqlen] = torch.LongTensor(seq)
    
seq_tensor

tensor([[ 6,  7,  6,  2,  9, 12,  7,  3,  1, 11, 12, 10,  7,  9,  6],
        [12,  7,  9, 14,  1, 11, 12, 10,  0,  0,  0,  0,  0,  0,  0],
        [ 8,  5,  4,  7, 13,  8,  1, 11, 12, 10,  0,  0,  0,  0,  0]])

In [38]:
# SORT YOUR TENSORS BY LENGTH!
seq_lengths, perm_idx = seq_lengths.sort(0, descending=True)
seq_tensor = seq_tensor[perm_idx]

seq_tensor

tensor([[ 6,  7,  6,  2,  9, 12,  7,  3,  1, 11, 12, 10,  7,  9,  6],
        [ 8,  5,  4,  7, 13,  8,  1, 11, 12, 10,  0,  0,  0,  0,  0],
        [12,  7,  9, 14,  1, 11, 12, 10,  0,  0,  0,  0,  0,  0,  0]])

In [39]:
seq_tensor = seq_tensor.transpose(0,1) # (B,L,D) -> (L,B,D)
seq_tensor

tensor([[ 6,  8, 12],
        [ 7,  5,  7],
        [ 6,  4,  9],
        [ 2,  7, 14],
        [ 9, 13,  1],
        [12,  8, 11],
        [ 7,  1, 12],
        [ 3, 11, 10],
        [ 1, 12,  0],
        [11, 10,  0],
        [12,  0,  0],
        [10,  0,  0],
        [ 7,  0,  0],
        [ 9,  0,  0],
        [ 6,  0,  0]])

In [40]:
# embed your sequences
seq_tensor = embed(seq_tensor)
seq_tensor

tensor([[[-1.1346,  0.8192,  0.2694, -1.0236, -2.1938,  0.5937, -1.0437,
           0.9931,  0.5774, -0.6358],
         [ 0.9024, -0.9464, -0.0745,  1.0031, -0.2675,  0.0685,  0.4278,
           2.2093, -0.3803,  1.0325],
         [ 0.5292,  1.3212, -0.1149, -1.2809,  0.4934,  0.1881, -1.4785,
          -0.0957,  0.9062,  0.3698]],

        [[ 0.0148,  0.3304, -0.4568, -1.0534, -1.8018,  2.1466, -0.7468,
          -0.5086, -0.7602, -0.0784],
         [-0.9049, -2.2375, -0.5936, -0.4607, -0.6410, -0.8513, -0.8477,
           0.1328, -0.8828,  0.5129],
         [ 0.0148,  0.3304, -0.4568, -1.0534, -1.8018,  2.1466, -0.7468,
          -0.5086, -0.7602, -0.0784]],

        [[-1.1346,  0.8192,  0.2694, -1.0236, -2.1938,  0.5937, -1.0437,
           0.9931,  0.5774, -0.6358],
         [ 0.2574, -0.9649, -1.3667,  0.3344, -0.1496,  1.3959,  0.5439,
           0.6314,  0.3370,  1.6015],
         [-1.5850,  0.0554,  0.8987,  0.9379, -2.3707, -0.3422, -0.2109,
           1.3961,  0.2879,  0.4053

In [42]:
# pack them up nicely
packed_input = pack_padded_sequence(seq_tensor, seq_lengths.cpu().numpy())
packed_input

PackedSequence(data=tensor([[-1.1346,  0.8192,  0.2694, -1.0236, -2.1938,  0.5937, -1.0437,  0.9931,
          0.5774, -0.6358],
        [ 0.9024, -0.9464, -0.0745,  1.0031, -0.2675,  0.0685,  0.4278,  2.2093,
         -0.3803,  1.0325],
        [ 0.5292,  1.3212, -0.1149, -1.2809,  0.4934,  0.1881, -1.4785, -0.0957,
          0.9062,  0.3698],
        [ 0.0148,  0.3304, -0.4568, -1.0534, -1.8018,  2.1466, -0.7468, -0.5086,
         -0.7602, -0.0784],
        [-0.9049, -2.2375, -0.5936, -0.4607, -0.6410, -0.8513, -0.8477,  0.1328,
         -0.8828,  0.5129],
        [ 0.0148,  0.3304, -0.4568, -1.0534, -1.8018,  2.1466, -0.7468, -0.5086,
         -0.7602, -0.0784],
        [-1.1346,  0.8192,  0.2694, -1.0236, -2.1938,  0.5937, -1.0437,  0.9931,
          0.5774, -0.6358],
        [ 0.2574, -0.9649, -1.3667,  0.3344, -0.1496,  1.3959,  0.5439,  0.6314,
          0.3370,  1.6015],
        [-1.5850,  0.0554,  0.8987,  0.9379, -2.3707, -0.3422, -0.2109,  1.3961,
          0.2879,  0.4053],

In [70]:
# throw them through your LSTM (remember to give batch_first=True here if you packed with it)
packed_output, (ht, ct) = lstm(packed_input)

len(packed_output)

4

In [44]:
# unpack your output if required
output, _ = pad_packed_sequence(packed_output)
output

tensor([[[ 0.0596,  0.2298, -0.0566, -0.1202,  0.0247],
         [ 0.0981,  0.0968, -0.2379, -0.3277,  0.0095],
         [ 0.0297, -0.0450,  0.0108, -0.1461,  0.1945]],

        [[ 0.0664,  0.1662, -0.0239, -0.0966, -0.1587],
         [ 0.0699, -0.0955, -0.3017, -0.1477, -0.5515],
         [ 0.0654, -0.1383,  0.0105, -0.0921,  0.0347]],

        [[ 0.0719,  0.3170, -0.0687, -0.1164, -0.0988],
         [ 0.1125,  0.0234, -0.2327, -0.1314, -0.5589],
         [ 0.1497,  0.1363, -0.1713, -0.2054, -0.2830]],

        [[ 0.0290,  0.0224, -0.0258, -0.2073, -0.0824],
         [ 0.0679,  0.0220, -0.1565, -0.0082, -0.3966],
         [ 0.0729, -0.2943, -0.2245, -0.3810,  0.0464]],

        [[ 0.1318,  0.2027, -0.1826, -0.2431, -0.3489],
         [ 0.2006,  0.0599, -0.0218, -0.0396, -0.4671],
         [ 0.2130, -0.0439, -0.2591, -0.1935, -0.1182]],

        [[ 0.0484,  0.1607, -0.1635, -0.1784,  0.0675],
         [ 0.2036,  0.1792, -0.2417, -0.3852, -0.2269],
         [ 0.1149, -0.1614, -0.4316,  

In [45]:
# Or if you just want the final hidden state?
print (ht[-1])

tensor([[ 0.0647,  0.3305, -0.2067, -0.1223, -0.2079],
        [ 0.0632, -0.0767, -0.2668, -0.2260, -0.0626],
        [ 0.0580, -0.1709, -0.2422, -0.2582, -0.0009]],
       grad_fn=<SelectBackward>)


In [46]:
# REMEMBER: Your outputs are sorted. If you want the original ordering
# back (to compare to some gt labels) unsort them
_, unperm_idx = perm_idx.sort(0)
output = output[unperm_idx]
output

tensor([[[ 0.0596,  0.2298, -0.0566, -0.1202,  0.0247],
         [ 0.0981,  0.0968, -0.2379, -0.3277,  0.0095],
         [ 0.0297, -0.0450,  0.0108, -0.1461,  0.1945]],

        [[ 0.0719,  0.3170, -0.0687, -0.1164, -0.0988],
         [ 0.1125,  0.0234, -0.2327, -0.1314, -0.5589],
         [ 0.1497,  0.1363, -0.1713, -0.2054, -0.2830]],

        [[ 0.0664,  0.1662, -0.0239, -0.0966, -0.1587],
         [ 0.0699, -0.0955, -0.3017, -0.1477, -0.5515],
         [ 0.0654, -0.1383,  0.0105, -0.0921,  0.0347]]],
       grad_fn=<IndexBackward>)

In [57]:
from torch.nn.utils.rnn import pad_sequence
import numpy as np

In [58]:
s = np.array([[1,1,1,1,1],[2,2,2],[3,3],[4]])
l = [len(i) for i in s]
l

[5, 3, 2, 1]

In [63]:
a = torch.ones(25, 300)
b = torch.ones(22, 300)
c = torch.ones(15, 300)
pad_sequence([a, b, c]).size()

torch.Size([25, 3, 300])

In [67]:
a.size()

torch.Size([25, 300])

In [68]:
a[[12,3]].size()

torch.Size([2, 300])

In [62]:
def collate_fn(batch):
    # Let's assume that each element in "batch" is a tuple (data, label).
    # Sort the batch in the descending order
    sorted_batch = sorted(batch, key=lambda x: x[0].size(0), reverse=True)
    # Get each sequence and pad it
    sequences = [x[0] for x in sorted_batch]
    sequences_padded = pad_sequence(sequences, batch_first=True)
    # Also need to store the length of each sequence
    # This is later needed in order to unpad the sequences
    lengths = torch.LongTensor([len(x) for x in sequences])
    # Don't forget to grab the labels of the *sorted* batch
    labels = torch.LongTensor(map(lambda x: x[1], sorted_batch))
    return sequences_padded, lengths, labels

In [59]:
pad_sequence(s)

AttributeError: 'list' object has no attribute 'size'

In [52]:
pad_packed_sequence(s, l)

AttributeError: 'list' object has no attribute 'batch_sizes'