In [1]:
import os
import numpy as np

import torch
from torch import nn
from torch import optim

from torch.utils.data import Dataset, DataLoader

In [2]:
### Data files
DIR = os.getcwd().replace('notebooks', 'data')
print(os.listdir(DIR))

['data\\Arabic.txt', 'data\\Chinese.txt', 'data\\Czech.txt', 'data\\Dutch.txt', 'data\\English.txt', 'data\\French.txt', 'data\\German.txt', 'data\\Greek.txt', 'data\\Irish.txt', 'data\\Italian.txt', 'data\\Japanese.txt', 'data\\Korean.txt', 'data\\Polish.txt', 'data\\Portuguese.txt', 'data\\Russian.txt', 'data\\Scottish.txt', 'data\\Spanish.txt', 'data\\Vietnamese.txt']


In [175]:
class TextDataset(Dataset):
    '''
    Text Dataset object.
    '''
    def __init__(self, root_dir):
        self.root_dir = root_dir
        self.file_names = list(map(lambda x: os.path.join(self.root_dir, x), os.listdir(self.root_dir)))
        self.classes = list(map(lambda x: x.split('.')[0], os.listdir(self.root_dir)))
        
        self.int2label = dict(enumerate(self.classes))
        self.label2int = {v : k for (k, v) in self.int2label.items()}
        
        self.files = [self.read_file(f) for f in self.file_names]
        self.data, self.labels = list(), list()
        
        for file, label in self.files:
            self.data += file
            self.labels += label
        
        self.unique_characters = self.get_unique_chars()
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, ix):
        string = self.data[ix]
        string_data = torch.tensor([self.unique_characters[s] for s in string], dtype = torch.int32)
        return string_data, self.labels[ix]
    
    def read_file(self, f):
        with open(f, 'rb') as file:
            contents = str(file.read(), encoding = 'utf-8').split('\n')
        
        labels = [self.label2int[f.split('.')[0].split('\\')[-1]]]*len(contents)
        
        return contents[:-1], labels[:-1]
    
    def get_unique_chars(self):
        unique_chars = sorted(list(set(''.join(list(map(lambda x: ''.join(x[0]), self.files))))))
        return {v : k for (k, v) in dict(enumerate(unique_chars)).items()}

In [176]:
data = TextDataset(DIR)

In [177]:
data[0]

(tensor([57, 85, 14, 58, 47,  2], dtype=torch.int32), 0)

In [178]:
data[100][0].shape

torch.Size([5])

In [179]:
data.unique_characters

{'G': 0,
 'à': 1,
 'y': 2,
 'ł': 3,
 'n': 4,
 '1': 5,
 'l': 6,
 'ã': 7,
 'q': 8,
 'Ż': 9,
 'S': 10,
 '/': 11,
 'R': 12,
 'v': 13,
 'o': 14,
 'N': 15,
 'C': 16,
 'í': 17,
 'O': 18,
 'F': 19,
 'f': 20,
 't': 21,
 'V': 22,
 'k': 23,
 'ß': 24,
 'ñ': 25,
 'á': 26,
 'X': 27,
 'Y': 28,
 'T': 29,
 'ù': 30,
 'I': 31,
 'ö': 32,
 'é': 33,
 'ä': 34,
 'd': 35,
 'U': 36,
 'ç': 37,
 'ú': 38,
 'ń': 39,
 'Q': 40,
 'c': 41,
 'Ś': 42,
 "'": 43,
 '\xa0': 44,
 'J': 45,
 'õ': 46,
 'r': 47,
 'e': 48,
 'g': 49,
 'E': 50,
 'ż': 51,
 'M': 52,
 ':': 53,
 'P': 54,
 'p': 55,
 'É': 56,
 'K': 57,
 'u': 58,
 'ì': 59,
 'L': 60,
 'Á': 61,
 'ó': 62,
 'A': 63,
 'ò': 64,
 's': 65,
 ' ': 66,
 'z': 67,
 'w': 68,
 'ê': 69,
 ',': 70,
 '-': 71,
 'm': 72,
 'j': 73,
 'Z': 74,
 'a': 75,
 'ü': 76,
 'i': 77,
 'x': 78,
 'D': 79,
 'B': 80,
 'è': 81,
 'W': 82,
 'H': 83,
 'ą': 84,
 'h': 85,
 'b': 86}

In [180]:
test = torch.randint(0, 20, size = (32, 5, 10))

In [181]:
embedder = nn.Embedding(100, 3)

In [182]:
embedder(data[0][0])

tensor([[ 1.3963,  0.5748,  1.8907],
        [-0.4406,  0.2724, -2.2170],
        [-1.0668,  0.1891, -0.2612],
        [ 0.8046,  0.0901,  0.7425],
        [-0.0429,  0.1080,  0.0153],
        [-0.4302,  0.4469,  1.7570]], grad_fn=<EmbeddingBackward0>)

In [183]:
embedder(data[100][0])

tensor([[-0.6505, -0.0347, -0.2045],
        [ 1.0523,  0.3348,  1.7708],
        [ 0.1530, -1.4109,  0.2866],
        [ 0.1530, -1.4109,  0.2866],
        [-0.2488,  0.6012,  1.2882]], grad_fn=<EmbeddingBackward0>)

In [184]:
r = embedder(test)

In [185]:
r.shape

torch.Size([32, 5, 10, 3])

In [186]:
print(help(nn.RNN))

Help on class RNN in module torch.nn.modules.rnn:

class RNN(RNNBase)
 |  RNN(*args, **kwargs)
 |  
 |  Applies a multi-layer Elman RNN with :math:`\tanh` or :math:`\text{ReLU}` non-linearity to an
 |  input sequence.
 |  
 |  
 |  For each element in the input sequence, each layer computes the following
 |  function:
 |  
 |  .. math::
 |      h_t = \tanh(W_{ih} x_t + b_{ih} + W_{hh} h_{(t-1)} + b_{hh})
 |  
 |  where :math:`h_t` is the hidden state at time `t`, :math:`x_t` is
 |  the input at time `t`, and :math:`h_{(t-1)}` is the hidden state of the
 |  previous layer at time `t-1` or the initial hidden state at time `0`.
 |  If :attr:`nonlinearity` is ``'relu'``, then :math:`\text{ReLU}` is used instead of :math:`\tanh`.
 |  
 |  Args:
 |      input_size: The number of expected features in the input `x`
 |      hidden_size: The number of features in the hidden state `h`
 |      num_layers: Number of recurrent layers. E.g., setting ``num_layers=2``
 |          would mean stacking tw

In [187]:
len(data)

20074

In [188]:
data[0][0]

tensor([57, 85, 14, 58, 47,  2], dtype=torch.int32)

In [189]:
test_ix = np.random.randint(low = 0, high = len(data), size = 1000)
train_ix = np.array([n for n in range(0, len(data)) if n not in test_ix])

test_sampler = torch.utils.data.Subset(data, indices = test_ix)
train_sampler = torch.utils.data.Subset(data, indices = train_ix)

In [245]:
def pad_and_pack(batch):
    data_ = []
    labels = []
    lengths = []
    
    for X, y in batch:
        data_.append(X)
        labels.append(y)
        lengths.append(X.shape[0])
        
    X_pad = torch.nn.utils.rnn.pad_sequence(data_, batch_first = False)
    print(X_pad.shape)
    X_pack = torch.nn.utils.rnn.pack_padded_sequence(X_pad, lengths, batch_first = False, enforce_sorted = False)
    
    return X_pack, torch.tensor(labels, dtype = torch.int64)

In [246]:
d = [(torch.randn(torch.randint(30, 40, (1,)).item(), 16), torch.randint(0, 5, (1,)).item()) for n in range(48)]

In [247]:
for i in d:
    print(i[0].shape)

torch.Size([32, 16])
torch.Size([38, 16])
torch.Size([32, 16])
torch.Size([30, 16])
torch.Size([30, 16])
torch.Size([30, 16])
torch.Size([38, 16])
torch.Size([32, 16])
torch.Size([39, 16])
torch.Size([32, 16])
torch.Size([39, 16])
torch.Size([37, 16])
torch.Size([30, 16])
torch.Size([38, 16])
torch.Size([33, 16])
torch.Size([31, 16])
torch.Size([33, 16])
torch.Size([37, 16])
torch.Size([30, 16])
torch.Size([39, 16])
torch.Size([33, 16])
torch.Size([36, 16])
torch.Size([34, 16])
torch.Size([34, 16])
torch.Size([38, 16])
torch.Size([34, 16])
torch.Size([37, 16])
torch.Size([34, 16])
torch.Size([33, 16])
torch.Size([38, 16])
torch.Size([38, 16])
torch.Size([30, 16])
torch.Size([36, 16])
torch.Size([31, 16])
torch.Size([33, 16])
torch.Size([36, 16])
torch.Size([32, 16])
torch.Size([38, 16])
torch.Size([31, 16])
torch.Size([38, 16])
torch.Size([37, 16])
torch.Size([37, 16])
torch.Size([33, 16])
torch.Size([34, 16])
torch.Size([39, 16])
torch.Size([35, 16])
torch.Size([38, 16])
torch.Size([3

In [248]:
a, b = pad_and_pack(d)

torch.Size([39, 48, 16])


In [249]:
train_sampler, test_sampler = torch.utils.data.random_split(data, lengths = [len(data)-1000, 1000])

train_dl = DataLoader(train_sampler, batch_size = 16, collate_fn = pad_and_pack)
test_dl = DataLoader(test_sampler, batch_size = 16, collate_fn = pad_and_pack)

In [250]:
X, y = next(iter(train_dl))

torch.Size([11, 16])


In [251]:
X.data.shape

torch.Size([110])

In [252]:
torch.nn.utils.rnn.pad_packed_sequence(X, batch_first = True)[0].shape

torch.Size([16, 11])

In [253]:
r = rnn.embedder(X)

torch.Size([16, 11, 16])


In [254]:
r_ = rnn.rnn(r)

In [255]:
torch.nn.utils.rnn.pad_packed_sequence(r_[0], batch_first = True)[0].shape

torch.Size([16, 11, 128])

In [256]:
X.data

tensor([80, 54, 16, 80, 19, 79, 29, 57, 54, 22, 10, 16, 80, 63, 57, 45, 48, 47,
        85, 48, 77, 75, 47, 75, 14, 48, 47, 75, 58, 68, 68, 75, 47, 77, 48, 65,
         4, 13, 77,  4, 41,  4, 14, 72, 21, 75, 75, 72, 48, 49, 47, 21,  4,  6,
        72, 75, 14,  4, 58, 55, 21, 35,  4, 67, 14, 21, 58, 77,  2, 86, 75, 41,
        48, 47, 14,  4, 47, 58, 67, 72, 75,  6,  4, 23,  4, 48, 14, 65, 85, 14,
        21, 48, 13, 13, 85, 48, 47, 14, 77, 65, 23, 13, 48, 13, 41, 23, 77, 85,
         2,  4], dtype=torch.int32)

In [257]:
y.shape

torch.Size([16])

In [258]:
class PackedEmbedding(nn.Module):
    def __init__(self, embedding_layer):
        super(PackedEmbedding, self).__init__()
        self.embedding = embedding_layer
        
    def forward(self, x):
        if type(x) == torch.nn.utils.rnn.PackedSequence:
            unpacked_sequence, lengths = torch.nn.utils.rnn.pad_packed_sequence(x, batch_first = True)
            y = self.embedding(unpacked_sequence)
            y = torch.nn.utils.rnn.pack_padded_sequence(y, lengths, batch_first = True, enforce_sorted = False)
        else:
            y = self.embedding(x)
        
        return y

In [259]:
class RNNNetwork(nn.Module):
    def __init__(self, vocab_size, hidden_size, num_class, feature_size):
        super(RNNNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.num_class = num_class
        self.feature_size = feature_size
        
        self.embedder = PackedEmbedding(nn.Embedding(self.vocab_size, self.feature_size))
        self.rnn = nn.RNN(input_size = self.feature_size, hidden_size = self.hidden_size, batch_first = True)
        self.fc = nn.Linear(self.hidden_size, self.num_class)
        
        
    def forward(self, x):
        x = self.embedder(x)
        output, state = self.rnn(x)
        y = torch.nn.utils.rnn.pad_packed_sequence(output, batch_first = True)[0][:, -1, :].squeeze()
        return torch.softmax(self.fc(y), dim = -1)

In [260]:
rnn = RNNNetwork(vocab_size = len(data.unique_characters), hidden_size = 128,
                 num_class = len(data.classes), feature_size = 16)

In [261]:
print(rnn)

RNNNetwork(
  (embedder): PackedEmbedding(
    (embedding): Embedding(87, 16)
  )
  (rnn): RNN(16, 128, batch_first=True)
  (fc): Linear(in_features=128, out_features=18, bias=True)
)


In [262]:
epochs = 20
criterion = nn.NLLLoss()
opt = optim.Adam(rnn.parameters(), lr = 1e-4)

In [264]:
for epoch in range(1, epochs +1):
    train_loss = list()
    test_loss = list()
    
    rnn.train()
    for X, y in train_dl:
        y_pred = rnn(X)
        loss = criterion(y_pred, y)
        opt.zero_grad()
        loss.backward()
        opt.step()
        train_loss.append(loss.item())
    
    rnn.eval()
    for X_, y_ in test_dl:
        _y = rnn(X_)
        test_loss.append(criterion(_y, y_))
    
    print(f"Epoch {epoch}: Train loss {sum(train_loss)/len(train_loss)}; Test loss: {sum(test_loss)/len(test_loss)}")

torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([8, 16])
torch.Size([16, 8, 16])
torch.Size([8, 16])
torch.Size([16, 8, 16])
torch.Size([9, 16])
torch.Size([16, 9, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([9, 16])
torch.Size([16, 9, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([9, 16])
torch.Size([16, 9, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([13, 16])
torch.Size([16, 13, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([13, 16])
torch.Size([16, 13, 16]

torch.Size([16, 10, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([13, 16])
torch.Size([16, 13, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([16, 16])
torch.Size([16, 16, 16])
torch.Size([14, 16])
torch.Size([16, 14, 16])
torch.Size([14, 16])
torch.Size([16, 14, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([13, 16])
torch.Size([16, 13, 16])
torch.Size([13, 16])
torch.Size([16, 13, 16])
torch.Size([14, 16])
torch.Size([16, 14, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([13, 16])
torch.Size([16, 13, 16])
torch.Size([13, 16])
torch.Size([16, 13, 16])
torch.Siz

torch.Size([13, 16])
torch.Size([16, 13, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([17, 16])
torch.Size([16, 17, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([9, 16])
torch.Size([16, 9, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([17, 16])
torch.Size([16, 17, 16])
torch.Size([13, 16])
torch.Size([16, 13, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([13, 16])
torch.Size([16, 13, 16])
torch.Size([14, 16])
torch.Size([16, 14, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([13, 16])
torch.Size([16, 13, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([9, 16])
torch.Size([16, 

torch.Size([9, 16])
torch.Size([16, 9, 16])
torch.Size([9, 16])
torch.Size([16, 9, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([17, 16])
torch.Size([16, 17, 16])
torch.Size([9, 16])
torch.Size([16, 9, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([15, 16])
torch.Size([16, 15, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([13, 16])
torch.Size([16, 13, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([11, 16])
torch.Size([16, 11,

torch.Size([16, 10, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([13, 16])
torch.Size([16, 13, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([9, 16])
torch.Size([16, 9, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([17, 16])
torch.Size([16, 17, 16])
torch.Size([9, 16])
torch.Size([16, 9, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([1

torch.Size([16, 11, 16])
torch.Size([9, 16])
torch.Size([16, 9, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([9, 16])
torch.Size([16, 9, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([13, 16])
torch.Size([16, 13, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([9, 16])
torch.Size([16, 9, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([9, 16])
torch.Size([16, 9, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([9, 16])
torch.Size([16, 9, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([17, 16])
torch.Size([16, 17, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([13, 16])
torch.Size([16, 13, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([11, 16]

torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([14, 16])
torch.Size([16, 14, 16])
torch.Size([14, 16])
torch.Size([16, 14, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([14, 16])
torch.Size([16, 14, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([13, 16])
torch.Size([16, 13, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([13, 16])
torch.Size([16, 13, 16])
torch.Size([9, 16])
torch.Size([16, 9, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([13, 16])
torch.Size([16, 13, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([10, 16])
torch.Size([16, 10, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([8, 16])
torch.Size([16, 8, 16])
torch.Size([12, 16])
torch.Size([16, 12, 16])
torch.Size([11, 16])
torch.Size([16, 11, 16])
torch.Size([10, 16])
torch.Size([16, 1

ValueError: Expected input batch_size (16) to match target batch_size (2).

In [110]:
X

PackedSequence(data=tensor([55, 72, 78,  8, 78, 31,  8,  3, 78, 19, 51, 78, 69,  3, 31,  3, 77,  8,
        67, 84, 67, 82, 19, 31, 78,  1, 24, 24, 19, 63, 19, 64, 46, 39, 35, 38,
        68, 38, 38, 16, 35, 16, 71,  4, 13, 16, 39, 38, 17, 17, 16, 17, 17, 39,
        46, 46, 35, 38, 17, 17, 39, 79, 71, 16, 56, 56, 12, 80, 17, 56,  4, 38,
        16, 62, 17,  2, 45, 53, 35, 17, 80,  4, 12, 12, 16, 12, 53, 35, 35, 17,
        12, 35, 46, 16, 16, 58, 71, 71, 16, 79, 12, 71,  9, 58,  2, 80, 79, 12,
        71, 16, 71, 12,  9, 16,  4, 12, 62, 39, 39, 35, 17, 13, 12, 38, 12, 12,
         4, 39, 58,  4, 39, 38, 16, 16, 16, 33, 17, 17,  9, 17,  4, 16, 58, 16,
        53, 38, 16, 71, 71, 62, 38, 17, 17,  9,  4,  9, 56, 13, 38, 39, 53, 16,
        79, 12, 56, 16, 13, 70, 46,  4, 39,  2, 16, 62, 79, 56, 12, 17, 38, 16,
        16, 13, 80, 38, 80, 13, 39,  4, 62, 80, 39,  4, 58,  4, 56, 56, 26, 16,
         4, 39, 17, 39, 38, 12,  4, 62, 62, 38, 56, 39, 80, 56, 26, 39, 16, 39,
        39, 39, 26, 

In [108]:
X.data.shape

torch.Size([245])

In [109]:
y.shape

torch.Size([32])

In [96]:
X.shape

torch.Size([3])

In [103]:
b = (torch.randn(32, 10), torch.randn(32))

In [104]:
b[0].shape

torch.Size([32, 10])

In [107]:
pad_and_pack(data.files[:20])

TypeError: expected Tensor as element 0 in argument 0, but got list