Imports

In [18]:
import torch
import torch.nn as nn

In [19]:
# torch is the core PyTorch library including tensor operations/utilities/etc
# torch.nn is the module with neural network implementations - layers, loss functions, etc

Embedding

In [21]:
emb = nn.Embedding(1000,50)
a = emb(torch.tensor([1,2,3]))
a.shape

torch.Size([3, 50])

In [22]:
# nn.Embedding(num_embeddings, embedding_dim)
# - num_embeddings refers to the "dictionary size" or "vocab size" - the number of words you're using
# - embedding_dim refers to the size of the embedding you'll receive from using this

# this function uses a lookup table that maps integer indices (what you give it) to vectors of a fixed size
# The "embedding table" is initialized with random values, which are updated with backprop

# every time you pass an integer to the function, it returns the vector that's currently there (it'll change)

RNN

In [51]:
sequence_length = 10
input_features = 50
hidden_size = 300
batch_size = 16
num_layers = 1
rnn = torch.nn.RNN(input_features, hidden_size, num_layers, batch_first=True)
batch_x = torch.randn((4, 10, 50))
out, hidden = rnn(batch_x)
print('batch x', out.shape)

unbatch_x = torch.randn((10, 50))
out, hidden = rnn(unbatch_x)
print('unbatch x', out.shape)

batch x torch.Size([4, 10, 300])
unbatch x torch.Size([10, 300])


In [52]:
# 2 required arguments - input_size and hidden_size
# - the number of input features
# - the number of features in the hidden state h

# can also pass num_layers - 1 by default, specifies the number of recurrent layers. If >1 we're stacking RNNs
    # so the second takes outputs of the first and computes final results

# unbatched input is size (sequence length, input_size)
    # sequence length is the number of words in the office case
    # unbatched could be (100, 5000) for 100 words in a sequence, 5000 input_size (50-dim embedding)

# batched input is size (batch_size, sequence length, input_size)
    # must set batch_first=True when defining layer

# outputs become (sequence length, D*hidden_size) and (batch_size, sequence_length, D*hidden_size)
    # D = 2 if bidirectional else 1

In [61]:
h0 = torch.zeros(num_layers, batch_size, hidden_size)
x = torch.zeros(batch_size, sequence_length, input_features)
out, hidden = rnn(x, h0)
out.shape

torch.Size([16, 10, 300])

In [None]:
# initial hidden state takes values (num_layers, batch_size, hidden_size)
    # batch_first=True does not apply to hidden or cell states!
    # if you include batch size in h0, you must include it in x
        # you could take it out of both too if you want


In [63]:
final_out = out[:, -1, :]
final_out.shape

torch.Size([16, 300])

In [None]:
# the "final output" is the output we get from the rnn at the last time step
    # we get this by keeping all batch_size, using the -1 sequence_length, and all hidden_size


In [66]:
linear = nn.Linear(100,10)
out = linear(torch.randn(100))
out.shape

torch.Size([10])

In [99]:
input_size = 50
hidden_size = 200
num_layers = 1
dict_size = 1000
emb_size = 50

# ! input_size must match emb_size


class RNNClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, dict_size, emb_size):
        super(RNNClassifier, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dict_size = dict_size
        self.emb_size = emb_size
        self.embedding = nn.Embedding(dict_size, emb_size)
        self.rnn = nn.RNN(input_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 21)
        
    def forward(self, x):

        x = self.embedding(x)
        print(x.shape)
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        print(x.shape, h0.shape)
        out, _ = self.rnn(x, h0)

        out = self.fc(out[:, -1, :])
        return out

model = RNNClassifier(input_size, hidden_size, num_layers, dict_size, emb_size)

In [100]:
batch_size = 10
x = torch.zeros(batch_size, input_size).int()
out = model(x)

torch.Size([10, 50, 50])
torch.Size([10, 50, 50]) torch.Size([1, 10, 200])


In [103]:
out

tensor([[ 0.3445,  0.2529, -0.0758, -0.1560, -0.1792, -0.2978,  0.0266,  0.2951,
          0.1216,  0.2107, -0.1691, -0.1297,  0.0762, -0.3954, -0.2504,  0.0926,
         -0.1776,  0.3120, -0.1057,  0.0903,  0.2854],
        [ 0.3445,  0.2529, -0.0758, -0.1560, -0.1792, -0.2978,  0.0266,  0.2951,
          0.1216,  0.2107, -0.1691, -0.1297,  0.0762, -0.3954, -0.2504,  0.0926,
         -0.1776,  0.3120, -0.1057,  0.0903,  0.2854],
        [ 0.3445,  0.2529, -0.0758, -0.1560, -0.1792, -0.2978,  0.0266,  0.2951,
          0.1216,  0.2107, -0.1691, -0.1297,  0.0762, -0.3954, -0.2504,  0.0926,
         -0.1776,  0.3120, -0.1057,  0.0903,  0.2854],
        [ 0.3445,  0.2529, -0.0758, -0.1560, -0.1792, -0.2978,  0.0266,  0.2951,
          0.1216,  0.2107, -0.1691, -0.1297,  0.0762, -0.3954, -0.2504,  0.0926,
         -0.1776,  0.3120, -0.1057,  0.0903,  0.2854],
        [ 0.3445,  0.2529, -0.0758, -0.1560, -0.1792, -0.2978,  0.0266,  0.2951,
          0.1216,  0.2107, -0.1691, -0.1297,  0.076

In [104]:
a = torch.randn(3, 5, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(5)

In [107]:
loss = nn.CrossEntropyLoss()
loss(a, target)

tensor(1.8621, grad_fn=<NllLossBackward0>)

In [121]:
a = torch.randn(21)
a[torch.argmax(a)]


tensor(2.1643)