In [68]:
import json
import torch
import numpy as np

from torch import nn


## Load language data

Load sentences from file

In [69]:
with open("sentences.json") as f:
    sentences = json.load(f)
sentences[:5]

[['Located', 'South'],
 ['Located', 'Center', 'Object', 'East'],
 ['Located', 'South'],
 ['Located', 'East', 'Object', 'South', 'East'],
 ['Located', 'South']]

Add start and end tokens

In [70]:
sentences = [["<SOS>"] + s + ["<EOS>"] for s in sentences]
sentences[:5]

[['<SOS>', 'Located', 'South', '<EOS>'],
 ['<SOS>', 'Located', 'Center', 'Object', 'East', '<EOS>'],
 ['<SOS>', 'Located', 'South', '<EOS>'],
 ['<SOS>', 'Located', 'East', 'Object', 'South', 'East', '<EOS>'],
 ['<SOS>', 'Located', 'South', '<EOS>']]

Get vocabulary and one-hot encodings

In [71]:
def get_voc(sentence_list):
    words = []
    for s in sentences:
        words.extend(s)
    vocab = list(set(words))
    return vocab
vocab = get_voc(sentences)
vocab

['<SOS>',
 'South',
 'Not',
 'Located',
 'West',
 '<EOS>',
 'Object',
 'Landmark',
 'North',
 'Center',
 'East']

In [72]:
def get_word_onehots(vocab):
    dim_vocab = len(vocab)
    word_encodings = {}
    for i, w in enumerate(vocab):
        word_encodings[w] = np.eye(dim_vocab)[i]
    return word_encodings
word_encodings = get_word_onehots(vocab)
word_encodings

{'<SOS>': array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'South': array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'Not': array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'Located': array([0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]),
 'West': array([0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]),
 '<EOS>': array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]),
 'Object': array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]),
 'Landmark': array([0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.]),
 'North': array([0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]),
 'Center': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.]),
 'East': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.])}

## Language modelling

Encode a sentence

In [76]:
def encode(sentence):
    encoded = []
    for token in sentence:
        encoded.append(word_encodings[token])
    return encoded

In [77]:
s = sentences[0]
enc = torch.Tensor(np.array(encode(s)))
enc

tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]])

Pass through a GRU network

In [198]:
gru = nn.GRU(len(vocab), 256, batch_first=True)
output, hidden_state = gru(enc.unsqueeze(0))
print(output.shape)
print(output)

torch.Size([1, 4, 256])
tensor([[[-0.0273, -0.0381,  0.0491,  ...,  0.0233, -0.0184, -0.0060],
         [-0.0307, -0.0049,  0.0814,  ...,  0.0138, -0.0240, -0.0258],
         [-0.0426, -0.0411,  0.0853,  ...,  0.0386, -0.0494, -0.0713],
         [-0.0202, -0.0136,  0.0417,  ...,  0.0524, -0.0694, -0.0537]]],
       grad_fn=<TransposeBackward1>)


In [94]:
print(hidden_state.shape)
print(hidden_state)

torch.Size([1, 1, 256])
tensor([[[-0.0245,  0.0025,  0.0407, -0.0674,  0.0386, -0.0226, -0.0477,
          -0.0623,  0.0702,  0.0445, -0.0867,  0.0543, -0.0297,  0.0321,
          -0.0051, -0.0245,  0.0194, -0.0044,  0.0009, -0.0593, -0.0498,
           0.0541,  0.0269,  0.0424,  0.0513, -0.0041,  0.0003, -0.0171,
           0.0191, -0.0561,  0.0725,  0.0132,  0.0229,  0.0225,  0.0101,
           0.0582,  0.0647,  0.0334,  0.0280,  0.0413, -0.0410,  0.0005,
          -0.0022,  0.0095, -0.0815,  0.0039,  0.0253, -0.0003,  0.0613,
           0.0340, -0.0445,  0.0419,  0.0109, -0.0446, -0.0284,  0.0326,
          -0.0544, -0.0198, -0.0541,  0.0600,  0.0132,  0.0467, -0.0732,
           0.0313,  0.0161, -0.0561, -0.0723, -0.0614, -0.0824, -0.0747,
           0.0415,  0.0353,  0.0226, -0.0059, -0.0896, -0.0031,  0.0057,
          -0.0100, -0.0263,  0.0264,  0.0641, -0.0525,  0.0349,  0.0260,
           0.0502,  0.0830, -0.0153,  0.0977, -0.0269, -0.0787, -0.0678,
           0.0818,  0.0977,

Padding sequences

In [225]:
a = torch.Tensor(np.array(encode(sentences[0])))
b = torch.Tensor(np.array(encode(sentences[3])))
c = torch.Tensor(np.array(encode(sentences[1])))
sentence_list = [a, b, c]
print(sentence_list)

gru = nn.GRU(len(vocab), 256, batch_first=True)

# Order
ids = sorted(range(len(sentence_list)), key=lambda x: len(sentence_list[x]), reverse=True)
print(ids)

# Sort by length of sentence
sentence_list.sort(key=len, reverse=True)
print(sentence_list)

# Pad sentences
padded = nn.utils.rnn.pad_sequence(sentence_list, batch_first=True)
print(padded.requires_grad)
print(padded)

# Pack padded sentences (to not take care of padded tokens)
lens = [len(s) for s in sentence_list]
print(lens)
packed = nn.utils.rnn.pack_padded_sequence(padded, lens, batch_first=True)
print(packed)

output, hidden_state = gru(packed)
print(output)

# Re-pad sequence of embeddings
output, sizes = nn.utils.rnn.pad_packed_sequence(output, batch_first=True)
print(output)

[tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]]), tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]]), tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]])]
[1, 2, 0]
[tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0

In [217]:
e.retain_grad()
e.mean().backward()

In [218]:
for param in gru.parameters():
    print(param.grad)

tensor([[ 9.6147e-09, -9.0695e-07,  0.0000e+00,  ...,  0.0000e+00,
          6.1077e-09, -1.1247e-06],
        [ 4.3696e-07,  2.5419e-06,  0.0000e+00,  ...,  0.0000e+00,
          4.2694e-07,  2.4686e-06],
        [-8.5177e-07, -4.5599e-06,  0.0000e+00,  ...,  0.0000e+00,
         -8.2568e-07, -8.3661e-06],
        ...,
        [ 9.6453e-05,  4.1863e-04,  0.0000e+00,  ...,  0.0000e+00,
          6.5748e-05,  5.8530e-04],
        [ 2.8425e-04,  7.3727e-04,  0.0000e+00,  ...,  0.0000e+00,
          1.8478e-04,  9.9559e-04],
        [ 1.3736e-04,  5.1098e-04,  0.0000e+00,  ...,  0.0000e+00,
          9.3814e-05,  7.2468e-04]])
tensor([[ 2.9201e-08,  2.3870e-07,  5.6055e-09,  ..., -6.5194e-08,
         -2.0833e-07,  2.0006e-07],
        [-9.2575e-08, -7.8011e-07, -3.9043e-08,  ...,  1.7219e-07,
          6.9074e-07, -6.5954e-07],
        [ 2.3542e-07,  1.6923e-06, -8.9774e-08,  ..., -5.8697e-07,
         -1.2204e-06,  1.0124e-06],
        ...,
        [-1.3274e-05, -8.1712e-05,  8.6187e-07

In [228]:
x = torch.arange(1., 6.)
k, i = x.topk(1)
k

tensor([5.])

In [234]:
i.item()

4