# nn.Embedding

In [14]:
# Toy corpus for the embedding demo.
train_data = 'you need to know how to code'
# Unique whitespace-separated tokens. A set is unordered, so the order it is
# displayed in (and iterated in) is not guaranteed to be the same on every run.
word_set = {token for token in train_data.split()}

word_set

{'code', 'how', 'know', 'need', 'to', 'you'}

In [15]:
# Token -> integer id. Numbering starts at 2 because ids 0 and 1 are reserved
# for the '<unk>' and '<pad>' entries added to this dict further down.
# NOTE: ids follow the set's iteration order, which may differ between kernel
# sessions; iterate over sorted(word_set) if stable ids are needed.
vocab = {tkn: idx for idx, tkn in enumerate(word_set, start=2)}
vocab

{'to': 2, 'code': 3, 'how': 4, 'you': 5, 'need': 6, 'know': 7}

In [11]:
# Show the (index, token) pairs that enumerate() yields for the set.
# The loop body must print each pair; a bare expression inside a loop
# displays nothing in a notebook.
for pair in enumerate(word_set):
    print(pair)

(0, 'to')
(1, 'code')
(2, 'how')
(3, 'you')
(4, 'need')
(5, 'know')


In [16]:
# Register the two special tokens on the ids left free by the word numbering:
# 0 for '<unk>' and 1 for '<pad>'.
vocab.update({'<unk>': 0, '<pad>': 1})
vocab

{'to': 2,
 'code': 3,
 'how': 4,
 'you': 5,
 'need': 6,
 'know': 7,
 '<unk>': 0,
 '<pad>': 1}

In [18]:
import torch.nn as nn

# One 3-dimensional vector per vocabulary entry. The padding row is looked up
# from the vocabulary instead of repeating its id as a magic number, so the
# layer stays correct if the '<pad>' id is ever changed.
embedding_layer = nn.Embedding(num_embeddings=len(vocab),
                               embedding_dim=3,
                               padding_idx=vocab['<pad>'])

In [19]:
# Lookup table of shape [len(vocab), 3]; the row at padding_idx (row 1) is all zeros.
embedding_layer.weight

Parameter containing:
tensor([[-0.5645,  0.5217, -0.1553],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.6815,  0.4034, -0.1700],
        [-1.0121, -0.8834,  1.1933],
        [ 1.5311, -0.0287, -0.3459],
        [-1.7482, -1.2874, -1.0154],
        [ 0.7291,  0.2138, -1.4682],
        [ 0.7311, -0.4938, -1.4890]], requires_grad=True)

# torch.randn

In [21]:
import torch

# 3x4 tensor of samples drawn from the standard normal distribution.
x = torch.randn((3, 4))
x

tensor([[ 0.3718,  0.8342, -0.2015,  0.0352],
        [-1.3404, -2.1532,  0.3185, -1.2879],
        [ 0.0777,  0.2026,  0.7936, -0.5208]])

# Activation function

In [22]:
# importing the PyTorch library
import torch

# Fixed 1-D float32 tensor with six entries (positive, negative and zero
# values) used as input for the activation demo.
a = torch.tensor([1.0, -0.5, 3.4, -2.1, 0.0, -6.5], dtype=torch.float32)
print(a)

tensor([ 1.0000, -0.5000,  3.4000, -2.1000,  0.0000, -6.5000])


In [23]:
# Element-wise hyperbolic tangent of 'a'; every result lies in [-1, 1].
b = a.tanh()
b

tensor([ 0.7616, -0.4621,  0.9978, -0.9705,  0.0000, -1.0000])

# torch.mm

In [24]:
# torch.mm is a plain 2-D matrix product: [2, 3] x [3, 3] -> [2, 3].
mat1 = torch.randn((2, 3))
mat2 = torch.randn((3, 3))

mat1.mm(mat2)

tensor([[-6.8195e-01,  3.3264e-01,  1.0224e+00],
        [ 1.4460e-01,  6.1992e-01,  3.2753e-04]])

# X.view

In [25]:
# view() reinterprets the same 12 elements under a new shape without copying.
t1 = torch.ones((4, 3))
t2 = t1.view((3, 4))   # 4x3 -> 3x4
t3 = t1.view((12,))    # 4x3 -> flat vector of 12 elements

print(t1, t2, t3, sep='\n')

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])


# optimizer.zero_grad()

# loss.backward()

# optimizer.step()

# X.squeeze(), X.unsqueeze()

In [42]:
# Uniform random values in [0, 1) with a leading dimension of size 1.
t4 = torch.rand((1, 3, 3))
t4.size()

torch.Size([1, 3, 3])

In [43]:
t4

tensor([[[0.4336, 0.3943, 0.0119],
         [0.4431, 0.5395, 0.6198],
         [0.3020, 0.6146, 0.2127]]])

In [44]:
# squeeze() without an argument drops every size-1 dimension: [1, 3, 3] -> [3, 3].
t4.squeeze()

tensor([[0.4336, 0.3943, 0.0119],
        [0.4431, 0.5395, 0.6198],
        [0.3020, 0.6146, 0.2127]])

In [45]:
# Plain 3x3 matrix of uniform random values, used for the unsqueeze examples.
t5 = torch.rand((3, 3))
t5.size()

torch.Size([3, 3])

In [46]:
# unsqueeze(0) inserts a new size-1 dimension at position 0: [3, 3] -> [1, 3, 3].
t5.unsqueeze(0).shape

torch.Size([1, 3, 3])

In [48]:
t5.unsqueeze(0)

tensor([[[0.2334, 0.7143, 0.9270],
         [0.9682, 0.4538, 0.5793],
         [0.6121, 0.1259, 0.6368]]])

In [49]:
# unsqueeze(1) inserts the new size-1 dimension at position 1: [3, 3] -> [3, 1, 3].
t5.unsqueeze(1).shape

torch.Size([3, 1, 3])

In [50]:
t5.unsqueeze(1)

tensor([[[0.2334, 0.7143, 0.9270]],

        [[0.9682, 0.4538, 0.5793]],

        [[0.6121, 0.1259, 0.6368]]])

# NNLM 코드

In [52]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

# Legacy alias for the 32-bit float CPU tensor type.
dtype = torch.FloatTensor

# Toy corpus: three-word sentences of the form "<context> <context> <target>".
sentences = ['i like dog ', 'i love coffee ', 'i hate milk ']
# Join with a space so that words from neighbouring sentences can never fuse
# together, even if a sentence is written without its trailing space.
word_list = " ".join(sentences).split()
# De-duplicate. The resulting order comes from set iteration and is therefore
# not guaranteed to be the same in every kernel session.
word_list = list(set(word_list))
word_list

['coffee', 'love', 'milk', 'i', 'dog', 'hate', 'like']

In [53]:
# word -> class index, following the position of each word in word_list.
word_dict = dict(zip(word_list, range(len(word_list))))
word_dict

{'coffee': 0, 'love': 1, 'milk': 2, 'i': 3, 'dog': 4, 'hate': 5, 'like': 6}

In [54]:
# class index -> word: the inverse of word_dict, used to decode predictions.
number_dict = dict(enumerate(word_list))
number_dict

{0: 'coffee', 1: 'love', 2: 'milk', 3: 'i', 4: 'dog', 5: 'hate', 6: 'like'}

In [55]:
# Vocabulary size: one output class per distinct word in word_dict.
n_class = len(word_dict)
n_class

7

In [56]:
# NNLM hyperparameters (the symbols in the trailing comments are the paper's notation)
n_step = 2    # number of context words fed to the model (n-1 in paper)
n_hidden = 2  # width of the tanh hidden layer (h in paper)
m = 2         # dimension of each word embedding (m in paper)

In [57]:
def make_batch(sentences, word_to_idx=None):
    """Turn sentences into (context, target) index pairs for next-word prediction.

    Each sentence is split on whitespace; every word except the last becomes
    the context and the last word becomes the target.

    Args:
        sentences: iterable of whitespace-separated sentences, each containing
            at least one word.
        word_to_idx: mapping from word to integer index. Defaults to the
            notebook-level ``word_dict`` when omitted.

    Returns:
        A tuple ``(input_batch, target_batch)`` where ``input_batch`` is a list
        of context index lists and ``target_batch`` is a list of target indices,
        both in the order of ``sentences``.

    Raises:
        KeyError: if a word is missing from the mapping.
        IndexError: if a sentence contains no words.
    """
    if word_to_idx is None:
        # Fall back to the notebook's global vocabulary so make_batch(sentences)
        # keeps working unchanged.
        word_to_idx = word_dict

    input_batch = []
    target_batch = []

    for sen in sentences:
        tokens = sen.split()
        # Local names deliberately avoid shadowing the builtin `input`.
        input_batch.append([word_to_idx[tok] for tok in tokens[:-1]])
        target_batch.append(word_to_idx[tokens[-1]])

    return input_batch, target_batch

In [58]:
# Model

class NNLM(nn.Module):
    """Feed-forward neural language model predicting the next word from a fixed context window.

    For the concatenated context embeddings ``x`` the model computes
    ``b + x @ W + tanh(d + x @ H) @ U``, i.e. a direct linear path plus a
    single tanh hidden layer, and returns unnormalised class scores.

    Args:
        vocab_size: number of distinct words / output classes.
            Defaults to the notebook-level ``n_class``.
        embed_dim: size of each word embedding. Defaults to ``m``.
        context_size: number of context words per example. Defaults to ``n_step``.
        hidden_size: width of the hidden layer. Defaults to ``n_hidden``.
    """

    def __init__(self, vocab_size=None, embed_dim=None, context_size=None, hidden_size=None):
        super().__init__()
        # Fall back to the notebook-level hyperparameters so that a bare
        # NNLM() call behaves exactly as before.
        vocab_size = n_class if vocab_size is None else vocab_size
        embed_dim = m if embed_dim is None else embed_dim
        context_size = n_step if context_size is None else context_size
        hidden_size = n_hidden if hidden_size is None else hidden_size

        # Width of the flattened context vector. Stored on the instance so
        # forward() does not depend on globals that may change after construction.
        self.input_size = context_size * embed_dim

        self.C = nn.Embedding(vocab_size, embed_dim)                                           # word embeddings
        self.H = nn.Parameter(torch.randn(self.input_size, hidden_size, dtype=torch.float32))  # input -> hidden
        self.W = nn.Parameter(torch.randn(self.input_size, vocab_size, dtype=torch.float32))   # input -> output
        self.d = nn.Parameter(torch.randn(hidden_size, dtype=torch.float32))                   # hidden bias
        self.U = nn.Parameter(torch.randn(hidden_size, vocab_size, dtype=torch.float32))       # hidden -> output
        self.b = nn.Parameter(torch.randn(vocab_size, dtype=torch.float32))                    # output bias

    def forward(self, X):
        """Return class scores of shape [batch_size, vocab_size] for word indices X of shape [batch_size, context_size]."""
        X = self.C(X)                     # [batch_size, context_size, embed_dim]
        X = X.view(-1, self.input_size)   # [batch_size, context_size * embed_dim]
        tanh = torch.tanh(self.d + torch.mm(X, self.H))  # [batch_size, hidden_size]
        output = self.b + torch.mm(X, self.W) + torch.mm(tanh, self.U)  # [batch_size, vocab_size]
        return output

In [59]:
model = NNLM()
# CrossEntropyLoss takes raw class scores and integer class targets.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
input_batch, target_batch = make_batch(sentences)
# Plain tensors are sufficient here: the Variable wrapper is deprecated and
# simply returns a tensor. Indices must be int64 for embedding lookup and loss.
input_batch = torch.tensor(input_batch, dtype=torch.long)
target_batch = torch.tensor(target_batch, dtype=torch.long)

In [61]:
# Training

for epoch in range(5000):
    # Gradients accumulate across backward() calls, so clear them first.
    optimizer.zero_grad()

    # Full-batch forward pass, then one optimizer update.
    output = model(input_batch)
    loss = criterion(output, target_batch)
    loss.backward()
    optimizer.step()

    # Report only on every 1000th epoch; `loss` is the value computed before
    # this epoch's parameter update.
    if (epoch + 1) % 1000 != 0:
        continue
    print('Epoch:', '%04d' % (epoch + 1), 'cost=', '{:.6f}'.format(loss.item()))

Epoch: 1000 cost= 0.001469
Epoch: 2000 cost= 0.000788
Epoch: 3000 cost= 0.000426
Epoch: 4000 cost= 0.000230
Epoch: 5000 cost= 0.000124


In [64]:
# predict
# Inference only: run the forward pass without autograd tracking instead of
# reaching into `.data`. max(...)[1] is the index of the highest score along
# the class dimension, kept as a column of shape [batch_size, 1].
with torch.no_grad():
    predict = model(input_batch).max(dim=1, keepdim=True)[1]

print(input_batch)
print(predict)

tensor([[3, 6],
        [3, 1],
        [3, 5]])
tensor([[4],
        [0],
        [2]])


In [65]:
# test
# Context words are everything but the last word, mirroring make_batch.
# squeeze(1) removes only the keepdim column, so a single-sentence batch still
# yields a 1-D tensor; a bare squeeze() would collapse it to a 0-d tensor that
# cannot be iterated.
print([sen.split()[:-1] for sen in sentences], '->', [number_dict[idx] for idx in predict.squeeze(1).tolist()])

[['i', 'like'], ['i', 'love'], ['i', 'hate']] -> ['dog', 'coffee', 'milk']
