In [1]:
import Data

In [2]:
import Component as C

In [3]:
import torch
import torch.nn as nn

# data

In [4]:
# Build the corpus helper from the cleaned text file; the second argument (10000)
# presumably caps the number of sentence pairs — it matches the leading dimension
# of the tensors displayed below. TODO confirm against Data.Data.
corpus = Data.Data('fra_clean.txt', 10000)

In [5]:
# Encoder side: corpus.encoder() returns four values. Judging by the names these
# are the input tensor, its vocabulary size and the two lookup tables
# (word -> index, index -> word) — confirm against Data.py.
(
    corpus_tensor_en,
    vocab_size_en,
    word2idx_en,
    idx2word_en,
) = corpus.encoder()

In [6]:
# Sanity check: display the encoder-side tensor's shape.
# Presumably (sentences, tokens per sentence, vocabulary size) — TODO confirm.
corpus_tensor_en.shape

torch.Size([10000, 4, 2270])

In [7]:
# Decoder side: corpus.decoder() returns four values. Judging by the names these
# are the input tensor, its vocabulary size and the two lookup tables
# (word -> index, index -> word) — confirm against Data.py.
(
    corpus_tensor_de,
    vocab_size_de,
    word2idx_de,
    idx2word_de,
) = corpus.decoder()

In [8]:
# Sanity check: display the decoder-side tensor's shape.
# Presumably (sentences, tokens per sentence, vocabulary size) — TODO confirm.
corpus_tensor_de.shape

torch.Size([10000, 10, 4704])

In [9]:
# target: the labels used by the loss; the training cell flattens them and
# casts them to long before passing them to the criterion.
target = corpus.target()

In [10]:
# Sanity check: the first two dimensions should line up with corpus_tensor_de
# (one label per decoder position).
target.shape

torch.Size([10000, 10])

# model

## encoder

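`C.Encoder` arguments used below: batch_size, seq_length, input_size, embed_size, num_heads (hidden_size and ff_size are also set below but are not passed to the constructor)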

In [11]:
# Re-display the decoder-side tensor's shape; the next cell unpacks it into
# batch_size, seq_length and input_size.
corpus_tensor_de.shape

torch.Size([10000, 10, 4704])

In [12]:
# Stand-alone encoder smoke test: build a C.Encoder and run one forward pass.
# NOTE(review): this section is titled "encoder" but is sized from, and fed with,
# corpus_tensor_de rather than corpus_tensor_en — confirm that is intentional.
batch_size, seq_length, input_size = corpus_tensor_de.shape

# Hyperparameters. ff_size and hidden_size are set here but are not passed to
# the constructor below.
num_heads, ff_size = 2, 100
embed_size, hidden_size = 100, 128

encoder = C.Encoder(
    batch_size,
    seq_length,
    input_size,
    embed_size,
    num_heads,
)

# Whole-corpus forward pass with masking switched off.
output = encoder(corpus_tensor_de, mask=False)

In [13]:
# Shape of the encoder output: the last dimension should equal embed_size.
output.shape

torch.Size([10000, 10, 100])

In [14]:
# Peek at the first 10 values of the first position of the first sample.
output[0][0][0:10]

tensor([-0.6638,  0.7475, -1.2316, -0.2081,  0.4016,  2.6609, -0.4417,  0.0291,
         1.3483, -0.1894], grad_fn=<SliceBackward0>)

## decoder

`C.Decoder` arguments used below: batch_size, seq_de, input_de, seq_en, input_en, embed_size, num_heads

In [11]:
# Decoder-side dimensions, taken from the decoder tensor's three axes.
# batch_size is rebound here; the encoder section also derived it from
# corpus_tensor_de.shape, so its value does not change.
batch_size, seq_de, input_de = corpus_tensor_de.shape

In [12]:
# Displayed for reference: the three values unpacked into
# batch_size, seq_de and input_de.
corpus_tensor_de.shape

torch.Size([10000, 10, 4704])

In [13]:
# Encoder-side sequence length and feature size (the last two axes).
# The leading axis is skipped by slicing rather than by assigning it to `_`:
# in IPython `_` holds the previous cell's output, and rebinding it by hand
# stops IPython from updating it for the rest of the session.
seq_en, input_en = corpus_tensor_en.shape[1:]

In [14]:
# Attention heads and embedding width for the decoder (same values as in the
# encoder section).
num_heads, embed_size = 2, 100

In [16]:
# Build the model that is trained below. It receives both the decoder-side and
# the encoder-side dimensions, plus the embedding width and head count.
decoder = C.Decoder(
    batch_size,
    seq_de,
    input_de,
    seq_en,
    input_en,
    embed_size,
    num_heads,
)

In [17]:
# Displayed for reference: shape of the encoder-side tensor passed as the
# first argument to decoder(...) in the next cell.
corpus_tensor_en.shape

torch.Size([10000, 4, 2270])

In [18]:
# Forward pass over the whole corpus: encoder-side tensor first, decoder-side
# tensor second. This rebinds `output`, which previously held the stand-alone
# encoder's result.
output = decoder(corpus_tensor_en, corpus_tensor_de)

In [19]:
# The last dimension should equal input_de: the training cell treats it as the
# class dimension for CrossEntropyLoss.
output.shape

torch.Size([10000, 10, 4704])

## optimization

In [20]:
# Cross-entropy over the class scores, averaged over all positions (the default
# reduction, spelled out here).
# NOTE(review): no ignore_index is set, so padded target positions (if the data
# has any) contribute to the loss — confirm.
criterion = nn.CrossEntropyLoss(reduction='mean')

In [21]:
# Adam over the parameters of `decoder` only; the stand-alone `encoder` object
# built in the encoder section is not optimised here.
optimizer = torch.optim.Adam(
    params=decoder.parameters(),
    lr=0.005,
)

In [52]:
# Train
# Full-batch training: every epoch pushes the whole corpus through the decoder
# once and takes a single optimizer step.
NUM_EPOCHS = 10

# The labels do not change between epochs, so flatten and cast them once
# instead of on every iteration.
t = target.flatten().long()

for epoch in range(NUM_EPOCHS):
    optimizer.zero_grad()
    output = decoder(corpus_tensor_en, corpus_tensor_de)
    # Collapse all leading axes so that each row of `o` holds the class scores
    # for one entry of `t`. reshape (unlike view) also accepts a non-contiguous
    # tensor.
    o = output.reshape(-1, output.shape[-1])
    loss = criterion(o, t)
    # .item() turns the 0-dim loss tensor into a plain Python float for formatting.
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))

    loss.backward()
    optimizer.step()

Epoch: 0001 cost = 8.456228
Epoch: 0002 cost = 8.453855
Epoch: 0003 cost = 8.449224
Epoch: 0004 cost = 8.424502
Epoch: 0005 cost = 8.384541
Epoch: 0006 cost = 8.297331
Epoch: 0007 cost = 8.168118
Epoch: 0008 cost = 8.020137
Epoch: 0009 cost = 7.888175
Epoch: 0010 cost = 7.814879
