In [37]:
import torch
from torch import nn, optim
from torch.utils.data import Dataset, TensorDataset, DataLoader
import tqdm

import import_ipynb
from Secondary_File import *
from Secondary_File import normalize, parse_line, build_vocab
from Encoder import *

from statistics import mean

In [38]:
class Decoder(nn.Module):
    """LSTM decoder that maps token IDs to per-step vocabulary logits.

    Token IDs are embedded, run through a (possibly stacked) LSTM, and each
    time step's hidden output is projected onto ``num_embeddings`` logits.

    Args:
        num_embeddings: Vocabulary size; sizes both the embedding table and
            the output projection.
        embedding_dim: Width of each token embedding.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability between stacked LSTM layers.
    """

    def __init__(
        self, num_embeddings,
        embedding_dim=50,
        hidden_size=50,
        num_layers=1,
        dropout=0.2
    ):
        super().__init__()
        # ID 0 is the padding token: its embedding stays at zero and gets
        # no gradient.
        self.emb = nn.Embedding(
            num_embeddings,
            embedding_dim,
            padding_idx=0
        )
        self.lstm = nn.LSTM(
            embedding_dim,
            hidden_size, num_layers,
            batch_first=True,
            # LSTM dropout only acts between stacked layers; passing a
            # non-zero value with a single layer has no effect and only
            # triggers a warning.
            dropout=dropout if num_layers > 1 else 0.0
        )
        self.linear = nn.Linear(
            hidden_size, num_embeddings
        )

    def forward(self, x, h, l=None):
        """Compute logits for every time step of ``x``.

        Args:
            x: Integer tensor of token IDs, batch-first ``(batch, seq_len)``.
            h: Initial LSTM state as an ``(h_0, c_0)`` tuple.
            l: Optional sequence lengths. When given, ``x`` is packed so the
                LSTM skips padded steps; lengths must be in descending order
                (``pack_padded_sequence`` default).

        Returns:
            ``(logits, state)``: ``logits`` is
            ``(batch, steps, num_embeddings)`` and ``state`` is the final
            LSTM ``(h_n, c_n)``. With ``l`` given, ``steps`` is the longest
            length in ``l``.
        """
        x = self.emb(x)
        if l is not None:
            # pack_padded_sequence requires tensor lengths to live on the CPU.
            lengths = l.cpu() if torch.is_tensor(l) else l
            x = nn.utils.rnn.pack_padded_sequence(
                x, lengths, batch_first=True
            )
        x, h = self.lstm(x, h)

        if l is not None:
            x, _ = nn.utils.rnn.pad_packed_sequence(
                x, batch_first=True,
                padding_value=0
            )
        # Project and return on both paths; step-by-step decoding calls this
        # without lengths and still needs the logits.
        x = self.linear(x)
        return x, h

In [39]:
def translate(input_str, enc, dec, max_len=15, device="cpu"):
    """Greedily translate ``input_str`` with an encoder/decoder pair.

    Uses notebook-level names defined outside this function: ``normalize``
    and ``words2tensor`` (helpers), ``ds`` (source/target vocabularies) and
    ``sos`` / ``eos`` (start and end token IDs).

    Args:
        input_str: Source sentence as plain text.
        enc: Encoder; called as ``enc(tensor, l=[length])`` and expected to
            return the initial decoder state.
        dec: Decoder; called as ``dec(token, state)`` and expected to return
            ``(logits, state)``.
        max_len: Maximum source length passed to ``words2tensor`` and maximum
            number of decoding steps.
        device: Device on which the input and start token are placed.

    Returns:
        The predicted target words joined by single spaces.
    """
    # Numericalize the normalized input sentence.
    words = normalize(input_str).split()
    input_tensor, seq_len = words2tensor(
        words, ds.src_word_dict, max_len=max_len
    )
    # Add a batch dimension of size 1 and move to the target device.
    input_tensor = input_tensor.unsqueeze(0).to(device)

    results = []
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        # The encoder takes the lengths as a list (one entry per batch item).
        h = enc(input_tensor, l=[seq_len])

        # Start token, created on the same device as the models' inputs.
        z = torch.tensor(sos, dtype=torch.int64, device=device)

        for _ in range(max_len):
            # Predict the next word from the previous token and state.
            o, h = dec(z.view(1, 1), h)

            # The position of the largest logit is the next word's ID.
            wi = o.detach().view(-1).max(0)[1]

            if wi.item() == eos:
                break
            results.append(wi.item())

            # Feed the predicted ID back in as the next input.
            z = wi

    # Map the recorded IDs back to target-language words.
    return " ".join(ds.trg_word_list[idx] for idx in results)

In [40]:
# enc = Encoder(len(ds.src_word_list), 100, 100, 2)
# dec = Decoder(len(ds.trg_word_list), 100, 100, 2)
# translate("I am a student.", enc, dec)

In [41]:
"""
    Training-Model.
"""
# Model and optimizer setup, kept disabled as in the original cell.
# The decoder is sized by the *target* vocabulary (its output IDs are looked
# up in ds.trg_word_list), and its optimizer must own the decoder's parameters.
# enc = Encoder(len(ds.src_word_list), 100, 100, 2)
# dec = Decoder(len(ds.trg_word_list), 100, 100, 2)
# enc.to("cpu")
# dec.to("cpu")
# opt_enc = optim.Adam(enc.parameters(), 0.002)
# opt_dec = optim.Adam(dec.parameters(), 0.002)

# Cross-entropy over vocabulary logits; targets equal to -100 (the default
# ignore_index) are excluded from the loss.
# Bound under both `loss_fn` and `loss_f` (this cell's original name) so that
# either spelling resolves.
loss_fn = nn.CrossEntropyLoss()
loss_f = loss_fn

In [52]:
def to2D(x):
    """Merge the first two axes of ``x`` into one.

    The result has ``x.shape[0] * x.shape[1]`` rows; all remaining axes are
    flattened into the second dimension.
    """
    n_rows = x.shape[0] * x.shape[1]
    return x.reshape(n_rows, -1)


# Train the encoder/decoder pair for 30 epochs. After each epoch, print the
# mean batch loss and a few sample translations.
for epoch in range(30):
    # The end-of-epoch sampling switches to eval mode, so training mode has
    # to be restored at the start of every epoch.
    enc.train()
    dec.train()
    losses = []

    for x, lx, y, ly in tqdm.tqdm(loader):
        # Packing requires sequences in descending length order, so sort the
        # batch by source length first.
        lx, sort_idx = lx.sort(descending=True)
        x, y, ly = x[sort_idx], y[sort_idx], ly[sort_idx]
        x, y = x.to("cpu"), y.to("cpu")

        # Encode the source sentences into the decoder's initial state.
        ctx = enc(x, l=lx)

        # The decoder packs the targets, so re-sort by target length and
        # apply the same permutation to the encoder state (batch is dim 1).
        ly, sort_idx = ly.sort(descending=True)
        y = y[sort_idx]
        h0 = (ctx[0][:, sort_idx, :], ctx[1][:, sort_idx, :])

        # Decoder input is the target without its last step. Work on a copy:
        # a slice (or .detach()) shares storage with y, and overwriting the
        # -100 markers in place would also erase them from the loss targets.
        z = y[:, :-1].clone()

        # -100 is not a valid embedding index; feed padding ID 0 instead.
        z[z == -100] = 0

        # Logits cover ly - 1 steps; compare against the target shifted by
        # one, truncated to the longest sequence in the batch.
        o, _ = dec(z, h0, l=ly - 1)
        target = y[:, 1:int(ly.max())]
        # reshape(-1) always yields a 1-D target, even for a single element.
        loss = loss_f(to2D(o), target.reshape(-1))

        # Backpropagate and update both networks.
        enc.zero_grad()
        dec.zero_grad()
        loss.backward()
        opt_enc.step()
        opt_dec.step()
        losses.append(loss.item())

    # Report the epoch's mean loss and sample translations with dropout
    # disabled.
    enc.eval()
    dec.eval()
    if losses:  # mean() raises on an empty sequence (e.g. empty loader)
        print(epoch, mean(losses))

    with torch.no_grad():
        for sentence in (
            "I am a student.",
            "He likes to eat pizza.",
            "She is my mother.",
        ):
            print(translate(sentence,
                            enc, dec, max_len=max_len, device="cpu"))