In [90]:
import torch
import torch.nn as nn
import torch.optim as optim
import math

In [91]:
class InputEmbeddings(nn.Module):
    """Token-id to vector lookup whose output is scaled by sqrt(d_model)."""

    def __init__(self, d_model, vocab_size):
        """
        Args:
            d_model: width of each embedding vector.
            vocab_size: number of rows in the embedding table.
        """
        super().__init__()

        self.d_model, self.vocab_size = d_model, vocab_size
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=d_model)

    def forward(self, x):
        """Return the embeddings of the ids in ``x`` multiplied by sqrt(d_model)."""
        scale = math.sqrt(self.d_model)
        return self.embedding(x) * scale

In [92]:
class PositionalEncoding(nn.Module):
    """Adds a fixed sinusoidal position table to its input and applies dropout.

    The table ``pe`` has shape (1, seq_len, d_model): even columns hold
    sin(pos * div_term) and odd columns hold cos(pos * div_term). It is stored
    as a buffer, so it is not a trainable parameter.

    Args:
        d_model: size of the last dimension of the inputs.
        seq_len: number of positions precomputed in the table.
        dropout: probability passed to ``nn.Dropout``.
    """

    def __init__(self, d_model, seq_len, dropout):
        super().__init__()

        self.d_model = d_model
        self.seq_len = seq_len
        self.dropout = nn.Dropout(dropout)

        # To get the positional encoding, we use the sine and cosine functions for each (i, pos).
        # Let's build a matrix with the shape (seq_len, d_model)
        pe = torch.zeros(seq_len, d_model)

        # Column vector of positions 0 .. seq_len - 1, shape (seq_len, 1).
        # Computed in float64 (as before) and cast when written into the float32 table.
        position = torch.arange(0, seq_len, dtype=torch.float64).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))

        # (seq_len, ceil(d_model / 2)) matrix of sinusoid arguments, computed once.
        angles = position * div_term

        # Apply the sin and cos
        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd column than even column, so
        # trim the angles instead of failing with a shape mismatch.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        pe = pe.unsqueeze(0) # shape (1, seq_len, d_model)

        self.register_buffer('pe', pe)  # buffer: saved with the module, not updated during training

    def forward(self, x):
        """Return ``dropout(x + pe[:, :x.shape[1]])`` without modifying ``x``.

        The addition is out of place so the caller's tensor is left untouched
        (the previous ``x +=`` wrote the encoding into the input itself).
        """
        x = x + self.pe[:, :x.shape[1], :]
        return self.dropout(x)


In [93]:
class LayerNormalization(nn.Module):
    """Normalises the last dimension with a learnable scalar gain and shift.

    Computes ``alpha * (x - mean) / (std + epsilon) + bias`` where mean and std
    are taken over the last dimension (``Tensor.std`` with its default settings).
    """

    def __init__(self, epsilon = 1e-6):
        """
        Args:
            epsilon: value added to the std before dividing.
        """
        super().__init__()

        self.epsilon = epsilon
        # Single-element gain and shift, shared by every feature.
        self.alpha = nn.Parameter(torch.ones(1))
        self.bias = nn.Parameter(torch.zeros(1))

    def forward(self, x):
        """Normalise ``x`` along its last dimension."""
        mu = x.mean(dim=-1, keepdim=True)
        sigma = x.std(dim=-1, keepdim=True)
        centred = x - mu
        return self.alpha * centred / (sigma + self.epsilon) + self.bias

In [94]:
class FeedForward(nn.Module):
    """Two linear layers with a ReLU and dropout in between: d_model -> d_ff -> d_model."""

    def __init__(self, d_model, d_ff, dropout):
        """
        Args:
            d_model: input and output feature size.
            d_ff: hidden feature size.
            dropout: probability passed to ``nn.Dropout``.
        """
        super().__init__()

        self.linear1 = nn.Linear(d_model, d_ff) # W1 and B1
        self.linear2 = nn.Linear(d_ff, d_model) # W2 and B2
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Apply linear1, ReLU, dropout and linear2, in that order."""
        hidden = torch.relu(self.linear1(x))
        return self.linear2(self.dropout(hidden))


In [95]:
class MultiheadAttention(nn.Module):
    """Multi-head scaled dot-product attention with learned Q/K/V/output projections.

    Args:
        d_model: feature size of the inputs and of the output.
        num_heads: number of heads; must divide ``d_model`` evenly.
        dropout: probability passed to ``nn.Dropout`` (applied to the attention weights).

    Raises:
        ValueError: if ``d_model`` is not divisible by ``num_heads``.
    """

    def __init__(self, d_model, num_heads, dropout):
        super().__init__()

        # Fail at construction time with a clear message; otherwise the
        # mismatch only surfaces later as a shape error inside ``view``.
        if d_model % num_heads != 0:
            raise ValueError(f'd_model ({d_model}) must be divisible by num_heads ({num_heads})')

        self.d_model = d_model
        self.h = num_heads
        self.d_k = d_model // num_heads
        self.w_q = nn.Linear(d_model, d_model)
        self.w_k = nn.Linear(d_model, d_model)
        self.w_v = nn.Linear(d_model, d_model)

        self.w_o = nn.Linear(d_model, d_model)
        self.dropout = nn.Dropout(dropout)

    @staticmethod
    def attention(query, key, value, mask, dropout: nn.Dropout):
        """Scaled dot-product attention.

        Args:
            query, key, value: tensors whose last dimension is the per-head size;
                ``forward`` passes them as [batch, h, seq_len, d_k].
            mask: optional tensor; positions where ``mask == 0`` are filled with
                -1e9 before the softmax. ``None`` disables masking.
            dropout: optional dropout module applied to the attention weights.

        Returns:
            Tuple ``(attention_weights @ value, attention_weights)``.
        """
        d_k = query.shape[-1]

        # [batch, h, seq_len, d_k ] -> [batch, h, seq_len, seq_len]
        attention_score = (query @ key.transpose(-2, -1)) / math.sqrt(d_k)
        if mask is not None:
            # In-place is fine here: attention_score was created just above.
            attention_score.masked_fill_(mask == 0, -1e9)

        attention_score = attention_score.softmax(dim = -1)     # [batch, h, seq_len, seq_len]

        if dropout is not None:
            attention_score = dropout(attention_score)

        return (attention_score @ value), attention_score

    def _split_heads(self, t):
        """Reshape [batch, seq_len, d_model] -> [batch, h, seq_len, d_k]."""
        return t.view(t.shape[0], t.shape[1], self.h, self.d_k).transpose(1, 2)

    def forward(self, Q, K, V, mask):
        """Project Q, K and V, attend per head, merge the heads and apply ``w_o``.

        The attention weights of the last call are kept in ``self.attention_score``.
        """
        # shape of query, key, value : [batch_size, seq_len, d_model]
        query = self._split_heads(self.w_q(Q))
        key = self._split_heads(self.w_k(K))
        value = self._split_heads(self.w_v(V))

        x, self.attention_score = MultiheadAttention.attention(query, key, value, mask, self.dropout)

        # Combine the heads: back to the shape [batch, seq_len, d_model]
        x = x.transpose(1, 2).contiguous().view(x.shape[0], -1, self.h * self.d_k)

        return self.w_o(x)

In [96]:
class ResidualConnection(nn.Module):
    """Skip connection around a sublayer: ``x + dropout(sublayer(norm(x)))``."""

    def __init__(self, dropout):
        """
        Args:
            dropout: probability passed to ``nn.Dropout``.
        """
        super().__init__()

        self.dropout = nn.Dropout(dropout)
        self.norm = LayerNormalization()

    def forward(self, x, sublayer):
        """Normalise ``x``, run ``sublayer`` on it, apply dropout and add ``x`` back."""
        normed = self.norm(x)
        branch = self.dropout(sublayer(normed))
        return x + branch

In [97]:
class EncoderBlock(nn.Module):
    """One encoder layer: self-attention then feed-forward, each inside a residual connection."""

    def __init__(self, self_attention_block: MultiheadAttention, feed_forward_block: FeedForward, dropout):
        """
        Args:
            self_attention_block: attention module called with (x, x, x, src_mask).
            feed_forward_block: module applied after the attention step.
            dropout: probability used by the two residual connections.
        """
        super().__init__()

        self.self_attention_block = self_attention_block
        self.feed_forward_block = feed_forward_block
        self.residual_connections = nn.ModuleList([ResidualConnection(dropout) for _ in range(2)])

    def forward(self, x, src_mask):
        """Run the self-attention and feed-forward steps on ``x``.

        ``src_mask`` is forwarded to the attention block; per the original note it
        keeps padding positions from interacting with real words.
        """
        attention_residual, feed_forward_residual = self.residual_connections

        def self_attend(normed):
            # Query, key and value are all the same normalised input.
            return self.self_attention_block(normed, normed, normed, src_mask)

        x = attention_residual(x, self_attend)
        return feed_forward_residual(x, self.feed_forward_block)

In [98]:
class Encoder(nn.Module):
    """Applies a sequence of layers, then a final ``LayerNormalization``."""

    def __init__(self, layers: nn.ModuleList):
        """
        Args:
            layers: modules called in order as ``layer(x, mask)``.
        """
        super().__init__()

        self.layers = layers
        self.norm = LayerNormalization()

    def forward(self, x, mask):
        """Feed ``x`` through every layer with ``mask`` and normalise the result."""
        hidden = x
        for block in self.layers:
            hidden = block(hidden, mask)

        return self.norm(hidden)

In [99]:
class DecoderBlock(nn.Module):
    """One decoder layer: masked self-attention, cross-attention over the encoder
    output, then feed-forward, each inside a residual connection."""

    def __init__(self, self_attention_block: MultiheadAttention, cross_attention_block: MultiheadAttention, feed_forward_block: FeedForward, dropout: float):
        """
        Args:
            self_attention_block: attention module called with (x, x, x, tgt_mask).
            cross_attention_block: attention module called with
                (x, encoder_output, encoder_output, src_mask).
            feed_forward_block: module applied last.
            dropout: probability used by the residual connections.
        """
        super().__init__()

        self.self_attention_block = self_attention_block
        self.cross_attention_block = cross_attention_block
        self.feed_forward_block = feed_forward_block
        # Not used by forward(); kept so the module's attributes stay the same.
        self.dropout = nn.Dropout(dropout)
        self.residual_connections = nn.ModuleList([ResidualConnection(dropout) for _ in range(3)])

    def forward(self, x, encoder_output, src_mask, tgt_mask):
        """Run the three sub-steps on ``x`` and return the result."""
        self_residual, cross_residual, feed_forward_residual = self.residual_connections

        def self_attend(normed):
            return self.self_attention_block(normed, normed, normed, tgt_mask)

        def cross_attend(normed):
            # Queries come from the decoder; keys and values from the encoder output.
            return self.cross_attention_block(normed, encoder_output, encoder_output, src_mask)

        x = self_residual(x, self_attend)
        x = cross_residual(x, cross_attend)
        return feed_forward_residual(x, self.feed_forward_block)

In [100]:
class Decoder(nn.Module):
    """Applies a sequence of decoder layers, then a final ``LayerNormalization``."""

    def __init__(self, layers: nn.ModuleList):
        """
        Args:
            layers: modules called in order as
                ``layer(x, encoder_output, src_mask, tgt_mask)``.
        """
        super().__init__()

        self.layers = layers
        self.norm = LayerNormalization()

    def forward(self, x, encoder_output, src_mask, tgt_mask):
        """Feed ``x`` through every layer and normalise the result."""
        hidden = x
        for block in self.layers:
            hidden = block(hidden, encoder_output, src_mask, tgt_mask)

        return self.norm(hidden)

In [101]:
# The target is to make [batch, seq_len, d_model] -> [batch, seq_len, vocab_size]

class ProjectionLayer(nn.Module):
    """Linear map from d_model to vocab_size followed by a log-softmax over the last dimension."""

    def __init__(self, d_model, vocab_size):
        """
        Args:
            d_model: input feature size.
            vocab_size: output feature size.
        """
        super().__init__()

        self.proj = nn.Linear(d_model, vocab_size)

    def forward(self, x):
        """Return log-probabilities: ``log_softmax(proj(x), dim=-1)``."""
        logits = self.proj(x)
        return torch.log_softmax(logits, dim=-1)

In [102]:
class Transformers(nn.Module):
    """Bundles the encoder, decoder, embeddings, positional encodings and the
    projection layer, exposing them as ``encode``, ``decode`` and ``project``."""

    def __init__(self, encoder: Encoder, decoder: Decoder, src_embedding: InputEmbeddings, tgt_embeddings: InputEmbeddings, src_pos: PositionalEncoding, tgt_pos: PositionalEncoding, projection_layer: ProjectionLayer):
        super().__init__()

        self.encoder = encoder
        self.decoder = decoder
        self.src_embeddings = src_embedding
        self.tgt_embeddings = tgt_embeddings
        self.src_pos = src_pos
        self.tgt_pos = tgt_pos
        # NOTE(review): the attribute name is misspelled; it is kept unchanged
        # because it is part of the module's public attribute set.
        self.porjectionLayer = projection_layer

    def encode(self, src, src_mask):
        """Embed ``src``, add positional encoding and run the encoder with ``src_mask``."""
        embedded = self.src_pos(self.src_embeddings(src))
        return self.encoder(embedded, src_mask)

    def decode(self, encoder_output, src_mask, tgt, tgt_mask):
        """Embed ``tgt``, add positional encoding and run the decoder against ``encoder_output``."""
        embedded = self.tgt_pos(self.tgt_embeddings(tgt))
        return self.decoder(embedded, encoder_output, src_mask, tgt_mask)

    def project(self, x):
        """Apply the projection layer to ``x``."""
        return self.porjectionLayer(x)

In [103]:
def build_transformers(src_vocab_size, tgt_vocab_size, src_seq_len, tgt_seq_len, d_model = 64, N = 3, num_heads = 8, dropout = 0.1, d_ff = 1024):
    """Assemble a ``Transformers`` model and Xavier-initialise its weight matrices.

    Args:
        src_vocab_size, tgt_vocab_size: vocabulary sizes for the two embeddings;
            ``tgt_vocab_size`` is also the projection layer's output size.
        src_seq_len, tgt_seq_len: lengths given to the two positional encodings.
        d_model: feature size used throughout.
        N: number of encoder blocks and of decoder blocks.
        num_heads: heads per attention module.
        dropout: dropout probability passed to every sub-module.
        d_ff: hidden size of the feed-forward blocks.

    Returns:
        The constructed ``Transformers`` module.
    """

    def new_attention():
        return MultiheadAttention(d_model, num_heads, dropout)

    def new_feed_forward():
        return FeedForward(d_model, d_ff, dropout)

    # Modules are created in the same order as before (embeddings, encoder
    # blocks, decoder blocks, projection) so seeded runs draw the same numbers.
    src_embeddings = InputEmbeddings(d_model, src_vocab_size)
    tgt_embeddings = InputEmbeddings(d_model, tgt_vocab_size)

    src_pos = PositionalEncoding(d_model, src_seq_len, dropout)
    tgt_pos = PositionalEncoding(d_model, tgt_seq_len, dropout)

    # Arguments evaluate left to right: attention first, then feed-forward.
    encoder_blocks = [
        EncoderBlock(new_attention(), new_feed_forward(), dropout)
        for _ in range(N)
    ]
    # Self-attention, then cross-attention, then feed-forward.
    decoder_blocks = [
        DecoderBlock(new_attention(), new_attention(), new_feed_forward(), dropout)
        for _ in range(N)
    ]

    encoder = Encoder(nn.ModuleList(encoder_blocks))
    decoder = Decoder(nn.ModuleList(decoder_blocks))

    projection_layer = ProjectionLayer(d_model, tgt_vocab_size)

    transformers = Transformers(encoder, decoder, src_embeddings, tgt_embeddings, src_pos, tgt_pos, projection_layer)

    # Re-initialise every parameter with more than one dimension.
    for weight in (p for p in transformers.parameters() if p.dim() > 1):
        nn.init.xavier_uniform_(weight)

    return transformers

# 1. Get data from Hugging Face

In [104]:
from datasets import load_dataset
from tokenizers import Tokenizer
from tokenizers.models import WordLevel
from tokenizers.trainers import WordLevelTrainer
from tokenizers.pre_tokenizers import Whitespace

from pathlib import Path

from torch.utils.data import Dataset, DataLoader, random_split

def get_all_sentences(ds, lang):
    """Lazily yield ``item['translation'][lang]`` for every item in ``ds``."""
    yield from (example['translation'][lang] for example in ds)

def get_tokenizers(config, ds, lang):
    """Load the word-level tokenizer for ``lang``, training and saving it if no file exists.

    Args:
        config: dict whose 'tokenizer_file' entry is a format string that receives ``lang``.
        ds: iterable of items exposing ``item['translation'][lang]``.
        lang: language key used for both the file name and the training sentences.

    Returns:
        The loaded or newly trained tokenizer.

    Note:
        The trainer keyword was previously misspelled as ``min_freuency``, so the
        intended threshold was never passed under its real name. A tokenizer file
        saved before this fix is still loaded as-is; delete it to retrain.
    """
    tokenizer_path = Path(config['tokenizer_file'].format(lang))

    # Reuse a previously saved tokenizer when one exists.
    if tokenizer_path.exists():
        return Tokenizer.from_file(str(tokenizer_path))

    tokenizer = Tokenizer(WordLevel(unk_token = '[UNK]'))
    tokenizer.pre_tokenizer = Whitespace()
    trainer = WordLevelTrainer(special_tokens = ['[UNK]', '[PAD]', '[SOS]', '[EOS]'], min_frequency = 2)
    tokenizer.train_from_iterator(get_all_sentences(ds, lang), trainer=trainer)
    tokenizer.save(str(tokenizer_path))

    return tokenizer


In [105]:
class BilingualDataset(Dataset):
    """Wraps a translation dataset and returns fixed-length tensors per sentence pair.

    Args:
        ds: indexable collection of items exposing ``item['translation'][lang]``.
        tokenizer_src, tokenizer_tgt: tokenizers providing ``encode(text).ids``
            and ``token_to_id``.
        src_lang, tgt_lang: keys into the 'translation' dict.
        seq_len: length every returned sequence is padded to.

    The [SOS], [EOS] and [PAD] ids are all read from ``tokenizer_tgt`` and are
    also used for the encoder input, so this assumes both tokenizers assign
    these tokens the same ids — TODO confirm for tokenizers built elsewhere.
    """

    def __init__(self, ds, tokenizer_src, tokenizer_tgt, src_lang, tgt_lang, seq_len):
        super().__init__()

        self.seq_len = seq_len
        self.ds = ds
        self.tokenizer_src = tokenizer_src
        self.tokenizer_tgt = tokenizer_tgt
        self.src_lang = src_lang
        self.tgt_lang = tgt_lang

        self.sos_token = torch.tensor([tokenizer_tgt.token_to_id("[SOS]")], dtype=torch.int64)
        self.eos_token = torch.tensor([tokenizer_tgt.token_to_id("[EOS]")], dtype=torch.int64)
        self.pad_token = torch.tensor([tokenizer_tgt.token_to_id("[PAD]")], dtype=torch.int64)

    def __len__(self):
        return len(self.ds)

    def _padding(self, count):
        """Return a 1-D int64 tensor holding ``count`` copies of the pad id."""
        # Built directly instead of converting a Python list of one-element
        # tensors, which was done element by element for every sample.
        return torch.full((count,), self.pad_token.item(), dtype=torch.int64)

    def __getitem__(self, index):
        """Tokenise pair ``index`` and return its model inputs.

        Returns:
            dict with 'encoder_input', 'decoder_input' and 'label' (each of length
            seq_len), 'encoder_mask' (1, 1, seq_len), 'decoder_mask'
            (1, seq_len, seq_len), plus the raw 'src_text' and 'tgt_text'.

        Raises:
            ValueError: if either sentence does not fit in ``seq_len`` once the
                special tokens are added.
        """
        src_target_pair = self.ds[index]
        src_text = src_target_pair['translation'][self.src_lang]
        tgt_text = src_target_pair['translation'][self.tgt_lang]

        enc_input_tokens = self.tokenizer_src.encode(src_text).ids
        dec_input_tokens = self.tokenizer_tgt.encode(tgt_text).ids

        # The encoder input carries both [SOS] and [EOS]; the decoder input and
        # the label carry one special token each.
        enc_num_padding_tokens = self.seq_len - len(enc_input_tokens) - 2
        dec_num_padding_tokens = self.seq_len - len(dec_input_tokens) - 1

        if enc_num_padding_tokens < 0 or dec_num_padding_tokens < 0:
            raise ValueError('Sentence is too long')

        enc_tokens = torch.tensor(enc_input_tokens, dtype=torch.int64)
        dec_tokens = torch.tensor(dec_input_tokens, dtype=torch.int64)
        dec_padding = self._padding(dec_num_padding_tokens)

        # [SOS] tokens [EOS] [PAD]...
        encoder_input = torch.cat(
            [self.sos_token, enc_tokens, self.eos_token, self._padding(enc_num_padding_tokens)]
        )

        # [SOS] tokens [PAD]...
        decoder_input = torch.cat([self.sos_token, dec_tokens, dec_padding])

        # tokens [EOS] [PAD]...  (what we expect from the output of the decoder)
        label = torch.cat([dec_tokens, self.eos_token, dec_padding])

        assert encoder_input.size(0) == self.seq_len
        assert decoder_input.size(0) == self.seq_len
        assert label.size(0) == self.seq_len

        return {
            "encoder_input": encoder_input, # seq_len
            "decoder_input": decoder_input, # seq_len
            "encoder_mask": (encoder_input != self.pad_token).unsqueeze(0).unsqueeze(0).int(),   # (1, 1, seq_len)
            # Padding mask (1, 1, seq_len) combined with the causal mask (1, seq_len, seq_len).
            "decoder_mask": (decoder_input != self.pad_token).unsqueeze(0).unsqueeze(0).int() & casual_mask(decoder_input.size(0)),
            "label": label,
            "src_text": src_text,
            "tgt_text": tgt_text
        }


In [106]:
def casual_mask(size):
    """Return a (1, size, size) boolean mask that is True on and below the diagonal.

    Row ``i`` is True for columns ``0..i`` and False for the columns after it.
    """
    above_diagonal = torch.ones(1, size, size).triu(diagonal=1).int()
    return above_diagonal.eq(0)


In [107]:
def get_ds(config):
    """Load the opus_books split, build the tokenizers and return the data loaders.

    Args:
        config: dict providing 'lang_src', 'lang_tgt', 'seq_len' and 'batch_size'
            (plus whatever ``get_tokenizers`` reads).

    Returns:
        Tuple ``(train_data_loader, val_dataloader, tokenizer_src, tokenizer_tgt)``.
        The validation loader uses a batch size of 1.
    """
    ds_raw = load_dataset('opus_books', f"{config['lang_src']}-{config['lang_tgt']}", split='train')

    # Build tokenizers
    tokenizer_src = get_tokenizers(config, ds_raw, config['lang_src'])
    tokenizer_tgt = get_tokenizers(config, ds_raw, config['lang_tgt'])

    # 90% for training and 10% for validation
    train_ds_size = int(0.9 * len(ds_raw))
    val_ds_size = len(ds_raw) - train_ds_size
    train_ds_raw, val_ds_raw = random_split(ds_raw, [train_ds_size, val_ds_size])

    train_ds = BilingualDataset(train_ds_raw, tokenizer_src, tokenizer_tgt, config['lang_src'], config['lang_tgt'], config['seq_len'])
    val_ds = BilingualDataset(val_ds_raw, tokenizer_src, tokenizer_tgt, config['lang_src'], config['lang_tgt'], config['seq_len'])

    # Report the longest tokenised sentence on each side (informational only).
    max_len_src = 0
    max_len_tgt = 0

    for item in ds_raw:
        src_ids = tokenizer_src.encode(item['translation'][config['lang_src']]).ids
        # Target sentences must be measured with the target tokenizer; the
        # source tokenizer was used here before, giving a wrong target length.
        tgt_ids = tokenizer_tgt.encode(item['translation'][config['lang_tgt']]).ids

        max_len_src = max(max_len_src, len(src_ids))
        max_len_tgt = max(max_len_tgt, len(tgt_ids))

    print(f'Max length of source sentence: {max_len_src}, target: {max_len_tgt}')

    train_data_loader = DataLoader(train_ds, batch_size= config['batch_size'], shuffle= True)
    val_dataloader = DataLoader(val_ds, batch_size= 1, shuffle= True)

    return train_data_loader, val_dataloader, tokenizer_src, tokenizer_tgt

def get_model(config, vocab_src_len, vocab_tgt_len):
    """Build the model via ``build_transformers``.

    ``config['seq_len']`` is used for both the source and target sequence length
    and ``config['d_model']`` for the feature size; all other settings keep
    ``build_transformers``' defaults.
    """
    seq_len = config['seq_len']
    return build_transformers(vocab_src_len, vocab_tgt_len, seq_len, seq_len, config['d_model'])

def get_config():
    """Return a fresh dict with the default training configuration."""
    return dict(
        batch_size=32,
        num_epoch=10,
        lr=1e-4,
        seq_len=350,
        d_model=64,
        lang_src="en",
        lang_tgt="it",
        model_folder="weights",
        model_basename="tmodel_",
        preload=None,                          # epoch suffix of a checkpoint to resume from, if any
        tokenizer_file="tokenizer_{0}.json",   # {0} is filled with the language code
        experiment_name="runs/tmodel",
    )

def get_weights_file_path(config, epoch):
    """Return ``./<model_folder>/<model_basename><epoch>.pt`` as a string.

    Args:
        config: dict providing 'model_folder' and 'model_basename'.
        epoch: value formatted into the file name.
    """
    folder = config['model_folder']
    filename = '{}{}.pt'.format(config['model_basename'], epoch)

    return str(Path('.').joinpath(folder, filename))


In [108]:
def greedy_decode(model, source, source_mask, tokenizer_src, tokenizer_tgt, max_len, device):
    """Generate target ids one at a time, always taking the highest-scoring token.

    Decoding starts from [SOS] and stops once [EOS] has been appended or the
    sequence holds ``max_len`` ids. ``tokenizer_src`` is accepted but not used.

    Returns:
        1-D tensor of generated ids, starting with the [SOS] id.
    """
    sos_idx = tokenizer_tgt.token_to_id('[SOS]')
    eos_idx = tokenizer_tgt.token_to_id('[EOS]')

    # The encoder runs once; its output is reused for every decoding step.
    encoder_output = model.encode(source, source_mask)

    # Start the generated sequence with the [SOS] token.
    generated = torch.empty(1, 1).fill_(sos_idx).type_as(source).to(device)

    while generated.size(1) != max_len:
        # Causal mask sized to the tokens generated so far.
        step_mask = casual_mask(generated.size(1)).type_as(source_mask).to(device)

        decoded = model.decode(encoder_output, source_mask, generated, step_mask)

        # Score the vocabulary for the last position only and take the best entry.
        scores = model.project(decoded[:, -1])
        next_word = torch.max(scores, dim= 1).indices
        next_id = next_word.item()

        appended = torch.empty(1, 1).type_as(source).fill_(next_id).to(device)
        generated = torch.cat([generated, appended], dim= 1)

        if next_id == eos_idx:
            break

    return generated.squeeze(0)

In [109]:
def run_validaton(model, validation_ds, tokenizer_src, tokenizer_tgt, max_len, device, print_msg, global_state, num_examples = 2):
    """Greedy-decode up to ``num_examples`` validation batches and report them.

    Puts ``model`` in eval mode, then for each batch sends a separator line plus
    the source, target and predicted text to ``print_msg``. Every batch must
    have batch size 1. ``global_state`` is accepted but not used.
    """
    model.eval()

    console_width = 80
    separator = '-' * console_width

    with torch.no_grad():
        for shown, batch in enumerate(validation_ds, start=1):
            encoder_input = batch['encoder_input'].to(device)
            encoder_mask = batch['encoder_mask'].to(device)

            assert encoder_input.size(0) == 1       # Make sure the batch size = 1

            model_out = greedy_decode(model, encoder_input, encoder_mask, tokenizer_src, tokenizer_tgt, max_len, device)

            source_text = batch['src_text'][0]
            target_text = batch['tgt_text'][0]
            model_out_text = tokenizer_tgt.decode(model_out.detach().cpu().numpy())

            report = (
                separator,
                f'SOURCE: {source_text}',
                f'TARGET: {target_text}',
                f'PREDICTED: {model_out_text}',
            )
            for line in report:
                print_msg(line)

            # Stop once the requested number of examples has been shown.
            if shown == num_examples:
                break


In [110]:
from tqdm import tqdm

def train_model(config):
    """Train the translation model, validating and checkpointing after every epoch.

    Args:
        config: dict providing 'model_folder', 'lr', 'preload', 'num_epoch' and
            'seq_len', plus whatever ``get_ds``, ``get_model`` and
            ``get_weights_file_path`` read.

    Side effects:
        Creates ``config['model_folder']``, prints progress, and writes one
        checkpoint per epoch containing the epoch, model state, optimizer state
        and global step.

    Fixes relative to the previous version:
        * ``g.train()`` referred to an undefined name; the model itself is now
          switched back to training mode at the start of every epoch.
        * When preloading, the saved model weights are restored as well as the
          optimizer state.
        * The loss ignores the [PAD] id of the *target* tokenizer, since the
          labels are target ids.
        * Epoch numbers are formatted as ``02d`` (the old `` 02d`` spec put a
          space into checkpoint file names, e.g. 'tmodel_ 0.pt').
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    print(f'Using device {device}')

    Path(config['model_folder']).mkdir(parents= True, exist_ok= True)

    train_dataloader, val_dataloader, tokenizer_src, tokenizer_tgt = get_ds(config)
    model = get_model(config, tokenizer_src.get_vocab_size(), tokenizer_tgt.get_vocab_size()).to(device)

    optimizer = optim.Adam(model.parameters(), lr = config['lr'], eps= 1e-9)

    initial_epoch = 0
    global_step = 0

    if config['preload']:
        model_filename = get_weights_file_path(config, config['preload'])
        print(f'Preloading model {model_filename}')
        # map_location lets a checkpoint saved on another device be resumed here.
        state = torch.load(model_filename, map_location=device)
        model.load_state_dict(state['model_state_dict'])
        initial_epoch = state['epoch'] + 1
        optimizer.load_state_dict(state['optimizer_state_dict'])
        global_step = state['global_step']

    loss_fn = nn.CrossEntropyLoss(ignore_index= tokenizer_tgt.token_to_id('[PAD]'), label_smoothing= 0.1)

    for epoch in range(initial_epoch, config['num_epoch']):
        # run_validaton() leaves the model in eval mode, so re-enable training
        # behaviour (dropout) at the start of each epoch.
        model.train()

        batch_iterator = tqdm(train_dataloader, desc= f'Processing epoch {epoch:02d}')

        for batch in batch_iterator:
            encoder_input = batch['encoder_input'].to(device)   # [batch, seq_len]
            decoder_input = batch['decoder_input'].to(device)   # [batch, seq_len]
            encoder_mask = batch['encoder_mask'].to(device)     # [batch, 1, 1, seq_len]
            decoder_mask = batch['decoder_mask'].to(device)     # [batch, 1, seq_len, seq_len]

            encoder_output = model.encode(encoder_input, encoder_mask)  # (batch, seq_len, d_model)
            decoder_output = model.decode(encoder_output, encoder_mask, decoder_input, decoder_mask)    # [batch, seq_len, d_model]
            proj_output = model.project(decoder_output)     # [batch, seq_len, tgt_vocab_size]

            label = batch['label'].to(device)   # (batch, seq_len)

            # (batch, seq_len, tgt_vocab_size) -> (batch * seq_len, tgt_vocab_size)
            loss = loss_fn(proj_output.view(-1, tokenizer_tgt.get_vocab_size()), label.view(-1))

            batch_iterator.set_postfix({'loss': f'{loss.item() : 6.3f}'})

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            global_step += 1

        run_validaton(model, val_dataloader, tokenizer_src, tokenizer_tgt, config['seq_len'], device, lambda msg: batch_iterator.write(msg), global_step)

        # Save the model
        model_filename = get_weights_file_path(config, f'{epoch:02d}')
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'global_step': global_step
        }, model_filename)

In [111]:
import warnings
if __name__ == '__main__':
    # Suppress every warning for the duration of the run.
    warnings.filterwarnings('ignore')
    # Default configuration; `config` stays a module-level name after this cell.
    config = get_config()
    # Runs the training loop, including per-epoch validation and checkpointing.
    train_model(config)

Using device cuda
Max length of source sentence: 309, target: 274


Processing epoch  0: 100%|██████████| 910/910 [05:37<00:00,  2.70it/s, loss=7.323]


--------------------------------------------------------------------------------
SOURCE: Then they talked about horses, about that day's races, and how gallantly Vronsky's Atlasny had won the first prize.
TARGET: Dopo, il discorso cadde sui cavalli, sulle corse di quel giorno e su come audacemente aveva vinto il primo premio Atlasnyj di Vronskij.
PREDICTED: , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , 

Processing epoch  1: 100%|██████████| 910/910 [05:37<00:00,  2.70it/s, loss=6.818]


--------------------------------------------------------------------------------
SOURCE: Indeed, she had quite a long argument with the Lory, who at last turned sulky, and would only say, 'I am older than you, and must know better'; and this Alice would not allow without knowing how old it was, and, as the Lory positively refused to tell its age, there was no more to be said.
TARGET: Discusse lungamente col Lori, ma tosto costui le mostrò un viso accigliato, dicendo perentoriamente: — Son più vecchio di te, perciò ne so più di te; — ma Alice non volle convenirne se prima non le avesse detto quanti anni aveva. Il Lori non volle dirglielo, e la loro conversazione fu troncata.
PREDICTED: — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — 

Processing epoch  2: 100%|██████████| 910/910 [05:37<00:00,  2.70it/s, loss=6.155]


--------------------------------------------------------------------------------
SOURCE: 'P.S. – I enclose some money, which you may need for your expenses.'
TARGET: P. S. — In questa lettera è accluso il denaro che potrà essere necessario per le vostre spese».
PREDICTED: — E non non non non non non non non non non non non non non non non non non non non non non non non non non non non non non non non non non non non non non non non non non non non non non non non .
--------------------------------------------------------------------------------
SOURCE: 'Nothing,' she answered, in the same cold quiet manner.
TARGET: — Io, nulla — ella rispose con altrettanta freddezza e calma.
PREDICTED: — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — — E — E — E — E — E — — — — — — — — — — — — — — —

Processing epoch  3: 100%|██████████| 910/910 [05:37<00:00,  2.70it/s, loss=6.366]


--------------------------------------------------------------------------------
SOURCE: So little do we see before us in the world, and so much reason have we to depend cheerfully upon the great Maker of the world, that He does not leave His creatures so absolutely destitute, but that in the worst circumstances they have always something to be thankful for, and sometimes are nearer deliverance than they imagine; nay, are even brought to their deliverance by the means by which they seem to be brought to their destruction.
TARGET: Tanto poco vediamo dinanzi a noi su questa terra, e tanta ragione abbiamo di essere grati al signore dell’universo, perchè non lascia mai sì compiutamente derelitte le sue creature, che nelle condizioni anche più triste non abbiano alcun che per ringraziarlo e talvolta sieno più vicine di quanto se lo figurano al porto di loro salvezza; anzi di frequente sono condotte a questo porto da quelle circostanze medesime che pareano fatte per trascinarle alla disperaz

Processing epoch  4: 100%|██████████| 910/910 [05:37<00:00,  2.70it/s, loss=6.441]


--------------------------------------------------------------------------------
SOURCE: Then the noblemen gaily scrambled for their overcoats and all drove to the cathedral.
TARGET: E dopo di questo, i nobili presero allegramente le pellicce e andarono tutti alla cattedrale.
PREDICTED: Il suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo suo .
--------------------------------------------------------------------------------
SOURCE: She felt her head with her hand. 'Yes, my hair was done, but I don't in the least remember when.'
TARGET: Tastò la testa con la mano. — Sì, mi sono pettinata, ma quando, non lo ricordo assolutamente”.
PREDICTED: E non non non non non era un ’ ic , ma non era un ’ ic , e non era un ’ era un ’ .


Processing epoch  5: 100%|██████████| 910/910 [05:37<00:00,  2.70it/s, loss=5.919]


--------------------------------------------------------------------------------
SOURCE: "Now I am sure you can get on your feet," he said--"try."
TARGET: — Ora sono certo, — disse, — che vi reggerete in piedi; provate.
PREDICTED: — Ma non è un ’ è detto , — disse , — disse .
--------------------------------------------------------------------------------
SOURCE: Don't you believe me?
TARGET: Non mi credi forse?
PREDICTED: E non è ?


Processing epoch  6: 100%|██████████| 910/910 [05:37<00:00,  2.70it/s, loss=6.023]


--------------------------------------------------------------------------------
SOURCE: 'But what am I asking about?' he said to himself.
TARGET: Si fece pensieroso, ma immediatamente si corresse.
PREDICTED: — Non è un ’ è ? — disse .
--------------------------------------------------------------------------------
SOURCE: Her heart is always aching about somebody.
TARGET: Perché le duole il cuore per ogni cosa.
PREDICTED: E non era un ’ altra , ma non era stato .


Processing epoch  7: 100%|██████████| 910/910 [05:37<00:00,  2.70it/s, loss=5.645]


--------------------------------------------------------------------------------
SOURCE: As to going home, shame opposed the best motions that offered to my thoughts, and it immediately occurred to me how I should be laughed at among the neighbours, and should be ashamed to see, not my father and mother only, but even everybody else; from whence I have since often observed, how incongruous and irrational the common temper of mankind is, especially of youth, to that reason which ought to guide them in such cases—viz. that they are not ashamed to sin, and yet are ashamed to repent; not ashamed of the action for which they ought justly to be esteemed fools, but are ashamed of the returning, which only can make them be esteemed wise men.
TARGET: Sul tornare a casa, la vergogna rintuzzava sempre quanti migliori pensieri potessero nascermi in mente, perchè la prima idea ad occorrermi, era quella della derisione che avrei trovata fra i miei concittadini, onde arrossiva non solamente di rivede

Processing epoch  8: 100%|██████████| 910/910 [05:37<00:00,  2.70it/s, loss=5.840]


--------------------------------------------------------------------------------
SOURCE: 'It is fine, is it not?
TARGET: — Non è vero che è bella?
PREDICTED: — Non è che non è ?
--------------------------------------------------------------------------------
SOURCE: But do take a seat.'
TARGET: Ma sedetevi.
PREDICTED: Ma non è un ’ altra .


Processing epoch  9: 100%|██████████| 910/910 [05:37<00:00,  2.69it/s, loss=6.213]


--------------------------------------------------------------------------------
SOURCE: 'I quite agree with you,' said the Duchess; 'and the moral of that is--"Be what you would seem to be"--or if you'd like it put more simply--"Never imagine yourself not to be otherwise than what it might appear to others that what you were or might have been was not otherwise than what you had been would have appeared to them to be otherwise."'
TARGET: — Proprio così, — disse la Duchessa, — e la morale è questa: “Sii ciò che vuoi parere” o, se vuoi che te la dica più semplicemente: “Non credere mai d'essere diversa da quella che appari agli altri di esser o d'esser stata, o che tu possa essere, e l'essere non è altro che l'essere di quell'essere ch'è l'essere dell'essere, e non diversamente.”
PREDICTED: — Non è che non ho detto che non ho detto , — disse , — disse , — disse , ma non è un ’ altra cosa che non è un ’ altra cosa che non è stato più di me , e che non è che non è stato più più più di me 