# English quenya translation using a transformer - Data collection

## Main page

In [1]:
import requests
from bs4 import BeautifulSoup
import numpy as np
import nltk
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
import torch.nn
import torch.nn.functional as F
from model import Transformer
from sklearn.model_selection import train_test_split

url = "https://eldamo.org/content/phrase-indexes/phrases-q.html"
r = requests.get(url)

In [2]:
soup = BeautifulSoup(r.content, 'html.parser')

In [3]:
rows_english = soup.select('li')

In [4]:
rows_english[50].text

'A anamelda na ar ilyan  “A is dearest of all”\n               '

In [5]:
quenya_sentences =[]
for row in rows_english:
    quenya_sentences.append(row.text)

In [6]:
sentences = quenya_sentences[15:]

In [7]:
sentences

['a Aina Maria arca atarmë  “Holy Mary, pray for us”\n         ',
 'a Aina Neldië Eru Er órava (o)messë  “Holy Trinity, one God, have mercy on us”\n         ',
 'A carnë ta yallë B (carnë)  “A did that as / like B (did)”\n         ',
 'a Eruion Mardorunando, Eru órava (o)messë  “God, the Son, Redeemer of the world, have mercy on us”\n         ',
 'a Hrísto órava ómessë  “Christ, have mercy on us”\n         ',
 'á hyamë rámen úcarindor  “pray for us sinners”\n         ',
 'Aia María quanta Eruanno  “Hail Mary, full of grace”\n         ',
 'ai! laurië lantar lassi súrinen  “ah! like gold fall the leaves in the wind”\n            \nai! lassi lantar laurië súrinen  “ah! leaves fall golden in [by means of] the wind”\n               \n\n',
 'ai! lassi lantar laurië súrinen  “ah! leaves fall golden in [by means of] the wind”\n               ',
 'Aina Eruontarië  “Holy Mother of God”\n         ',
 'Aina Wendë mi Wenderon  “Holy Virgin of virgins”\n         ',
 '(ai)que kestanellen, (ai) tullen

In [8]:
sentences_english = []
sentences_quenya = []
for sentence in sentences:
    compteur = 0
    for i in range(len(sentence)):
        if sentence[i]=="“":
            beginning = i+1
        if sentence[i]=="”":
            ending = i
            last_ending = ending
            compteur+=1
            sentences_english.append(sentence[beginning:ending])
            if compteur==1:
                sentences_quenya.append(sentence[:beginning])
            else:
                sentences_quenya.append(sentence[last_ending:beginning])

In [9]:
len(sentences_english)

414

In [10]:
len(sentences_quenya)

414

## Poems & prayers found on the website

In [11]:
list_url = ["https://eldamo.org/content/words/word-2555725393.html", "https://eldamo.org/content/words/word-2245526111.html",
            "https://eldamo.org/content/words/word-671674147.html", "https://eldamo.org/content/words/word-311699583.html",
            "https://eldamo.org/content/words/word-2920398593.html", "https://eldamo.org/content/words/word-3295893985.html",
            "https://eldamo.org/content/words/word-2124111669.html", "https://eldamo.org/content/words/word-4161205007.html",
            "https://eldamo.org/content/words/word-436003197.html", "https://eldamo.org/content/words/word-2774144071.html",
            "https://eldamo.org/content/words/word-3330342599.html", "https://eldamo.org/content/words/word-1216507117.html",
            "https://eldamo.org/content/words/word-2721399773.html", "https://eldamo.org/content/words/word-1235857611.html"]
for url in list_url:
    r = requests.get(url)
    soup = BeautifulSoup(r.content, 'html.parser')
    rows_english = soup.select('td')
    for i in range(0,len(rows_english)):
        if i%2==0:
            sentences_quenya.append(rows_english[i].text)
        else:
            sentences_english.append(rows_english[i].text)

In [12]:
len(sentences_english)

653

In [13]:
len(sentences_quenya)

653

## Dictionary pulled from Eldamo

In [14]:
url = "https://eldamo.org/content/vocabulary-indexes/vocabulary-words-nq.html?neo"
r = requests.get(url)
soup = BeautifulSoup(r.content, 'html.parser')
rows = soup.select('dt')

In [15]:
words = []
for row in rows:
    words.append(row.text)
    

In [16]:
words_english = []
words_quenya = []
for word in words:
    compteur=0
    for i in range(len(word)):
        if ((word[i]==" ") | (word[i]=="-"))&(compteur==0)&(i!=0):
            ending_quenya = i
            words_quenya.append(word[:ending_quenya])
            compteur+=1
        if word[i]=="“":
            beginning = i+1 
        if word[i]=="”":
            ending = i
            words_english.append(word[beginning:ending])
            break
            
            

In [17]:
len(words_english)

4921

In [18]:
len(words_quenya)

4921

In [19]:
sentences_english = np.concatenate([sentences_english, words_english])

In [20]:
sentences_quenya = np.concatenate([sentences_quenya, words_quenya])

In [21]:
list_english = []
list_quenya = []
for sentence in sentences_english:
    list_english.append(list(sentence))
for sentence in sentences_quenya:
    list_quenya.append(list(sentence))

In [22]:
m = 0
for i in range(len(sentences_english)):
    if len(list_english[i])>m: 
        m = len(list_english[i])
    if len(list_quenya[i])>m:
        m = len(list_quenya[i])

In [23]:
m

146

## Tokenization

In [24]:
char2index = {}
index2char = {}
counter = 1
for i in range(len(sentences_english)):
    sent_english = sentences_english[i]
    sent_quenya = sentences_quenya[i]
    for w in sent_english:
        if w not in char2index:
            counter+=1
            char2index[w] = counter
            index2char[counter] = w
    for w in sent_quenya:
        if w not in char2index:
            counter+=1
            char2index[w] = counter
            index2char[counter] = w
char2index['<EOS>'] = counter+1
index2char[counter+1] = '<EOS>'

In [25]:
data_en = np.zeros([len(list_english), m+1])
data_quenya = np.zeros([len(list_english), m+1])
data_quenya.shape

(5574, 147)

In [26]:
for i in range(len(list_english)):
    for j in range(len(list_english[i])):
        data_en[i,j] = char2index[list_english[i][j]]
    data_en[i,len(list_english[i])] = char2index['<EOS>']
    for j in range(len(list_quenya[i])):
        data_quenya[i,j] = char2index[list_quenya[i][j]]
    data_quenya[i,len(list_quenya[i])] = char2index['<EOS>']

In [27]:
a =         [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0, 0]

In [28]:
L=[]
for d in range(len(data_quenya)):
    if np.array_equal(a,data_quenya[d]):
        L.append(d)

In [29]:
data_en = np.delete(data_en,L,0)
data_quenya = np.delete(data_quenya,L,0)

# Model definition

In [30]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [31]:
VOC_SIZE = len(char2index)+2
EMB_SIZE = 512
NHEAD = 4
HID_DIM = 1024
BATCH_SIZE = 40
NUM_ENCODER_LAYERS = 4
NUM_DECODER_LAYERS = 4

In [32]:
transformer = Transformer(NUM_ENCODER_LAYERS, NUM_DECODER_LAYERS, EMB_SIZE, NHEAD, HID_DIM, 0.01, VOC_SIZE)

In [33]:
transformer = transformer.to(DEVICE)

In [34]:
en_train, en_test, qu_train, qu_test = train_test_split(data_en, data_quenya, test_size = 0.2)

In [35]:
en_train = torch.Tensor(en_train).long()
en_test = torch.Tensor(en_test).long()
qu_train = torch.Tensor(qu_train).long()
qu_test = torch.Tensor(qu_test).long()

In [36]:
train_dataset = TensorDataset(en_train, qu_train)
test_dataset = TensorDataset(en_test, qu_test)

In [37]:
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False)

## Masks creation

In [38]:
def positionalEncoding(length, embed_dim):
    angles = np.array([[pos/10000**(2*(i//2)) for i in range(embed_dim)] for pos in range(length)])
    pos_encoding = np.zeros((length, embed_dim))
    pos_encoding[:,0::2] = np.sin(angles[:,0::2])
    pos_encoding[:,1::2] = np.cos(angles[:,1::2])
    return torch.from_numpy(pos_encoding)

In [39]:
def create_look_ahead_mask(size):
    mask = torch.triu(torch.ones(size, size), diagonal=1).to(torch.bool)
    return mask

In [40]:
def padding_mask(x):
    return x==0

In [41]:
pos_encoding = positionalEncoding(m+1, EMB_SIZE).to(DEVICE)
attn_mask = create_look_ahead_mask(m+1).to(DEVICE)


## Training

In [42]:
criterion = torch.nn.CrossEntropyLoss()
transformer.train()
num_epochs = 10
optimizer = torch.optim.Adam(transformer.parameters(), lr=1e-4)
torch.autograd.set_detect_anomaly(True) 
for epoch in range(num_epochs):
    # Iterate over the batches in the train_dataloader
    for batch_idx, batch in enumerate(train_dataloader):
        # Get the batch of input sentences and labels
        sentences = batch[0].to(DEVICE)
        labels = batch[1].to(DEVICE)
        src_padding_mask = padding_mask(sentences).to(DEVICE)
        tgt_padding_mask = padding_mask(labels).to(DEVICE)
        # Reset the gradients
        
        optimizer.zero_grad()
        # Forward pass
        outputs = transformer(sentences, labels, src_padding_mask, tgt_padding_mask, attn_mask, pos_encoding)
        print(torch.argmax(outputs,-1))
        
        # Compute the loss
        loss = criterion(outputs.view(-1,VOC_SIZE,m+1), labels)
        torch.nn.utils.clip_grad_norm_(transformer.parameters(), 0.5)

        # Backward pass
        loss.backward()
        # Update the weights
        optimizer.step()

        # Print the loss every 100 batches
        if (batch_idx + 1) % 5 == 0:
            print('Epoch [{}/{}], Batch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, batch_idx+1, len(train_dataloader), loss.item()))

tensor([[ 15,  39,  39,  ...,  70,  70,  70],
        [ 85,  12,  95,  ...,  70,  70,  70],
        [ 15,  18,  59,  ...,  70,  70,  70],
        ...,
        [100,  68,  39,  ...,  70,  70,  70],
        [100,  39,  43,  ...,  70,  70,  70],
        [100,  39,  68,  ...,  70,  70,  70]], device='cuda:0')
tensor([[18, 15, 15,  ..., 27, 27, 27],
        [15, 15, 27,  ..., 27, 27, 27],
        [15, 15, 15,  ..., 15, 15, 15],
        ...,
        [27, 27, 15,  ..., 27, 27, 27],
        [15, 27, 15,  ..., 27, 27, 27],
        [27, 27, 18,  ..., 30, 30, 30]], device='cuda:0')
tensor([[27, 27, 27,  ..., 27, 27, 27],
        [27, 15, 27,  ..., 27, 27, 27],
        [15, 15, 27,  ..., 27, 27, 27],
        ...,
        [27, 15, 15,  ..., 27, 27, 27],
        [15, 15, 15,  ..., 27, 27, 27],
        [27, 15, 27,  ..., 27, 27, 27]], device='cuda:0')
tensor([[27, 27, 27,  ..., 27, 27, 27],
        [15, 27, 27,  ..., 27, 27, 27],
        [27, 27, 27,  ..., 27, 27, 27],
        ...,
        [27, 27, 1

Epoch [1/10], Batch [30/112], Loss: 4.7403
tensor([[27, 27, 27,  ..., 28, 28, 28],
        [27, 27, 27,  ..., 28, 28, 28],
        [27, 27, 27,  ..., 28, 28, 28],
        ...,
        [27, 27, 27,  ..., 28, 28, 28],
        [27, 27, 27,  ..., 28, 27, 28],
        [27, 27, 27,  ..., 27, 28, 27]], device='cuda:0')
tensor([[27, 27, 27,  ..., 30, 30, 30],
        [27, 27, 27,  ..., 30, 30, 30],
        [27, 27, 27,  ..., 30, 30, 30],
        ...,
        [27, 27, 27,  ..., 27, 27, 27],
        [27, 27, 27,  ..., 30, 30, 30],
        [15, 27, 27,  ..., 30, 30, 30]], device='cuda:0')
tensor([[27, 27, 27,  ..., 30, 30, 30],
        [27, 27, 27,  ..., 30, 30, 30],
        [27, 27, 27,  ..., 30, 30, 30],
        ...,
        [27, 27, 27,  ..., 30, 30, 30],
        [27, 27, 27,  ..., 30, 30, 30],
        [27, 27, 27,  ..., 30, 30, 30]], device='cuda:0')
tensor([[27, 27, 27,  ..., 30, 30, 30],
        [27, 27, 27,  ..., 30, 30, 30],
        [27, 27, 27,  ..., 30, 30, 30],
        ...,
        [27

Epoch [1/10], Batch [60/112], Loss: 4.7397
tensor([[27, 27, 27,  ..., 27, 27, 27],
        [27, 27, 27,  ..., 27, 27, 27],
        [27, 27, 27,  ..., 27, 27, 27],
        ...,
        [27, 15, 15,  ..., 29, 29, 29],
        [27, 27, 27,  ..., 27, 27, 27],
        [27, 27, 27,  ..., 27, 27, 27]], device='cuda:0')
tensor([[27, 27, 27,  ..., 30, 30, 30],
        [27, 27, 15,  ..., 30, 30, 30],
        [27, 27, 15,  ..., 30, 30, 30],
        ...,
        [27, 27, 15,  ..., 30, 30, 29],
        [27, 27, 27,  ..., 30, 30, 30],
        [15, 15, 15,  ..., 26, 26, 26]], device='cuda:0')
tensor([[30, 30, 27,  ..., 26, 26, 26],
        [30, 27, 27,  ..., 30, 30, 30],
        [30, 27, 30,  ..., 30, 30, 30],
        ...,
        [27, 30, 27,  ..., 30, 30, 30],
        [28, 30, 27,  ..., 30, 30, 30],
        [30, 27, 30,  ..., 30, 30, 30]], device='cuda:0')
tensor([[30, 27, 30,  ..., 30, 30, 30],
        [30, 30, 27,  ..., 30, 30, 30],
        [28, 27, 15,  ..., 30, 30, 30],
        ...,
        [30

Epoch [1/10], Batch [90/112], Loss: 4.7388
tensor([[27, 27, 30,  ..., 92, 92, 92],
        [18, 15, 27,  ..., 92, 92, 92],
        [28, 28, 27,  ..., 92, 92, 92],
        ...,
        [30, 30, 27,  ..., 92, 92, 92],
        [28, 27, 30,  ..., 92, 92, 92],
        [27, 27, 18,  ..., 92, 92, 92]], device='cuda:0')
tensor([[28, 18, 15,  ..., 92, 92, 92],
        [28, 28, 30,  ..., 92, 92, 92],
        [30, 30, 27,  ..., 92, 92, 92],
        ...,
        [28, 30, 27,  ..., 92, 92, 92],
        [30, 30, 30,  ..., 30, 30, 30],
        [28, 28, 30,  ..., 92, 92, 92]], device='cuda:0')
tensor([[28, 30, 30,  ..., 92, 92, 92],
        [28, 30, 30,  ..., 28, 28, 28],
        [28, 18, 15,  ..., 30, 92, 30],
        ...,
        [28, 30, 30,  ..., 28, 28, 28],
        [30, 30, 30,  ..., 92, 92, 92],
        [30, 30, 27,  ..., 27, 27, 27]], device='cuda:0')
tensor([[28, 30, 30,  ..., 30, 30, 30],
        [30, 30, 30,  ..., 30, 30, 30],
        [30, 30, 30,  ..., 30, 30, 30],
        ...,
        [30

tensor([[ 28,  27,  27,  ..., 107, 107, 107],
        [ 30,  30,  27,  ...,  29,  29,  29],
        [ 30,  30,  30,  ...,  88,  88,  88],
        ...,
        [ 28,  27,  27,  ...,  92,  92,  92],
        [ 27,  27,  27,  ...,  31,  31,  31],
        [ 28,  27,  30,  ...,  92,  92,  92]], device='cuda:0')
tensor([[ 28,  28,  28,  ...,  92,  92,  92],
        [ 28,  27,  28,  ...,  92,  92,  92],
        [ 30,  30,  18,  ...,  30,  30,  30],
        ...,
        [ 28,  28,  27,  ..., 107, 107, 107],
        [ 28,  18,  15,  ...,  92,  92,  92],
        [ 28,  27,  28,  ...,  92,  92,  92]], device='cuda:0')
tensor([[27, 18, 18,  ..., 88, 88, 88],
        [27, 29, 29,  ..., 29, 29, 29],
        [28, 28, 28,  ..., 92, 92, 92],
        ...,
        [30, 27, 27,  ..., 31, 31, 31],
        [30, 27, 30,  ..., 29, 29, 29],
        [28, 28, 27,  ..., 92, 92, 92]], device='cuda:0')
Epoch [2/10], Batch [10/112], Loss: 4.7374
tensor([[28, 28, 28,  ..., 92, 92, 92],
        [30, 30, 30,  ..., 31, 2

Epoch [2/10], Batch [35/112], Loss: 4.7384
tensor([[ 30,  30,  30,  ...,  88,  88,  88],
        [ 28,  28,  28,  ...,  28,  92,  28],
        [ 28,  28,  28,  ...,  92,  92,  92],
        ...,
        [ 28,  18,  15,  ..., 107, 107, 107],
        [ 18,  15,  15,  ...,  88,  88,  88],
        [ 28,  28,  28,  ...,  92,  92,  92]], device='cuda:0')
tensor([[18, 15, 15,  ..., 29, 88, 88],
        [28, 28, 28,  ..., 92, 92, 92],
        [28, 28, 28,  ..., 27, 27, 27],
        ...,
        [28, 28, 28,  ..., 92, 92, 92],
        [28, 28, 28,  ..., 92, 92, 92],
        [27, 27, 28,  ..., 27, 27, 27]], device='cuda:0')
tensor([[28, 28, 28,  ..., 92, 92, 92],
        [28, 28, 28,  ..., 92, 92, 92],
        [30, 30, 27,  ..., 29, 88, 29],
        ...,
        [28, 18, 15,  ..., 92, 92, 92],
        [30, 30, 30,  ..., 29, 88, 88],
        [30, 18, 15,  ..., 29, 29, 29]], device='cuda:0')
tensor([[28, 28, 28,  ..., 92, 92, 92],
        [27, 27, 27,  ..., 27, 27, 27],
        [28, 28, 28,  ..., 9

tensor([[ 28,  28,  28,  ..., 107, 107, 107],
        [ 30,  30,  30,  ...,  88,  88,  88],
        [ 28,  30,  27,  ...,  79,  79,  79],
        ...,
        [ 28,  28,  27,  ...,  92,  92,  92],
        [ 30,  30,  30,  ...,  88,  88,  88],
        [ 30,  30,  30,  ...,  30,  88,  88]], device='cuda:0')
tensor([[ 30,  30,  30,  ...,  88,  88,  88],
        [ 30,  30,  30,  ...,  30,  30,  30],
        [ 30,  30,  30,  ...,  30,  26,  26],
        ...,
        [ 30,  30,  30,  ...,  79,  79,  79],
        [ 30,  30,  30,  ...,  26,  26,  26],
        [ 28,  28,  28,  ..., 107, 107, 107]], device='cuda:0')
Epoch [2/10], Batch [65/112], Loss: 4.7371
tensor([[ 27,  27,  27,  ...,  26,  26,  26],
        [ 30,  30,  18,  ...,  27,  27,  27],
        [ 30,  26,  18,  ...,  26,  26,  26],
        ...,
        [ 28,  28,  28,  ...,  31,  92,  92],
        [ 30,  30,  30,  ...,  88,  88,  88],
        [ 28,  28,  28,  ..., 107, 107, 107]], device='cuda:0')
tensor([[28, 18, 15,  ..., 92, 92, 9

tensor([[ 28,  18,  15,  ...,  92,  92,  92],
        [ 28,  28,  28,  ...,  92,  92,  92],
        [ 30,  27,  27,  ...,  88,  88,  88],
        ...,
        [ 28,  28,  28,  ..., 107, 107, 107],
        [ 30,  30,  30,  ...,  88,  88,  88],
        [ 28,  28,  28,  ...,  92,  92,  92]], device='cuda:0')
tensor([[28, 28, 28,  ..., 92, 92, 92],
        [28, 30, 28,  ..., 92, 92, 92],
        [28, 18, 15,  ..., 92, 92, 92],
        ...,
        [28, 28, 27,  ..., 92, 92, 92],
        [28, 28, 28,  ..., 79, 79, 79],
        [30, 30, 30,  ..., 87, 87, 87]], device='cuda:0')
tensor([[ 28,  28,  30,  ..., 107, 107, 107],
        [ 30,  30,  30,  ...,  30,  88,  30],
        [ 28,  30,  30,  ...,  30,  88,  87],
        ...,
        [ 30,  30,  30,  ...,  88,  88,  88],
        [ 28,  28,  30,  ...,  92,  92,  92],
        [ 30,  30,  27,  ...,  27,  88,  88]], device='cuda:0')
tensor([[ 28,  28,  30,  ..., 107, 107, 107],
        [ 28,  30,  30,  ...,  30,  88,  88],
        [ 30,  30,  30,

tensor([[ 28,  28,  28,  ..., 107, 107, 107],
        [ 30,  30,  30,  ...,  59,  87,  87],
        [ 28,  28,  28,  ...,  87,  87,  87],
        ...,
        [ 28,  28,  28,  ...,  92,  92,  92],
        [ 28,  28,  28,  ...,  87,  87,  87],
        [ 28,  28,  30,  ...,  92,  92,  92]], device='cuda:0')
tensor([[ 28,  28,  28,  ...,  59,  92,  92],
        [ 28,  28,  28,  ...,  92,  92,  92],
        [ 30,  30,  18,  ...,  59,  88,  88],
        ...,
        [ 28,  28,  28,  ..., 107, 107, 107],
        [ 28,  18,  15,  ...,  91,  92,  92],
        [ 28,  28,  28,  ...,  59,  92,  92]], device='cuda:0')
tensor([[30, 18, 18,  ..., 87, 87, 87],
        [30, 30, 30,  ..., 30, 30, 30],
        [28, 28, 28,  ..., 92, 92, 92],
        ...,
        [30, 30, 30,  ..., 79, 88, 88],
        [26, 26, 28,  ..., 87, 87, 87],
        [28, 28, 27,  ..., 92, 92, 92]], device='cuda:0')
Epoch [3/10], Batch [10/112], Loss: 4.7350
tensor([[ 28,  28,  28,  ...,  92,  92,  92],
        [ 30,  30,  30,  .

tensor([[ 18,  15,  15,  ...,  53,  53,  53],
        [ 28,  28,  28,  ...,  92,  92,  92],
        [ 18,  15,  15,  ...,  88,  88,  88],
        ...,
        [ 28,  28,  28,  ...,  89,  89, 107],
        [ 28,  28,  28,  ...,  87,  87,  87],
        [ 28,  18,  15,  ...,  92,  92,  92]], device='cuda:0')
Epoch [3/10], Batch [35/112], Loss: 4.7357
tensor([[28, 28, 28,  ..., 87, 87, 87],
        [28, 28, 28,  ..., 92, 92, 92],
        [28, 28, 28,  ..., 92, 92, 92],
        ...,
        [28, 18, 15,  ..., 89, 89, 89],
        [18, 15, 15,  ..., 53, 88, 53],
        [28, 28, 28,  ..., 92, 92, 92]], device='cuda:0')
tensor([[18, 15, 15,  ..., 88, 88, 88],
        [28, 28, 28,  ..., 89, 92, 92],
        [28, 28, 28,  ..., 87, 87, 87],
        ...,
        [28, 28, 28,  ..., 92, 92, 92],
        [28, 28, 28,  ..., 92, 92, 92],
        [27, 27, 27,  ..., 86, 88, 86]], device='cuda:0')
tensor([[ 28,  28,  28,  ...,  89,  89,  92],
        [ 28,  28,  28,  ...,  79,  92,  92],
        [ 30,  3

tensor([[30, 30, 30,  ..., 33, 88, 88],
        [30, 15, 27,  ..., 36, 88, 84],
        [30, 30, 30,  ..., 79, 79, 79],
        ...,
        [30, 30, 28,  ..., 92, 92, 92],
        [30, 30, 30,  ..., 92, 92, 92],
        [30, 30, 30,  ..., 36, 88, 53]], device='cuda:0')
tensor([[31, 28, 30,  ..., 33, 89, 89],
        [30, 30, 30,  ..., 53, 53, 53],
        [28, 30, 15,  ..., 79, 88, 88],
        ...,
        [30, 27, 27,  ..., 33, 92, 92],
        [30, 30, 30,  ..., 33, 88, 88],
        [30, 30, 30,  ..., 33, 88, 88]], device='cuda:0')
tensor([[ 30,  30,  30,  ...,  33,  53,  53],
        [ 30,  30,  30,  ...,  88,  88,  88],
        [ 26,  26,  26,  ...,  89,  88,  88],
        ...,
        [ 30,  30,  30,  ...,  79,  84,  84],
        [ 26,  26,  26,  ...,  89,  89,  89],
        [ 31,  30,  30,  ...,  91,  89, 107]], device='cuda:0')
Epoch [3/10], Batch [65/112], Loss: 4.7339
tensor([[ 26,  26,  26,  ...,  87,  88,  26],
        [ 30,  15,  18,  ...,  36,  88,  88],
        [ 26,  2

Epoch [3/10], Batch [90/112], Loss: 4.7323
tensor([[28, 30, 30,  ..., 79, 88, 86],
        [18, 15, 15,  ..., 36, 88, 88],
        [31, 28, 28,  ..., 36, 89, 92],
        ...,
        [30, 30, 29,  ..., 80, 80, 80],
        [31, 28, 28,  ..., 36, 89, 92],
        [26, 26, 26,  ..., 85, 85, 85]], device='cuda:0')
tensor([[ 28,  18,  15,  ...,  36,  92,  92],
        [ 30,  28,  30,  ...,  36,  92,  92],
        [ 30,  15,  27,  ...,  91,  89,  92],
        ...,
        [ 31,  28,  28,  ...,  90,  89, 107],
        [ 30,  30,  28,  ...,  87,  88,  92],
        [ 28,  28,  30,  ...,  36,  92,  92]], device='cuda:0')
tensor([[30, 30, 28,  ..., 89, 89, 92],
        [28, 30, 30,  ..., 89, 92, 92],
        [28, 18, 15,  ..., 91, 89, 92],
        ...,
        [28, 28, 27,  ..., 90, 92, 92],
        [28, 28, 28,  ..., 85, 85, 85],
        [30, 27, 27,  ..., 87, 87, 86]], device='cuda:0')
tensor([[ 28,  28,  28,  ...,  90,  89, 107],
        [ 30,  15,  27,  ...,  86,  88,  88],
        [ 28,  3

tensor([[30, 30, 30,  ..., 87, 88, 86],
        [30, 30, 30,  ..., 60, 53, 53],
        [30, 30, 30,  ..., 91, 92, 92],
        ...,
        [30, 30, 28,  ..., 36, 88, 86],
        [26, 26, 26,  ..., 85, 85, 85],
        [18, 15, 15,  ..., 90, 88, 92]], device='cuda:0')
tensor([[31, 31, 31,  ..., 36, 88, 92],
        [30, 30, 30,  ..., 90, 90, 92],
        [30, 30, 30,  ..., 85, 87, 87],
        ...,
        [30, 30, 30,  ..., 80, 92, 92],
        [30, 30, 30,  ..., 85, 87, 92],
        [30, 30, 30,  ..., 91, 92, 92]], device='cuda:0')
tensor([[ 30,  27,  27,  ...,  91,  92,  92],
        [ 31,  30,  30,  ...,  91,  92,  92],
        [ 30,  30,  18,  ...,  90,  88,  53],
        ...,
        [ 31,  30,  30,  ...,  85,  88, 107],
        [ 31,  18,  15,  ...,  91,  92,  92],
        [ 31,  30,  30,  ...,  91,  88,  92]], device='cuda:0')
tensor([[30, 18, 18,  ..., 87, 87, 87],
        [31, 31, 31,  ..., 31, 31, 26],
        [31, 30, 30,  ..., 91, 92, 92],
        ...,
        [30, 30, 3

Epoch [4/10], Batch [35/112], Loss: 4.7314
tensor([[ 31,  31,  28,  ...,  79,  87,  84],
        [ 31,  28,  30,  ...,  80,  92,  92],
        [ 31,  30,  30,  ...,  33,  89,  92],
        ...,
        [ 31,  18,  15,  ...,  89,  89, 107],
        [ 18,  15,  15,  ...,  36,  90,  53],
        [ 31,  28,  30,  ...,  91,  92,  92]], device='cuda:0')
tensor([[18, 15, 15,  ..., 88, 88, 53],
        [31, 30, 28,  ..., 36, 89, 92],
        [28, 28, 30,  ..., 36, 88, 92],
        ...,
        [31, 28, 30,  ..., 60, 92, 92],
        [31, 28, 28,  ..., 80, 92, 92],
        [28, 15, 28,  ..., 86, 86, 86]], device='cuda:0')
tensor([[31, 30, 30,  ..., 89, 89, 92],
        [31, 30, 28,  ..., 80, 89, 92],
        [30, 30, 28,  ..., 88, 88, 53],
        ...,
        [30, 18, 15,  ..., 36, 89, 92],
        [30, 30, 30,  ..., 87, 87, 88],
        [30, 18, 15,  ..., 85, 85, 85]], device='cuda:0')
tensor([[30, 28, 27,  ..., 36, 92, 92],
        [30, 28, 27,  ..., 86, 87, 86],
        [30, 28, 28,  ..., 3

tensor([[31, 31, 31,  ..., 89, 88, 92],
        [30, 30, 30,  ..., 90, 53, 53],
        [28, 28, 31,  ..., 86, 86, 92],
        ...,
        [31, 15, 27,  ..., 36, 92, 92],
        [31, 30, 30,  ..., 87, 87, 84],
        [30, 30, 30,  ..., 88, 88, 86]], device='cuda:0')
tensor([[30, 30, 30,  ..., 33, 88, 53],
        [30, 30, 30,  ..., 88, 88, 86],
        [26, 26, 26,  ..., 91, 87, 92],
        ...,
        [31, 30, 30,  ..., 86, 86, 86],
        [26, 26, 26,  ..., 91, 91, 92],
        [31, 30, 30,  ..., 90, 92, 92]], device='cuda:0')
Epoch [4/10], Batch [65/112], Loss: 4.7311
tensor([[ 26,  26,  26,  ...,  60,  85,  85],
        [ 30,  15,  18,  ...,  33,  86,  86],
        [ 26,  26,  18,  ...,  85,  85,  85],
        ...,
        [ 31,  30,  30,  ...,  36,  90,  92],
        [ 30,  30,  30,  ...,  33,  88,  53],
        [ 31,  30,  30,  ...,  90,  92, 107]], device='cuda:0')
tensor([[31, 18, 15,  ..., 90, 90, 92],
        [30, 30, 30,  ..., 33, 87, 87],
        [30, 30, 30,  ..., 3

tensor([[31, 18, 15,  ..., 36, 90, 92],
        [31, 30, 30,  ..., 36, 89, 92],
        [31, 15, 27,  ..., 90, 90, 92],
        ...,
        [31, 31, 30,  ..., 90, 89, 90],
        [31, 31, 30,  ..., 87, 87, 53],
        [31, 30, 31,  ..., 91, 92, 92]], device='cuda:0')
tensor([[31, 30, 31,  ..., 36, 89, 92],
        [31, 30, 30,  ..., 36, 92, 92],
        [31, 18, 15,  ..., 90, 90, 92],
        ...,
        [31, 15, 27,  ..., 90, 92, 92],
        [30, 30, 31,  ..., 79, 85, 85],
        [30, 28, 27,  ..., 86, 86, 86]], device='cuda:0')
tensor([[ 31,  30,  30,  ...,  36,  90, 107],
        [ 30,  15,  30,  ...,  65,  88,  86],
        [ 28,  30,  30,  ...,  65,  88,  53],
        ...,
        [ 30,  30,  30,  ...,  65,  88,  53],
        [ 31,  30,  30,  ...,  36,  92,  92],
        [ 31,  15,  27,  ...,  65,  88,  84]], device='cuda:0')
tensor([[30, 30, 30,  ..., 90, 89, 90],
        [28, 30, 30,  ..., 36, 89, 90],
        [30, 30, 30,  ..., 36, 88, 85],
        ...,
        [28, 30, 3

tensor([[31, 31, 28,  ..., 33, 89, 92],
        [30, 30, 30,  ..., 90, 90, 92],
        [30, 30, 28,  ..., 65, 87, 92],
        ...,
        [31, 30, 31,  ..., 36, 92, 92],
        [28, 28, 28,  ..., 87, 87, 92],
        [31, 31, 30,  ..., 40, 90, 92]], device='cuda:0')
tensor([[31, 27, 27,  ..., 40, 92, 92],
        [31, 31, 30,  ..., 40, 92, 92],
        [30, 30, 18,  ..., 89, 89, 53],
        ...,
        [31, 31, 30,  ..., 65, 88, 92],
        [31, 18, 15,  ..., 90, 90, 92],
        [31, 30, 30,  ..., 89, 88, 92]], device='cuda:0')
tensor([[30, 18, 18,  ..., 65, 87, 87],
        [31, 16, 31,  ..., 31, 31, 31],
        [31, 31, 30,  ..., 36, 91, 92],
        ...,
        [30, 30, 30,  ..., 79, 89, 90],
        [30, 30, 28,  ..., 87, 87, 87],
        [31, 27, 27,  ..., 40, 92, 92]], device='cuda:0')
Epoch [5/10], Batch [10/112], Loss: 4.7279
tensor([[ 31,  31,  30,  ...,  40,  92,  92],
        [ 30,  31,  30,  ...,  65,  65, 104],
        [ 30,  30,  30,  ...,  91,  91,  92],
      

tensor([[18, 15, 15,  ..., 65, 88, 53],
        [30, 30, 30,  ..., 36, 89, 92],
        [30, 31, 16,  ..., 90, 89, 92],
        ...,
        [30, 30, 30,  ..., 40, 92, 92],
        [30, 28, 28,  ..., 80, 92, 92],
        [31, 15, 28,  ..., 65, 86, 86]], device='cuda:0')
tensor([[ 30,  30,  16,  ...,  61,  89,  92],
        [ 30,  30,  28,  ...,  80,  89,  92],
        [ 30,  30,  31,  ...,  36,  88,  53],
        ...,
        [ 30,  18,  16,  ...,  36,  91,  92],
        [ 31,  30,  30,  ...,  65,  88,  84],
        [ 30,  18,  15,  ...,  65, 100,  93]], device='cuda:0')
tensor([[30, 27, 27,  ..., 40, 92, 92],
        [30, 28, 27,  ..., 13, 85, 86],
        [30, 30, 16,  ..., 40, 91, 92],
        ...,
        [30, 30, 28,  ..., 36, 90, 92],
        [18, 15, 15,  ..., 65, 88, 86],
        [31, 30, 28,  ..., 65, 88, 84]], device='cuda:0')
tensor([[30, 30, 31,  ..., 90, 90, 92],
        [30, 30, 30,  ..., 65, 89, 86],
        [26, 26, 26,  ..., 93, 93, 93],
        ...,
        [30, 30, 1

tensor([[ 30,  31,  31,  ...,  65,  88,  53],
        [ 30,  31,  30,  ...,  65,  88,  53],
        [ 30,  26,  26,  ...,  65,  87,  92],
        ...,
        [ 31,  30,  30,  ...,  65, 102, 102],
        [ 30,  26,  26,  ...,  65, 102,  92],
        [ 31,  31,  30,  ...,  34,  92,  92]], device='cuda:0')
Epoch [5/10], Batch [65/112], Loss: 4.7270
tensor([[ 30,  26,  26,  ...,  85,  85,  85],
        [ 31,  28,  18,  ...,  13,  88,  53],
        [ 30,  26,  18,  ...,  36, 102,  53],
        ...,
        [ 30,  30,  30,  ...,  36,  90,  92],
        [ 30,  30,  28,  ...,  65,  88,  53],
        [ 31,  30,  30,  ...,  34,  92,  92]], device='cuda:0')
tensor([[30, 18, 16,  ..., 34, 90, 92],
        [30, 30, 30,  ..., 65, 87, 92],
        [30, 30, 30,  ..., 33, 88, 53],
        ...,
        [18, 15, 15,  ..., 13, 87, 92],
        [30, 30, 30,  ..., 34, 91, 92],
        [30, 28, 30,  ..., 13, 86, 84]], device='cuda:0')
tensor([[ 30,  30,  30,  ...,  86,  86,  53],
        [ 30,  30,  30,  .

tensor([[ 27,  30,  30,  ...,  36,  89,  92],
        [ 27,  30,  30,  ...,  34,  89,  92],
        [ 27,  18,  16,  ...,  34,  90,  92],
        ...,
        [ 27,  30,  27,  ...,  34,  91,  92],
        [ 30,  30,  28,  ...,  36,  91, 102],
        [ 30,  28,  30,  ...,  86,  86,  92]], device='cuda:0')
tensor([[ 31,  31,  30,  ...,  34,  91, 107],
        [ 31,  28,  30,  ...,  67,  88,  53],
        [ 28,  30,  30,  ...,  67,  88,  92],
        ...,
        [ 30,  30,  30,  ...,  67,  88,  53],
        [ 27,  30,  31,  ...,  34,  92,  92],
        [ 31,  28,  28,  ...,  67,  88,  84]], device='cuda:0')
tensor([[31, 31, 30,  ..., 80, 89, 92],
        [30, 30, 31,  ..., 15, 89, 55],
        [30, 30, 30,  ..., 34, 88,  9],
        ...,
        [28, 31, 28,  ..., 36, 89, 55],
        [27, 27, 31,  ..., 34, 92, 92],
        [27, 28, 28,  ..., 80, 92, 92]], device='cuda:0')
Epoch [5/10], Batch [95/112], Loss: 4.7216
tensor([[31, 18, 16,  ..., 36, 89, 55],
        [30, 18, 16,  ..., 13, 8

tensor([[ 30,  30,  30,  ...,  37,  92,  92],
        [ 30,  30,  16,  ...,  91,  92,  92],
        [ 31,  30,  18,  ...,  90,  89,  55],
        ...,
        [ 30,  31,  30,  ...,  36,  87, 107],
        [ 30,  18,  16,  ...,  91,  91,  92],
        [ 30,  30,  30,  ...,  37,  88,  92]], device='cuda:0')
tensor([[30, 18, 18,  ..., 13, 87,  9],
        [30, 30, 30,  ..., 30, 30, 30],
        [30, 27, 30,  ..., 34, 91, 92],
        ...,
        [30, 28, 30,  ..., 90, 90, 86],
        [30, 30, 31,  ..., 86, 87, 87],
        [30, 30, 27,  ..., 37, 92, 92]], device='cuda:0')
Epoch [6/10], Batch [10/112], Loss: 4.7244
tensor([[ 30,  30,  30,  ...,  37,  87,  92],
        [ 30,  30,  30,  ...,  37, 102, 100],
        [ 30,  30,  30,  ...,  37,  92,  92],
        ...,
        [ 30,  30,  31,  ...,  34,  34, 102],
        [ 30,  30,  27,  ...,  37,  90,  53],
        [ 30,  27,  16,  ...,  37,  90,  92]], device='cuda:0')
tensor([[ 30,  26,  26,  ...,  37, 100,  93],
        [ 31,  31,  18,  .

tensor([[18, 13, 13,  ..., 67, 88, 53],
        [30, 31, 16,  ..., 37, 89, 92],
        [31, 31, 30,  ..., 66, 89, 55],
        ...,
        [30, 31, 30,  ..., 66, 92, 92],
        [30, 28, 16,  ..., 66, 92, 92],
        [31, 28, 27,  ..., 65, 86,  7]], device='cuda:0')
tensor([[ 31,  31,  16,  ...,  66,  89,  92],
        [ 30,  31,  16,  ...,  66,  89,  92],
        [ 31,  30,  31,  ...,  36,  88,   7],
        ...,
        [ 30,  18,  16,  ...,  37,  90,  92],
        [ 31,  30,  31,  ...,  67,  88,  84],
        [ 30,  18,  15,  ...,  67, 100, 100]], device='cuda:0')
tensor([[31, 30, 27,  ..., 66, 92, 92],
        [31, 28, 27,  ..., 85, 85, 85],
        [31, 30, 16,  ..., 40, 92, 92],
        ...,
        [31, 31, 31,  ..., 90, 90, 92],
        [18, 13, 13,  ..., 67, 88,  7],
        [31, 30, 28,  ..., 67, 88, 84]], device='cuda:0')
tensor([[ 31,  31,  31,  ...,  90,  90,  92],
        [ 31,  31,  31,  ...,  65,  89,  55],
        [ 30,  26,  53,  ...,  67, 100,  53],
        ...,


tensor([[ 31,  31,  31,  ...,  67,  88,  55],
        [ 31,  31,  30,  ...,  67,  88,   7],
        [ 30,  30,  26,  ...,  67,  87,   7],
        ...,
        [ 31,  30,  30,  ...,  67, 102,  48],
        [ 30,  30,  26,  ...,  93, 102,  99],
        [ 31,  31,  16,  ...,  37,  89,  92]], device='cuda:0')
Epoch [6/10], Batch [65/112], Loss: 4.7239
tensor([[ 30,  30,  26,  ...,  59,  85,   9],
        [ 31,  28,  18,  ...,  67,  88,   7],
        [ 30,  26,  18,  ...,  36, 102, 102],
        ...,
        [ 31,  31,  30,  ...,  34,  90,  92],
        [ 31,  30,  28,  ...,  67,  88,  53],
        [ 31,  30,  30,  ...,  37,  92,  55]], device='cuda:0')
tensor([[30, 18, 13,  ..., 37, 90, 92],
        [30, 30, 31,  ..., 67, 87, 92],
        [31, 30, 30,  ..., 67, 88, 53],
        ...,
        [18, 13, 13,  ..., 13, 87, 48],
        [30, 30, 31,  ..., 66, 91, 92],
        [31, 28, 30,  ..., 86, 86,  7]], device='cuda:0')
tensor([[ 31,  28,  31,  ...,  86,  86,  53],
        [ 30,  28,  27,  .

tensor([[ 31,  31,  27,  ...,  37,  89,  92],
        [ 31,  31,  31,  ...,  66,  88,  92],
        [ 31,  18,  16,  ...,  66,  90,  92],
        ...,
        [ 31,  30,  27,  ...,  66,  91,  92],
        [ 28,  28,  28,  ...,  37, 102, 101],
        [ 31,  28,  30,  ...,  40,  86,  48]], device='cuda:0')
tensor([[ 31,  31,  16,  ...,  37,  91, 107],
        [ 31,  28,  27,  ...,  67,  88,   7],
        [ 28,  31,  30,  ...,  66,  88,  92],
        ...,
        [ 31,  31,  31,  ...,  67,  88,  55],
        [ 31,  31,  31,  ...,  66,  92,  92],
        [ 31,  28,  28,  ...,  67,  88,   7]], device='cuda:0')
tensor([[31, 31, 16,  ..., 66, 89, 92],
        [28, 30, 31,  ..., 66, 89, 92],
        [30, 28, 31,  ..., 34, 88,  9],
        ...,
        [28, 28, 28,  ..., 36, 89, 92],
        [31, 31, 31,  ..., 66, 92, 92],
        [31, 27, 27,  ..., 40, 92, 92]], device='cuda:0')
Epoch [6/10], Batch [95/112], Loss: 4.7205
tensor([[31, 18, 16,  ..., 37, 89, 56],
        [30, 18, 16,  ..., 15, 8

tensor([[31, 31, 31,  ..., 67, 88, 55],
        [31, 31, 30,  ..., 90, 90,  7],
        [30, 31, 31,  ..., 66, 92, 48],
        ...,
        [31, 31, 31,  ..., 37, 91, 92],
        [30, 30, 31,  ..., 67, 87, 48],
        [31, 31, 30,  ..., 83, 90, 92]], device='cuda:0')
tensor([[31, 30, 27,  ..., 40, 92, 92],
        [31, 31, 30,  ..., 40, 92, 92],
        [31, 30, 18,  ..., 67, 89, 55],
        ...,
        [31, 31, 30,  ..., 66, 87, 92],
        [31, 18, 16,  ..., 40, 91, 92],
        [31, 31, 30,  ..., 67, 88, 92]], device='cuda:0')
tensor([[31, 18, 18,  ..., 13, 87, 48],
        [31, 30, 30,  ..., 31, 30, 30],
        [31, 31, 30,  ..., 34, 91, 92],
        ...,
        [31, 31, 31,  ..., 66, 90, 86],
        [31, 31, 31,  ..., 67, 87, 48],
        [31, 30, 30,  ..., 40, 92, 92]], device='cuda:0')
Epoch [7/10], Batch [10/112], Loss: 4.7225
tensor([[ 31,  31,  30,  ...,  40,  92,  92],
        [ 30,  31,  30,  ...,  67, 100,  55],
        [ 30,  30,  30,  ...,  66,  92,  92],
      

tensor([[18, 13, 13,  ..., 40, 91, 53],
        [31, 30, 28,  ..., 40, 92, 92],
        [18, 13, 13,  ..., 67, 89,  9],
        ...,
        [31, 28, 31,  ..., 66, 87, 92],
        [31, 30, 31,  ..., 67, 87,  9],
        [31, 18, 14,  ..., 80, 89, 92]], device='cuda:0')
Epoch [7/10], Batch [35/112], Loss: 4.7229
tensor([[ 31,  31,  28,  ...,  86,  86,  84],
        [ 31,  31,  30,  ...,  40,  91,  92],
        [ 31,  31,  30,  ...,  66,  91,  92],
        ...,
        [ 31,  18,  16,  ...,  40,  89, 107],
        [ 18,  13,  13,  ...,  36,  88,  53],
        [ 31,  31,  16,  ...,  66,  92,  92]], device='cuda:0')
tensor([[18, 13, 13,  ..., 67, 88,  9],
        [31, 31, 16,  ..., 66, 89, 92],
        [31, 31, 30,  ..., 67, 89, 52],
        ...,
        [31, 31, 30,  ..., 66, 92, 92],
        [31, 28, 16,  ..., 66, 92, 92],
        [31, 28, 27,  ..., 86, 86,  7]], device='cuda:0')
tensor([[31, 31, 16,  ..., 67, 89, 92],
        [30, 31, 31,  ..., 67, 89, 92],
        [31, 31, 31,  ..., 6

tensor([[ 31,  31,  28,  ...,  67,  92,  92],
        [ 31,  31,  18,  ...,  38,  90, 107],
        [ 31,  31,  18,  ...,  68,  88, 101],
        ...,
        [ 31,  31,  18,  ...,  68,  89,  56],
        [ 31,  28,  27,  ...,  40, 101, 101],
        [ 18,  13,  13,  ...,  36,  92,   9]], device='cuda:0')
tensor([[ 31,  31,  31,  ...,  68,  89,  92],
        [ 31,  31,  28,  ...,  68, 102,  54],
        [ 31,  31,  84,  ...,  67, 102,  48],
        ...,
        [ 31,  31,  31,  ...,  66,  88,  92],
        [ 31,  31,  31,  ...,  67,  91,  92],
        [ 31,  31,  31,  ...,  65,  90,  53]], device='cuda:0')
tensor([[31, 31, 31,  ..., 68, 89, 92],
        [31, 31, 31,  ..., 68, 90, 53],
        [31, 28, 28,  ..., 79, 85,  7],
        ...,
        [31, 30, 27,  ..., 68, 88, 92],
        [31, 31, 31,  ..., 68, 87, 48],
        [31, 31, 31,  ..., 67, 88, 48]], device='cuda:0')
tensor([[ 31,  31,  31,  ...,  68,  88,  54],
        [ 31,  31,  31,  ...,  67,  88, 101],
        [ 30,  28,  26,

tensor([[31, 27, 31,  ..., 68, 88, 53],
        [31, 18, 14,  ..., 80, 89, 54],
        [31, 28, 31,  ..., 80, 88, 53],
        ...,
        [31, 31, 28,  ..., 68, 89, 54],
        [31, 30, 30,  ..., 62, 90, 54],
        [31, 28, 30,  ..., 34, 92, 54]], device='cuda:0')
Epoch [7/10], Batch [90/112], Loss: 4.7181
tensor([[ 31,  27,  27,  ...,  68,  88,  53],
        [ 18,  13,  13,  ...,  80,  88,  54],
        [ 31,  30,  31,  ...,  68,  88,  92],
        ...,
        [ 31,  30,  27,  ...,  36, 100,  48],
        [ 31,  31,  31,  ...,  68,  89,  92],
        [ 30,  26,  18,  ...,  36, 102,  97]], device='cuda:0')
tensor([[ 31,  18,  30,  ...,  38,  91,  92],
        [ 31,  28,  30,  ...,  37,  89,  92],
        [ 31,  28,  27,  ...,  68,  90,  54],
        ...,
        [ 31,  30,  28,  ...,  68,  90,  55],
        [ 31,  31,  30,  ...,  68,  88, 101],
        [ 31,  30,  31,  ...,  68,  92,  92]], device='cuda:0')
tensor([[ 31,  31,  27,  ...,  68,  89,  92],
        [ 31,  31,  31,  .

Epoch [8/10], Batch [5/112], Loss: 4.7199
tensor([[31, 31, 31,  ..., 33, 87,  9],
        [18, 15, 14,  ..., 36, 92, 86],
        [30, 31, 30,  ..., 68, 92, 92],
        ...,
        [31, 31, 31,  ..., 68, 87, 92],
        [31, 30, 31,  ..., 34, 90, 92],
        [30, 30, 27,  ..., 68, 89, 92]], device='cuda:0')
tensor([[ 31,  31,  30,  ...,  80,  87,   9],
        [ 31,  30,  30,  ...,  68,  88,  53],
        [ 31,  28,  31,  ...,  38,  90,  92],
        ...,
        [ 31,  30,  28,  ...,  34,  87,  92],
        [ 30,  28,  30,  ...,  68, 102,  55],
        [ 18,  13,  13,  ...,  68,  91,   9]], device='cuda:0')
tensor([[31, 30, 28,  ..., 80, 88, 54],
        [31, 31, 31,  ..., 38, 90,  7],
        [30, 31, 31,  ..., 66, 86, 48],
        ...,
        [31, 31, 31,  ..., 68, 91, 92],
        [30, 31, 31,  ..., 80, 87,  9],
        [31, 31, 31,  ..., 68, 90, 92]], device='cuda:0')
tensor([[ 31,  31,  27,  ...,  68,  92,  92],
        [ 31,  31,  31,  ...,  40,  92,  92],
        [ 31,  30

tensor([[28, 28, 27,  ..., 67, 86,  9],
        [30, 28, 30,  ..., 80, 88, 92],
        [31, 28, 28,  ..., 67, 87, 92],
        ...,
        [31, 28, 30,  ..., 69, 89, 92],
        [31, 18, 30,  ..., 37, 92, 92],
        [30, 28, 28,  ..., 34, 89, 92]], device='cuda:0')
tensor([[18, 13, 13,  ..., 38, 91, 53],
        [31, 28, 28,  ..., 40, 92, 92],
        [18, 13, 13,  ..., 67, 89,  9],
        ...,
        [30, 28, 28,  ..., 16, 87, 92],
        [31, 31, 31,  ..., 67, 87,  9],
        [31, 18, 14,  ..., 69, 89, 92]], device='cuda:0')
Epoch [8/10], Batch [35/112], Loss: 4.7203
tensor([[ 31,  28,  28,  ...,  79,  86,  84],
        [ 31,  28,  31,  ...,  67,  91,  92],
        [ 31,  31,  31,  ...,  66,  91,  92],
        ...,
        [ 30,  18,  30,  ...,  69,  89,   9],
        [ 18,  13,  13,  ...,  36,  53,  53],
        [ 31,  31,  30,  ...,  37, 101,  92]], device='cuda:0')
tensor([[ 18,  13,  13,  ...,  67,  88,   9],
        [ 31,  31,  28,  ...,  69,  89,  92],
        [ 31,  3

tensor([[ 31,  31,  31,  ...,  37,  92,  92],
        [ 31,  31,  31,  ...,  66,  90,  92],
        [ 31,  31,  31,  ...,  67,  88,  56],
        ...,
        [ 31,  18,  14,  ...,  62,  86,  48],
        [ 31,  31,  31,  ...,  65, 102,  48],
        [ 31,  31,  31,  ...,  68,  91,  92]], device='cuda:0')
tensor([[ 31,  31,  28,  ...,  68, 100,  92],
        [ 31,  28,  18,  ...,  38,  90,  56],
        [ 31,  28,  18,  ...,  68,  88,   7],
        ...,
        [ 31,  31,  18,  ...,  69,  89,  56],
        [ 31,  28,  30,  ...,  67, 101,  56],
        [ 18,  13,  13,  ...,  36,  92,   9]], device='cuda:0')
tensor([[ 31,  31,  31,  ...,  69,  89,   7],
        [ 31,  28,  27,  ...,  38, 102,  54],
        [ 31,  31,  84,  ...,  67,  91,  48],
        ...,
        [ 31,  31,  31,  ...,  37, 101,  92],
        [ 31,  31,  31,  ...,  69,  91,  92],
        [ 31,  31,  31,  ...,  65,  90,  75]], device='cuda:0')
tensor([[31, 28, 28,  ..., 69, 89,  8],
        [31, 31, 31,  ..., 38, 90, 53],

tensor([[ 31,  31,  18,  ...,  68,  88,  56],
        [ 31,  30,  18,  ...,  80,  88,  56],
        [ 18,  14,  18,  ...,  13,  87,   9],
        ...,
        [ 31,  30,  30,  ...,  37, 100,  56],
        [ 30,  31,  30,  ...,  36,  87, 100],
        [ 31,  18,  14,  ...,  80,  90,  92]], device='cuda:0')
tensor([[31, 31, 31,  ..., 80, 88, 53],
        [31, 18, 30,  ..., 80, 89, 30],
        [31, 31, 31,  ..., 80, 88, 53],
        ...,
        [31, 31, 27,  ..., 69, 89, 54],
        [31, 30, 30,  ..., 69, 90, 56],
        [31, 28, 31,  ..., 34, 92, 92]], device='cuda:0')
Epoch [8/10], Batch [90/112], Loss: 4.7148
tensor([[ 31,  28,  31,  ...,  80,  88,  53],
        [ 18,  14,  27,  ...,  80,  88,  54],
        [ 31,  30,  31,  ...,  80,  88,  92],
        ...,
        [ 31,  30,  28,  ...,  36,  98,  48],
        [ 31,  31,  31,  ...,  37,  89,  92],
        [ 31,  26,  18,  ...,  36, 101,  56]], device='cuda:0')
tensor([[31, 18, 30,  ..., 71, 91, 92],
        [31, 30, 31,  ..., 37, 8

Epoch [9/10], Batch [5/112], Loss: 4.7213
tensor([[31, 31, 31,  ..., 33, 87,  9],
        [18, 13, 13,  ..., 36, 92,  7],
        [30, 31, 30,  ..., 37, 92, 92],
        ...,
        [31, 31, 30,  ..., 80, 87, 52],
        [30, 30, 31,  ..., 34, 90, 92],
        [30, 31, 27,  ..., 34, 89, 92]], device='cuda:0')
tensor([[ 31,  31,  31,  ...,  66,  87,  52],
        [ 31,  30,  30,  ...,  65,  88,  53],
        [ 30,  31,  31,  ...,  38,  90,  92],
        ...,
        [ 31,  30,  28,  ...,  34,  87, 100],
        [ 30,  30,  30,  ...,  82,  98,  99],
        [ 18,  13,  27,  ...,  38,  91,  57]], device='cuda:0')
tensor([[ 30,  30,  28,  ...,  80,  88,  54],
        [ 31,  31,  31,  ...,  38,  89,   7],
        [ 31,  31,  31,  ...,  66,  87, 100],
        ...,
        [ 31,  31,  31,  ...,  71,  91,  92],
        [ 30,  31,  31,  ...,  80,  87,   9],
        [ 31,  31,  31,  ...,  67,  90,  92]], device='cuda:0')
tensor([[ 31,  31,  28,  ...,  68,  87,  92],
        [ 31,  31,  31,  ..

tensor([[31, 30, 30,  ..., 80, 88,  7],
        [30, 30, 30,  ..., 66, 91, 92],
        [30, 30, 28,  ..., 66, 91, 92],
        ...,
        [27, 30, 27,  ..., 66, 91, 92],
        [31, 28, 18,  ..., 80, 88, 30],
        [30, 30, 30,  ..., 71, 91,  7]], device='cuda:0')
tensor([[ 28,  28,  30,  ...,  86,  86,   7],
        [ 30,  30,  30,  ...,  46,  88,  30],
        [ 31,  28,  28,  ...,  46,  87, 100],
        ...,
        [ 30,  30,  30,  ...,  69,  89,  92],
        [ 30,  18,  16,  ...,  69,  92,  92],
        [ 30,  28,  30,  ...,  80,  89,  92]], device='cuda:0')
tensor([[18, 27, 27,  ..., 71, 91, 53],
        [30, 30, 28,  ..., 40, 87, 92],
        [18, 30, 13,  ..., 67, 89, 97],
        ...,
        [30, 28, 28,  ..., 16, 87, 92],
        [30, 30, 31,  ..., 80, 87,  9],
        [30, 18, 14,  ..., 80, 89, 92]], device='cuda:0')
Epoch [9/10], Batch [35/112], Loss: 4.7192
tensor([[31, 28, 28,  ..., 79, 86, 48],
        [27, 28, 31,  ..., 79, 91, 92],
        [27, 31, 30,  ..., 6

Epoch [9/10], Batch [60/112], Loss: 4.7170
tensor([[ 27,  27,  31,  ...,  36,  92,  92],
        [ 27,  30,  31,  ...,  66,  90,  92],
        [ 31,  31,  28,  ...,  68,  88,  30],
        ...,
        [ 31,  18,  14,  ...,  46,  86,  48],
        [ 31,  30,  28,  ...,  65, 102,  48],
        [ 27,  27,  31,  ...,  68,  91,  92]], device='cuda:0')
tensor([[ 27,  30,  28,  ...,  68,  87,  92],
        [ 31,  28,  18,  ...,  49,  90,   8],
        [ 28,  28,  18,  ...,  46,  88,   8],
        ...,
        [ 30,  30,  18,  ...,  69,  89,  55],
        [ 30,  28,  27,  ...,  82, 101,  56],
        [ 18,  13,  13,  ...,  71,  92,   9]], device='cuda:0')
tensor([[ 30,  30,  30,  ...,  49,  89,  30],
        [ 31,  31,  31,  ...,  67, 102,  54],
        [ 31,  28,  28,  ...,  71,  91,  48],
        ...,
        [ 27,  30,  31,  ...,  37,  88,  92],
        [ 27,  31,  31,  ...,  71,  91,  92],
        [ 31,  31,  31,  ...,  65,  90,  30]], device='cuda:0')
tensor([[31, 28, 28,  ..., 68, 89,  

tensor([[ 31,  31,  18,  ...,  68,  88,  30],
        [ 30,  30,  18,  ...,  80,  88,  30],
        [ 18,  30,  18,  ...,  13,  87,   9],
        ...,
        [ 31,  30,  30,  ...,  38,  88,  56],
        [ 30,  30,  30,  ...,  36,  87, 100],
        [ 31,  18,  14,  ...,  80,  90,  92]], device='cuda:0')
tensor([[30, 31, 30,  ..., 33, 88, 53],
        [31, 18, 14,  ..., 80, 89, 30],
        [31, 31, 30,  ..., 80, 88, 53],
        ...,
        [31, 30, 27,  ..., 68, 89, 54],
        [31, 30, 30,  ..., 38, 90, 55],
        [31, 30, 30,  ..., 34, 92, 92]], device='cuda:0')
Epoch [9/10], Batch [90/112], Loss: 4.7149
tensor([[31, 28, 28,  ..., 80, 88, 53],
        [18, 30, 27,  ..., 80, 88, 54],
        [31, 30, 31,  ..., 68, 88, 92],
        ...,
        [31, 30, 28,  ..., 36, 98, 48],
        [31, 31, 31,  ..., 37, 89, 92],
        [30, 26, 18,  ..., 38, 91, 56]], device='cuda:0')
tensor([[31, 18, 30,  ..., 71, 91, 92],
        [31, 31, 16,  ..., 37, 89, 92],
        [31, 30, 27,  ..., 6

tensor([[31, 31, 18,  ..., 69, 90, 72],
        [27, 30, 30,  ..., 46, 88,  7],
        [27, 27, 30,  ..., 69, 90, 53],
        ...,
        [31, 28, 27,  ..., 79, 86, 86],
        [31, 31, 31,  ..., 71, 98, 53],
        [27, 30, 16,  ..., 71, 90, 92]], device='cuda:0')
Epoch [10/10], Batch [5/112], Loss: 4.7168
tensor([[ 31,  31,  31,  ...,  46,  87,  57],
        [ 18,  13,  27,  ...,  71,  92,  86],
        [ 27,  27,  16,  ...,  71,  92,  92],
        ...,
        [ 31,  31,  30,  ...,  47,  87, 100],
        [ 27,  28,  31,  ...,  34,  90,  92],
        [ 27,  31,  30,  ...,  68,  89,  92]], device='cuda:0')
tensor([[ 31,  31,  31,  ...,  46,  87, 100],
        [ 31,  27,  31,  ...,  47,  88,  53],
        [ 27,  28,  19,  ...,  38,  90,  92],
        ...,
        [ 31,  30,  28,  ...,  46,  87, 100],
        [ 31,  28,  30,  ...,  40,  56,  56],
        [ 18,  13,  27,  ...,  71,  91,  56]], device='cuda:0')
tensor([[ 31,  30,  19,  ...,  46,  88,  53],
        [ 31,  31,  27,  .

tensor([[ 31,  31,  27,  ...,  47,  88,   7],
        [ 31,  31,  31,  ...,  66,  91,  92],
        [ 30,  30,  19,  ...,  66,  91,  92],
        ...,
        [ 31,  30,  19,  ...,  40,  91,  92],
        [ 31,  31,  18,  ...,  46,  88, 101],
        [ 28,  31,  31,  ...,  63,  91,  57]], device='cuda:0')
tensor([[ 28,  31,  27,  ...,  58,  86,  57],
        [ 30,  30,  30,  ...,  47,  88,  30],
        [ 31,  30,  28,  ...,  46,  87, 100],
        ...,
        [ 30,  30,  30,  ...,  63,  89,  92],
        [ 30,  18,  30,  ...,  67,  92,  92],
        [ 30,  28,  30,  ...,  34,  89,  32]], device='cuda:0')
tensor([[ 18,  13,  28,  ...,  71,  91,  53],
        [ 30,  30,  27,  ...,  40,  92,  92],
        [ 18,  30,  30,  ...,  67,  89, 101],
        ...,
        [ 30,  28,  19,  ...,  47,  87,  92],
        [ 27,  31,  31,  ...,  67,  87,   9],
        [ 30,  18,  14,  ...,  80,  89,  92]], device='cuda:0')
Epoch [10/10], Batch [35/112], Loss: 4.7185
tensor([[31, 30, 30,  ..., 79, 86, 

Epoch [10/10], Batch [60/112], Loss: 4.7161
tensor([[ 31,  31,  31,  ...,  71,  92,  92],
        [ 31,  31,  31,  ...,  66,  90,  92],
        [ 31,  31,  31,  ...,  68,  88,  30],
        ...,
        [ 31,  18,  14,  ...,  67,  86, 100],
        [ 31,  31,  27,  ...,  65, 101,  92],
        [ 31,  27,  31,  ...,  71,  91,  92]], device='cuda:0')
tensor([[ 31,  31,  28,  ...,  68,  87,  92],
        [ 31,  28,  18,  ...,  69,  90,  30],
        [ 31,  28,  18,  ...,  46,  88,  30],
        ...,
        [ 31,  31,  18,  ...,  69,  89,  55],
        [ 30,  31,  30,  ...,  68, 101,  56],
        [ 18,  27,  27,  ...,  71,  92,   9]], device='cuda:0')
tensor([[ 30,  30,  30,  ...,  69,  89,  30],
        [ 31,  31,  27,  ...,  67, 102,  54],
        [ 31,  31,  30,  ...,  80, 102,  30],
        ...,
        [ 31,  31,  19,  ...,  71,  88,  92],
        [ 31,  31,  31,  ...,  69,  91,  92],
        [ 31,  31,  30,  ...,  65,  90,  30]], device='cuda:0')
tensor([[31, 19, 28,  ..., 68, 89, 

tensor([[ 31,  31,  18,  ...,  68,  88, 101],
        [ 31,  31,  18,  ...,  46,  88,  30],
        [ 31,  31,  18,  ...,  46,  87,   9],
        ...,
        [ 31,  31,  30,  ...,  42,  88,  30],
        [ 30,  31,  31,  ...,  36,  87,   9],
        [ 31,  18,  14,  ...,  80,  90,  92]], device='cuda:0')
tensor([[ 31,  31,  31,  ...,  42,  88,  53],
        [ 31,  18,  30,  ...,  63,  89,  32],
        [103,  31,  31,  ...,  46,  88,  53],
        ...,
        [ 31,  31,  27,  ...,  68,  89,  54],
        [ 31,  30,  31,  ...,  47,  90,  55],
        [ 31,  28,  31,  ...,  34,  92,  92]], device='cuda:0')
Epoch [10/10], Batch [90/112], Loss: 4.7125
tensor([[ 31,  31,  31,  ...,  47,  88,  30],
        [ 31,  31,  27,  ...,  80,  88,  54],
        [ 30,  30,  31,  ...,  35,  88,  92],
        ...,
        [ 31,  31,  27,  ...,  35,  98,  48],
        [ 30,  30,  28,  ...,  69,  89,  92],
        [ 31,  31,  18,  ...,  38, 102,  57]], device='cuda:0')
tensor([[ 30,  18,  30,  ...,  35, 

## Inference

In [43]:
transformer.train(mode=False)

Transformer(
  (encoder): Encoder(
    (layers): ModuleList(
      (0): EncoderLayer(
        (mha): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
        )
        (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (fullyconnected1): Linear(in_features=512, out_features=1024, bias=True)
        (relu): ReLU()
        (fullyconnected2): Linear(in_features=1024, out_features=512, bias=True)
        (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      )
      (1): EncoderLayer(
        (mha): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
        )
        (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (fullyconnected1): Linear(in_features=512, out_features=1024, bias=True)
        (relu): ReLU()
        (fullyconnected2): Linear(in_features=1024, out_features=512, bias=True)
        

In [98]:
input_sentence = np.zeros(m+1)
output_sentence = np.zeros(m+1)
generated_sentence = []
my_sentence = "This is a test sentence"
for i in range(len(my_sentence)):
    input_sentence[i] = char2index[my_sentence[i]]
input_sentence[len(my_sentence)]=char2index['<EOS>']
input_sentence = torch.Tensor(input_sentence).long().to(DEVICE)
output_sentence = torch.Tensor(output_sentence).long().to(DEVICE)
output_sentence[0]=6

In [99]:
src_padding_mask = padding_mask(input_sentence)
s=1
generated_letter = 6
while (len(generated_sentence))<10 and (generated_letter!=char2index['<EOS>']):
    tgt_padding_mask = padding_mask(output_sentence)
    tgt_padding_mask[0]=False
    print(output_sentence)
    last_output = transformer(input_sentence, output_sentence, src_padding_mask, tgt_padding_mask, attn_mask, pos_encoding)
    last_output = torch.argmax(last_output, -1)
    generated_letter = last_output[s]
    generated_sentence.append(generated_letter)
    output_sentence[s] = generated_letter
    s+=1


tensor([6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0], device='cuda:0')
tensor([ 6, 99,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     

In [100]:

L=[]
for c in output_sentence:
    if c.item()!=0:
        L.append(index2char[c.item()])
print(L)
''.join(L)

[' ', '5', '5', '5', 'l', '·', '·', '³', '5', 'U', 'ö']


' 555l··³5Uö'

In [94]:
char2index[' ']

6