##Imports

In [1]:
from torch.utils.data import Dataset,DataLoader
from dataclasses import dataclass
import torch.nn.functional as F
from typing import Union
from tqdm import tqdm
from torch import nn
import pandas as pd
import numpy as np
import random
import string
import torch
import json
import math
import csv
import re

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Bare expression so the notebook displays which device was selected.
device

device(type='cuda')

In [None]:
# Mini-batch size used by the DataLoaders created in TrainingUtil.
BATCH_SIZE=64
# Hidden width handed to Encoder/Decoder and used by their FeedForward blocks.
HIDDEN_SIZE=256
# Number of heads passed to nn.MultiheadAttention in MultiHeadedAttention.
HEAD_NUMBERS=11
# NOTE(review): not referenced anywhere in the visible notebook — presumably the
# attention embedding width (33 = 3 * HEAD_NUMBERS); confirm or remove.
HEAD_EMBED=33

## Preprocess Data

In [None]:
def string_preprocess(sen:str):
  """Normalise a raw sentence before tokenisation.

  The substitutions run in this order, each replacing its match with one space:
    1. a single ASCII letter surrounded by whitespace,
    2. any character that is neither a word character nor whitespace,
    3. any run of whitespace.
  The result is lower-cased. Leading/trailing spaces are not stripped.
  """
  cleanup_patterns=(
    r"\s+[a-zA-Z]\s+",
    r'[^\w\s]',
    r'\s+',
  )

  cleaned=sen
  for pattern in cleanup_patterns:
    cleaned=re.sub(pattern,' ',cleaned)

  return cleaned.lower()

### Tokenizer

In [None]:
class TokinizerUtils():
  """Vocabulary bookkeeping shared by the tokenizer.

  Collects pre-processed strings in ``fit_dataset``, counts their
  whitespace-separated words and assigns ids to the most frequent ones until
  ``vocab_size_limit`` entries (special tokens included) exist.

  Args:
    vocab_size_limit: maximum number of vocabulary entries, special tokens included.
    max_sequance_length: fixed sequence length; when ``None`` the length is
      derived from the fitted data (see ``_find_lenght_of_longest_string``).
  """

  # Template with the reserved special tokens. Instances work on their own copy
  # (made in __init__) so fitting one tokenizer never leaks words into another.
  vocabolary=dict({"PAD":0,"UNK":1,"MASK":2,"SOS":3,"EOS":4})

  def __init__(self,vocab_size_limit,max_sequance_length):

    self.fit_dataset=[]
    self.longets_string_len=0

    self.max_sequance_length=max_sequance_length
    self.vocab_size_limit=vocab_size_limit

    # Per-instance copy: the original mutated the shared class-level dict.
    self.vocabolary=dict(type(self).vocabolary)

    if self.max_sequance_length is not None:
      self.longets_string_len=self.max_sequance_length

  def _extract_data_from_pandas_csv(self,dataframe_paths:list):
    """Read every CSV in ``dataframe_paths`` and append each cell of each
    column, stringified and passed through ``string_preprocess``, to
    ``fit_dataset``."""
    for dataframe_path in dataframe_paths:
      dataframe=pd.read_csv(dataframe_path)

      _,column_numbers=dataframe.shape
      for column_number in range(column_numbers):
        column=dataframe.iloc[:,column_number]
        self.fit_dataset.extend(string_preprocess(str(i)) for i in column)

  def __len__(self) -> int:
    """Number of vocabulary entries, special tokens included."""
    return len(self.vocabolary)

  def __getitem__(self,index_or_word)->Union[int,str]:
    """Map an integer id to its word, or a word to its integer id.

    Raises:
      TypeError: if the key is neither an integer nor a string.
      ValueError: if the id / word is not in the vocabulary.
    """
    if isinstance(index_or_word,str):
      return self._get_index_from_word(index_or_word)
    # Accept numpy integers too (e.g. values taken from a tensor's numpy view).
    if isinstance(index_or_word,(int,np.integer)) and not isinstance(index_or_word,bool):
      return self._get_word_from_index(index_or_word)
    raise TypeError("data type is not suported, make sure its a int or str")

  def _get_word_from_index(self,word):
    """Return the word stored under the id ``word``.

    The parameter name is kept for backward compatibility; it holds an id.
    """
    for known_word,known_index in self.vocabolary.items():
      if known_index==word:
        return known_word
    raise ValueError(f"{word} is not in the vocabulary")

  def _get_index_from_word(self,idx):
    """Return the id stored for the word ``idx``.

    The parameter name is kept for backward compatibility; it holds a word.
    Looks the id up directly instead of relying on the key's insertion position.
    """
    try:
      return self.vocabolary[idx]
    except KeyError:
      raise ValueError(f"{idx!r} is not in the vocabulary") from None

  def _fit_vocabalary_on_dataset(self)->None:
    self._find_lenght_of_longest_string()

  def _find_lenght_of_longest_string(self)->None:
    """Derive ``longets_string_len`` from the data when no fixed length was given.

    NOTE(review): the length is measured in characters (``len`` of the string),
    while ``Tokinizer.encode`` pads to this value in tokens — confirm intended.
    """
    if self.max_sequance_length is not None:
      return
    longest=max((len(string) for string in self.fit_dataset),default=0)
    if longest>self.longets_string_len:
      self.longets_string_len=longest

  def fit(self)->None:
    """Build the vocabulary from ``fit_dataset``, most frequent words first."""
    self._find_lenght_of_longest_string()
    vocabolary=self._make_dict_of_dataset()

    most_used_words_in_order=self._sort_dict_by_most_word_count(vocabolary)
    self._add_words_to_dict_in_order(most_used_words_in_order)

  def _make_dict_of_dataset(self)->dict:
    """Count occurrences of every whitespace-separated word in ``fit_dataset``."""
    vocabolary=dict()
    for array in self.fit_dataset:
      for word in array.split():
        vocabolary[word]=vocabolary.get(word,0)+1
    return vocabolary

  def _sort_dict_by_most_word_count(self,vocabolary):
    """Return a dict ordered by descending count (ties keep first-seen order)."""
    return dict(sorted(vocabolary.items(),key=lambda item: item[1],reverse=True))

  def _add_words_to_dict_in_order(self,most_used_words_in_order)->None:
    """Assign the next free id to each word, in order, until the limit is hit.

    Iterates the words directly instead of materialising the key list on every
    step, and skips words that already have an id so ids stay unique.
    """
    for word in most_used_words_in_order:
      if len(self.vocabolary)>=self.vocab_size_limit:
        break
      if word in self.vocabolary:
        continue
      self.vocabolary[word]=len(self.vocabolary)

In [None]:
class Tokinizer(TokinizerUtils):
  """Word-level tokenizer that converts between strings and fixed-length id lists.

  ``encode`` yields ``[SOS, word ids..., EOS, PAD...]`` padded to
  ``longets_string_len``; ``decode`` turns a 1-D tensor of ids back into text.
  """

  def __init__(self,vocab_size_limit,max_sequance_length=None):
    super().__init__(vocab_size_limit,max_sequance_length)

  def encode(self,string) -> list:
    """Encode ``string`` as a list of ids of length ``longets_string_len``.

    Words missing from the vocabulary map to ``UNK``. Words that do not fit
    (one slot is reserved for ``EOS``) are dropped.
    """
    encode=[self.vocabolary["SOS"]]
    encode=self._encode_string(string,encode)
    encode.append(self.vocabolary["EOS"])

    return self._pad_encoding(encode)

  def _encode_string(self,string,encode)->list:
    """Append the id of every word in ``string`` to ``encode`` (in place)."""
    unknown=self.vocabolary["UNK"]
    for word in string.split():
      # ``>=`` rather than ``==`` so a very small length limit still truncates.
      if len(encode)>=self.longets_string_len-1:
        break
      encode.append(self.vocabolary.get(word,unknown))
    return encode

  def _pad_encoding(self,encode):
    """Right-pad ``encode`` with ``PAD`` up to ``longets_string_len`` (in place)."""
    missing=self.longets_string_len-len(encode)
    if missing>0:
      encode.extend([self.vocabolary["PAD"]]*missing)
    return encode

  def decode(self,token_tensor:torch.Tensor) -> str:
    """Decode a 1-D tensor of ids into a space-separated string.

    ``SOS`` is skipped and decoding stops at the first ``EOS``.

    Raises:
      ValueError: if the tensor has more than one dimension or contains an id
        that is not in the vocabulary.
    """
    self._check_tensor_dimension(token_tensor)
    return self._decode_tensor(token_tensor)

  def _check_tensor_dimension(self,tensor):
    tensor_dimensions=len(tensor.shape)
    if tensor_dimensions>1:
      raise  ValueError(f"tensor has to many dimensions. expected 1d got {tensor_dimensions}d")

  def _decode_tensor(self,tensor):
    sos=self.vocabolary["SOS"]
    eos=self.vocabolary["EOS"]

    # Build the id -> word table once per call; the original rebuilt two
    # vocabulary-sized lists for every decoded token.
    index_to_word={}
    for word,index in self.vocabolary.items():
      index_to_word.setdefault(index,word)

    words=[]
    for token in tensor.cpu().numpy().tolist():
      if token==sos:
        continue
      if token==eos:
        break
      if token not in index_to_word:
        raise ValueError(f"{token} is not in the vocabulary")
      words.append(index_to_word[token])

    return " ".join(words)


# Global tokenizer: sequences are fixed at 33 tokens, vocabulary capped at 60000 entries.
tokinizer=Tokinizer(max_sequance_length=33,vocab_size_limit=60000)
# Collect every cell of every column from both CSV files as fitting data.
tokinizer._extract_data_from_pandas_csv(["/content/drive/MyDrive/shitpostCommentData.csv","/content/drive/MyDrive/preTrainingData.csv"])
# Build the vocabulary from the collected strings (most frequent words first).
tokinizer.fit()

###Dataset

In [None]:
class promptDataset(Dataset):
  """Dataset of (question, answer) id tensors read from a CSV file.

  The CSV must provide an ``input`` and a ``target`` column. Every cell is
  stringified, cleaned with ``string_preprocess`` and encoded with
  ``tokinizer.encode``; each column is stored as one tensor.
  """

  def __init__(self,path:str,tokinizer):
    frame=pd.read_csv(path)

    def encode_column(column_name):
      # Clean the whole column first, then encode it, as two separate passes.
      cleaned=[string_preprocess(str(cell)) for cell in frame[column_name]]
      return torch.tensor([tokinizer.encode(text) for text in cleaned])

    self.question_dataset_tensor=encode_column("input")
    self.answer_dataset_tensor=encode_column("target")

  def __len__(self):
    """Number of (question, answer) pairs."""
    return len(self.question_dataset_tensor)

  def __getitem__(self,idx):
    """Return the ``(question, answer)`` pair at ``idx``."""
    question=self.question_dataset_tensor[idx]
    answer=self.answer_dataset_tensor[idx]
    return question,answer

##Model

In [None]:
class PrintLayer(nn.Module):
    """Debugging pass-through layer: prints its label and the input's shape."""

    def __init__(self,layerName):
      super().__init__()
      # Label printed in front of every shape.
      self.layerName=layerName

    def forward(self, x):
      """Print ``layerName`` and ``x.shape``; return ``x`` unchanged."""
      shape=x.shape
      print(self.layerName,shape)
      return x

###Encoder

In [None]:
class Encoder(nn.Module):
  """Encoder stack: input embedding, three ``EncoderBlock``s and a final
  linear layer mapping ``input_sequance_length`` features to ``vocabalary_size``."""

  def __init__(self ,input_sequance_length,  vocabalary_size, hidden_size) -> None:
    super().__init__()

    # Layers are created in the same order as they are applied.
    layers=[InputLayer(vocabalary_size,input_sequance_length)]
    for _ in range(3):
      layers.append(
        EncoderBlock(input_sequance_length, vocabalary_size, hidden_size, input_sequance_length)
      )
    layers.append(nn.Linear(input_sequance_length,vocabalary_size))

    self.sequentialBlock=nn.Sequential(*layers)

  def forward(self,input:torch.LongTensor):
    """Run ``input`` (token ids) through the whole stack and return the result."""
    encoded=self.sequentialBlock(input)
    return encoded

###Decoder

In [None]:
class Decoder(nn.Module):
  """Decoder stack: target embedding, six ``DecoderBlock``s and an output projection.

  ``forward`` returns unnormalised logits over the vocabulary. Normalisation is
  left to the callers: ``nn.CrossEntropyLoss`` expects logits, and both
  ``Seq2Seq.forward(softmax=True)`` and ``Seq2Seq.generate_tokens`` apply their
  own softmax. The original applied a softmax here as well, so the
  distribution was squashed twice.
  """

  def __init__(self ,input_sequance_length,vocabalary_size ,hidden_size) -> None:
    super().__init__()

    self.inputLayer=InputLayer(vocabalary_size,input_sequance_length)

    # The first five blocks hand the encoder output on to the next block;
    # the last block returns only the decoded tensor.
    blocks=[
        DecoderBlock(input_sequance_length,vocabalary_size,True,hidden_size, input_sequance_length)
        for _ in range(5)
    ]
    blocks.append(
        DecoderBlock(input_sequance_length,vocabalary_size,False,hidden_size, input_sequance_length)
    )
    self.sequential=nn.Sequential(*blocks)

    # Projects the encoder's vocabulary-sized output back to the block width.
    self.reshapeEncoder=nn.Linear(vocabalary_size,input_sequance_length)

    self.output=nn.Linear(input_sequance_length,vocabalary_size)

  def forward(self,target:torch.LongTensor, encoder_output):
    """Decode ``target`` token ids while attending to ``encoder_output``.

    Returns:
      Logits with ``vocabalary_size`` entries in the last dimension.
    """
    reshapeEncoder=self.reshapeEncoder(encoder_output)
    posisonalEmbedding=self.inputLayer(target)

    # Must be an ordered tuple: DecoderBlock unpacks it as (input, encoder_output).
    # The original passed a set, whose iteration order is arbitrary.
    sequential=self.sequential((posisonalEmbedding,reshapeEncoder))

    return self.output(sequential)

###Encoder/Decoder block

In [None]:
class EncoderBlock(nn.Module):
  """One encoder layer: self-attention followed by a feed-forward network."""

  def __init__(self, input_sequance_length,vocabalary_size,  hidden_size, output):
    super().__init__()
    attention=MultiHeadedAttention(vocabalary_size, input_sequance_length)
    feed_forward=FeedForward(input_sequance_length, hidden_size, output)
    self.sequentialBlock=nn.Sequential(attention,feed_forward)

  def forward(self,input):
    """Apply attention, then the feed-forward network, to ``input``."""
    block_output=self.sequentialBlock(input)
    return block_output

In [None]:
class DecoderBlock(nn.Module):
  """One decoder layer: self-attention, attention over the encoder output,
  then a feed-forward network.

  ``forward`` takes a single ``(input, encoder_output)`` pair so the block can
  be chained inside ``nn.Sequential``. When ``return_encoder_output`` is True
  the encoder output is handed on to the next block.

  NOTE(review): no attention mask is passed to ``maskedMultiHeadedAttention``,
  so despite its name that layer is not masked.
  """

  def __init__(self ,input_sequance_length,vocabalary_size,return_encoder_output,  hidden_size, output):
    super().__init__()
    self.return_encoder_output=return_encoder_output

    self.maskedMultiHeadedAttention=MultiHeadedAttention(vocabalary_size, input_sequance_length)
    self.multiHeadedAttention=MultiHeadedAttention(vocabalary_size,input_sequance_length)
    self.feedForwardBlock=FeedForward(input_sequance_length, hidden_size ,output)

  def forward(self,inputData):
    """Process ``(input, encoder_output)``.

    Returns ``(output, encoder_output)`` when ``return_encoder_output`` is
    True, otherwise just ``output``.
    """
    decoder_state,encoder_output=inputData

    self_attended=self.maskedMultiHeadedAttention(decoder_state)
    cross_attended=self.multiHeadedAttention(self_attended,encoder_output)
    block_output=self.feedForwardBlock(cross_attended)

    return (block_output,encoder_output) if self.return_encoder_output==True else block_output


###Sub layers

In [None]:
class InputLayer(nn.Module):
  """Token embedding plus a learned positional embedding.

  Args:
    input_size: number of rows in both embedding tables.
    output_size: embedding width.
  """

  def __init__(self,input_size,output_size):
    super().__init__()
    self.embedding=nn.Embedding(input_size,output_size)
    self.posisonalEncoding=nn.Embedding(input_size,output_size)

  def forward(self,input):
    """Embed ``input`` token ids.

    A 1-D input is treated as a single sequence and gets a batch dimension.

    Returns:
      Tensor of shape ``(batch, sequence_length, output_size)``.

    Raises:
      ValueError: if ``input`` is not 1-D or 2-D.
    """
    if input.dim()==1:
      input=input.unsqueeze(0)
    batch,squanceLength=input.shape

    embedding=self.embedding(input)
    # Create the positions on the input's own device rather than on the
    # notebook-global ``device``, so the layer works wherever the module lives.
    positions=torch.arange(squanceLength,device=input.device)
    posisonalEmbedding=self.posisonalEncoding(positions)

    return embedding+posisonalEmbedding

In [None]:
class FeedForward(nn.Module):
  """Two-layer ReLU network with a residual connection and layer norm.

  The residual addition requires ``output_size`` to be compatible with the
  size of the input's last dimension.
  """

  def __init__(self,input_size,hidden_size,output_size):
    super().__init__()
    expand=nn.Linear(input_size,hidden_size)
    activation=nn.ReLU()
    project=nn.Linear(hidden_size,output_size)
    self.sequentialBlock=nn.Sequential(expand,activation,project)
    self.norm=nn.LayerNorm(output_size)

  def forward(self,input):
    """Return ``LayerNorm(network(input) + input)``."""
    residual=input
    transformed=self.sequentialBlock(input)
    return self.norm(transformed+residual)

### Masked Language Model

In [None]:
class MaskStringVectorWithPorbebility():
  """Randomly corrupts token-id sequences for masked-language-model training.

  Each token is selected with probability ``wordChanceOfSelection``. A selected
  token is replaced by ``maskToken`` with probability
  ``wordChanceOfSwapWithMaskToken``, by a random vocabulary id with probability
  ``wordChanceOfSwapWithRandomToken``, and is otherwise left unchanged.

  Args:
    tokinizer: object whose ``len()`` is the vocabulary size.
    maskToken: id written in place of masked tokens.
    wordChanceOfSelection: probability that a token is selected at all.
    wordChanceOfSwapWithMaskToken: share of selected tokens that become ``maskToken``.
    wordChanceOfSwapWithRandomToken: share of selected tokens that become a random id.
    wordChanceOfstayingTheSame: stored for reference; whatever probability is
      left after the two swaps keeps the token unchanged.
  """

  def __init__(self, tokinizer, maskToken, wordChanceOfSelection=.15 ,wordChanceOfSwapWithMaskToken=.8, wordChanceOfSwapWithRandomToken=.1, wordChanceOfstayingTheSame=.1):
    self.tokinizer=tokinizer
    self.maskToken=maskToken
    self.wordChanceOfSelection=wordChanceOfSelection
    self.wordChanceOfSwapWithMaskToken=wordChanceOfSwapWithMaskToken
    self.wordChanceOfSwapWithRandomToken=wordChanceOfSwapWithRandomToken
    self.wordChanceOfstayingTheSame=wordChanceOfstayingTheSame

  def _batchedDataset(self,inputData):
    """Mask every row of a 2-D array; returns a list of lists of ids."""
    return [self._maskSequanceVector(row) for row in inputData]

  def _maskSequanceVector(self,VectorSequance):
    """Mask one sequence; returns a list of plain ``int`` ids."""
    masked=[]
    for idx in range(len(VectorSequance)):
      if random.random()<self.wordChanceOfSelection:
        token=self._selectWordWithProbAndModify(idx,VectorSequance)
      else:
        token=VectorSequance[idx]
      masked.append(int(token))

    return masked

  def _selectWordWithProbAndModify(self,idx,sentanceInVectorForm):
    """Pick the replacement for the selected token at ``idx``.

    A single random draw is compared against cumulative thresholds. The
    original drew a fresh number for every branch, which made the effective
    split 80% / 2% / 18% instead of the configured 80% / 10% / 10%.
    """
    roll=random.random()
    mask_cutoff=self.wordChanceOfSwapWithMaskToken
    random_cutoff=mask_cutoff+self.wordChanceOfSwapWithRandomToken

    if roll<mask_cutoff:
      return self.maskToken
    if roll<random_cutoff:
      return random.randrange(len(self.tokinizer))
    return sentanceInVectorForm[idx]

  def maskSentance(self,inputData : torch.Tensor):
    """Mask a 1-D sequence or a 2-D batch of token ids.

    Returns:
      A ``torch.long`` tensor with the same shape as ``inputData`` (ids must be
      integral to index an embedding; the original returned floats).

    Raises:
      ValueError: if ``inputData`` is neither 1-D nor 2-D.
    """
    inputData=inputData.cpu().detach().numpy()
    if len(inputData.shape)==2:
      masked=self._batchedDataset(inputData)
    elif len(inputData.shape)==1:
      masked=self._maskSequanceVector(inputData)
    else:
      raise ValueError(f"expected data batch size to be 1D or 2D but resived {len(inputData.shape)}D")
    return torch.tensor(masked,dtype=torch.long)

In [None]:
class MaskedLanguageModel(nn.Module):
  """Sums three embeddings of the arg-max token ids and applies a softmax.

  Args:
    input: number of rows in each embedding table.
    tokinizer: passed to the masking helper.
    output: embedding width.
  """

  def __init__(self,input,tokinizer,output):
    super().__init__()
    # 2 is the id used as the mask token.
    self.maskingMechanism=MaskStringVectorWithPorbebility(tokinizer,2)
    self.tokenEmbedding=nn.Embedding(input,output)
    self.positonalEmbedding=nn.Embedding(input,output)
    self.languageEmbedding=nn.Embedding(input,output)

  def forward(self,input):
    """Return ``softmax`` over the summed embeddings of ``input.argmax(-1)``."""
    token_ids=input.argmax(-1)

    # NOTE(review): the masked ids are computed but never used below — all
    # three embeddings are looked up with the unmasked ids; confirm intent.
    inputAfterMaskedMechanism=self.maskingMechanism.maskSentance(token_ids)

    token_part=self.tokenEmbedding(token_ids)
    position_part=self.positonalEmbedding(token_ids)
    language_part=self.languageEmbedding(token_ids)

    summed=language_part+position_part+token_part

    return F.softmax(summed,-1)

###Attention Mechanism

In [None]:
class MultiHeadedAttention(nn.Module):
  """Multi-head attention with a residual connection and layer norm.

  Inputs are batch-first, ``(batch, sequence, output_size)``, which is the
  layout ``InputLayer`` produces. The original built ``nn.MultiheadAttention``
  with its default sequence-first layout, so attention was computed across the
  batch axis and samples of one batch influenced each other.

  Args:
    vocabalary_size: unused; kept for interface compatibility.
    output_size: embedding width of the attention and the layer norm. The
      original read this from the global tokenizer; every caller in this
      notebook passes that same value.
    num_heads: number of attention heads; defaults to the notebook-level
      ``HEAD_NUMBERS``. ``output_size`` must be divisible by it.
  """

  def __init__(self,vocabalary_size, output_size, num_heads=None):
    super().__init__()
    if num_heads is None:
      num_heads=HEAD_NUMBERS
    self.attention=nn.MultiheadAttention(output_size,num_heads,batch_first=True)
    self.norm=nn.LayerNorm(output_size)

  def forward(self,input,encoder_output=None):
    """Attend from ``input`` to itself, or to ``encoder_output`` when given.

    Returns:
      ``LayerNorm(input + attention)``, same shape as ``input``.
    """
    # Keys and values come from the encoder for cross-attention, otherwise
    # from the input itself (self-attention).
    memory=input if encoder_output is None else encoder_output
    attn, _=self.attention(input,memory,memory)

    return self.norm(input+attn)


###Seq2Seq

In [None]:
class Seq2Seq(nn.Module):
  """Encoder/decoder wrapper with teacher-forced ``forward`` and sampling-based
  ``generate_tokens``.

  Args:
    encoder: module mapping source token ids to the encoder output.
    decoder: module called as ``decoder(target_ids, encoder_output)`` returning
      scores of shape ``(batch, sequence, target_vocab_size)``.
    device: stored on the instance.
    target_vocab_size: size of the last dimension of the decoder output.
  """

  def __init__(self, encoder, decoder, device, target_vocab_size):
    super().__init__()
    self.encoder=encoder
    self.decoder=decoder
    self.device=device
    self.target_vocab_size=target_vocab_size

  def forward(self,input:torch.LongTensor, target:torch.LongTensor,softmax=False):
    """Encode ``input`` and decode ``target``.

    Returns the decoder output, passed through a softmax over the last
    dimension when ``softmax`` is True.
    """
    encoderOutput=self.encoder(input)

    decoderOutput=self.decoder(target,encoderOutput)

    if softmax==True:
      return F.softmax(decoderOutput,-1)
    return decoderOutput

  def generate_tokens(self, input, start_token, end_token, max_length=512):
    """Sample an output sequence for ``input``, one position at a time.

    The sequence starts as ``[start_token, 0, 0, ...]``. For each further
    position the decoder is run on the sequence so far and exactly one token
    is sampled from the softmax of that position's scores. The training loops
    in this notebook compare position ``i`` of the decoder output with target
    token ``i``, so position ``i`` is the one read here. A sequence stops
    changing once it has produced ``end_token``.

    The original sampled a whole permutation of the vocabulary
    (``num_samples=vocab``), stored it as a row and later took ``argmax`` over
    it, which yields an essentially random token.

    Returns:
      A ``torch.long`` one-hot tensor of shape
      ``(batch, max_length, target_vocab_size)``; ``argmax(-1)`` gives the ids.
    """
    batch_size=input.shape[0]

    with torch.no_grad():
      encoder_output=self.encoder(input)

      # 0-filled like the original target buffer; slot 0 holds the start token.
      tokens=torch.zeros((batch_size,max_length),dtype=torch.long,device=input.device)
      if max_length>0:
        tokens[:,0]=start_token
      finished=torch.zeros(batch_size,dtype=torch.bool,device=input.device)

      for position in range(1,max_length):
        decoder_output=self.decoder(tokens,encoder_output)

        next_token_probs=F.softmax(decoder_output[:,position,:],-1)
        sampled=torch.multinomial(next_token_probs,num_samples=1).squeeze(-1)

        # Sequences that already emitted end_token keep their 0 padding.
        tokens[:,position]=torch.where(finished,tokens[:,position],sampled)
        finished=finished|(sampled==end_token)
        if bool(finished.all()):
          break

    return F.one_hot(tokens,self.target_vocab_size)


# Vocabulary size after fitting (special tokens included).
TOKINIZER_VOCAB=len(tokinizer)

# Encoder and decoder share the sequence length, vocabulary size and hidden width.
encoder=Encoder(tokinizer.longets_string_len, TOKINIZER_VOCAB, HIDDEN_SIZE).to(device)
# encoder.load_state_dict(torch.load("/content/drive/MyDrive/preTrainedEncoder_2.pth",map_location=device))

decoder=Decoder(tokinizer.longets_string_len, TOKINIZER_VOCAB, HIDDEN_SIZE).to(device)

# The wrapper holds the same encoder/decoder objects created above.
seq2seq=Seq2Seq(encoder,decoder,device,TOKINIZER_VOCAB).to(device)
# seq2seq.load_state_dict(torch.load("/content/drive/MyDrive/preTrainedTransformer_2.pth",map_location=device))
# seq2seq

HEAD_NUMBERS:  11
HEAD_NUMBERS:  11
HEAD_NUMBERS:  11
HEAD_NUMBERS:  11
HEAD_NUMBERS:  11
HEAD_NUMBERS:  11
HEAD_NUMBERS:  11
HEAD_NUMBERS:  11
HEAD_NUMBERS:  11
HEAD_NUMBERS:  11
HEAD_NUMBERS:  11
HEAD_NUMBERS:  11
HEAD_NUMBERS:  11
HEAD_NUMBERS:  11
HEAD_NUMBERS:  11


In [None]:
def make_prediction(string:str):
  """Encode ``string``, let the global ``seq2seq`` model generate a reply and
  decode it back to text.

  ``3`` and ``4`` are the start and end token ids handed to ``generate_tokens``.
  """
  token_ids=torch.tensor([tokinizer.encode(string)]).to(device)
  generated=seq2seq.generate_tokens(token_ids, 3, 4,tokinizer.longets_string_len)
  predicted_ids=generated.argmax(-1).squeeze(0).cpu()
  return tokinizer.decode(predicted_ids)
# Smoke test: generate a reply for a short prompt and show it.
pred=make_prediction("olla")
print(pred)

empirica genotype ambracian firmly touted octocat lakes brunei rectangles prefixal accomplishment gettysburg messiah syntactic backwards afford megawatts discovery subfield dioxide seth lb visit west colossus hudson austria laws mortise prewritten lapis ing oarsmen


##parameter count

In [None]:
def parametersCount(model):
  """Return the total number of elements in ``model``'s trainable parameters
  (those with ``requires_grad`` set)."""
  total=0
  for parameter in model.parameters():
    if parameter.requires_grad:
      total+=parameter.numel()
  return total

# Report trainable parameter counts; seq2seq wraps the same encoder and decoder objects.
print(f'The seq2seq model has {parametersCount(seq2seq):,} trainable parameters')
print(f'The encoder model has {parametersCount(encoder):,} trainable parameters')
print(f'The decoder model has {parametersCount(decoder):,} trainable parameters')

The seq2seq model has 5,333,991 trainable parameters
The encoder model has 2,258,715 trainable parameters
The decoder model has 3,075,276 trainable parameters


##Optimizer & Loss


In [None]:
# Adam over all seq2seq parameters (which include the encoder's and decoder's).
# The training loops below use this module-level optimizer directly.
optimizer=torch.optim.Adam(seq2seq.parameters(),lr=0.001)
# Alternative that skips index 0 (the PAD id) when computing the loss:
# loss=nn.CrossEntropyLoss(ignore_index=0)
loss=nn.CrossEntropyLoss()

##Training Loops

###Training Util

In [None]:
class TrainingUtil():
  """Shared training driver.

  Subclasses provide ``trainingLoop()`` and ``testingLoop()`` generators that
  yield one ``(loss, accuracy)`` pair per batch. Constructing an instance
  builds the data loaders and immediately starts training.

  Args:
    EPOCHS: number of epochs to run.
    model: module being trained; also the module whose state dict is saved.
    loss: loss callable taking ``(predictions, targets)``.
    device: device that inputs, targets and predictions are moved to.
    savePath: file stem of the checkpoint written after every epoch.
    tokinizer: tokenizer used to encode the dataset.
    csvFilePath: CSV file with ``input`` and ``target`` columns.
    trainSplit: fraction of the dataset used for training; the rest is test data.
  """

  def __init__(self, EPOCHS ,model,loss ,device ,savePath ,tokinizer, csvFilePath, trainSplit):
    self.EPOCHS=EPOCHS
    self.model=model
    self.loss=loss
    self.device=device
    self.savePath=savePath
    self.tokinizer=tokinizer

    self.train_dataloader,self.test_dataloader=self.createDatasetFromPandasCsv(csvFilePath,trainSplit)

    self.currentEpoch=0
    self.startPreTraining()

  def accuracy(self,predictions,targets):
    """Percentage of positions where ``predictions`` equals ``targets``."""
    assert predictions.shape == targets.shape, "Shapes of predictions and targets must match."

    total_samples = targets.numel()
    if total_samples==0:
      return 0.0

    num_correct = (predictions == targets).sum().item()
    return num_correct / total_samples*100

  def getLossAndAccuracy(self,prediction,target):
    """Return ``(loss, accuracy_percent)`` for one batch.

    ``prediction`` is ``(batch, sequence, vocab)``; ``target`` holds the ids.
    """
    prediction=prediction.to(self.device)
    target=target.to(self.device).type(torch.int64)

    # Flatten batch and sequence so the loss sees one row per token.
    prediction_loss=self.loss(prediction.reshape(-1,prediction.shape[-1]),target.reshape(-1))
    prediction_acc=self.accuracy(prediction.argmax(2),target)

    return prediction_loss,prediction_acc

  def createDatasetFromPandasCsv(self,csvFilePath,trainSplit):
    """Load the CSV and split it randomly into train and test loaders.

    Uses the injected tokenizer and honours ``trainSplit``; the original used
    the module-level tokenizer and a hard-coded 0.8 split.
    """
    dataset=promptDataset(csvFilePath,self.tokinizer)
    print("dataset size: ",len(dataset))
    train_size = int(trainSplit * len(dataset))
    test_size = len(dataset) - train_size
    trainDataset, testDataset = torch.utils.data.random_split(dataset, [train_size, test_size])

    train_dataloader=DataLoader(trainDataset,batch_size=BATCH_SIZE,shuffle=True)
    test_dataloader=DataLoader(testDataset,batch_size=BATCH_SIZE,shuffle=True)

    return train_dataloader, test_dataloader

  def make_prediction(self,input,target=None):
    """Run the model on ``input`` (and ``target`` when given) on ``self.device``."""
    input=input.to(self.device)
    # ``is None`` rather than ``== None``: ``target`` is a tensor when present.
    if target is None:
      return self.model(input)
    return self.model(input,target.to(self.device))

  def _averageBatchMetrics(self,batchMetrics):
    """Consume an iterable of per-batch ``(loss, accuracy)`` pairs and return
    their means as floats (NaN for both when there were no batches)."""
    total_loss=0.0
    total_acc=0.0
    batches=0
    for batch_loss,batch_acc in batchMetrics:
      total_loss+=float(batch_loss)
      total_acc+=float(batch_acc)
      batches+=1

    if batches==0:
      return float("nan"),float("nan")
    return total_loss/batches,total_acc/batches

  def startPreTraining(self)->None:
    """Run ``EPOCHS`` epochs, saving a checkpoint and printing metrics after each.

    Every epoch exhausts both loops and reports mean metrics. The original
    took only the first item of each generator, so an "epoch" trained and
    evaluated on a single batch.
    """
    epochsToRun=self.EPOCHS+1
    for epoch in tqdm(range(1,epochsToRun)):
      self.currentEpoch=epoch

      train_loss,train_acc=self._averageBatchMetrics(self.trainingLoop())
      test_loss,test_acc=self._averageBatchMetrics(self.testingLoop())

      torch.save(self.model.state_dict(), f"/content/drive/MyDrive/{self.savePath}.pth")
      print(f"\n epoch: {epoch} | train_loss: {train_loss:.2f}, train_acc: {train_acc:.1f}% | test_loss: {test_loss:.2f}, test_acc: {test_acc:.1f}%")



###Training seq2seq

In [None]:
class TrainNN(TrainingUtil):
  """Trainer for the full seq2seq model: the decoder is fed the target sequence.

  NOTE(review): the decoder receives the complete ``target`` as input and the
  loss compares its output with that same ``target`` (no shift), so the model
  can see the token it is asked to predict — confirm this is intended.
  """

  def __init__(self,EPOCHS ,model,loss ,device ,savePath ,tokinizer, csvFilePath, trainSplit):
    super().__init__(EPOCHS ,model,loss ,device ,savePath ,tokinizer, csvFilePath, trainSplit)

  def trainingLoop(self):
    """Train on every batch of the train loader; yield ``(loss, accuracy)`` per batch.

    Uses the module-level ``optimizer``.
    """
    self.model.train()
    for input,target in self.train_dataloader:
      optimizer.zero_grad()
      prediction=self.make_prediction(input,target)

      train_loss,train_acc=self.getLossAndAccuracy(prediction,target)

      train_loss.backward()
      optimizer.step()
      # Detached so the consumer does not keep the autograd graph alive.
      yield train_loss.detach(),train_acc

  def testingLoop(self):
    """Evaluate every batch of the test loader; yield ``(loss, accuracy)`` per batch.

    On every 10th epoch one decoded sample (from the first batch only) is printed.
    """
    self.model.eval()
    show_sample=self.currentEpoch%10 == 0
    with torch.inference_mode():
      for batch_number,(input,target) in enumerate(self.test_dataloader):

        prediction=self.make_prediction(input,target)
        test_loss,test_acc=self.getLossAndAccuracy(prediction,target)

        # Only the first batch, so consuming the whole loop does not flood the output.
        if show_sample and batch_number==0:
          print("\ninput: ",     self.tokinizer.decode(input[0]))
          print("\ntarget: ",    self.tokinizer.decode(target[0]))
          print("\nprediction: ",self.tokinizer.decode(prediction[0].argmax(-1)))
        yield test_loss,test_acc

### Training Encoder

In [None]:
class TrainEncoder(TrainingUtil):
  """Trainer for the encoder alone: the model sees only ``input`` and its
  output is compared with ``target``."""

  def __init__(self, EPOCHS ,model,loss ,device ,savePath ,tokinizer, csvFilePath, trainSplit):
    super().__init__(EPOCHS ,model,loss ,device ,savePath ,tokinizer, csvFilePath, trainSplit)

  def trainingLoop(self):
    """Train on every batch of the train loader; yield ``(loss, accuracy)`` per batch.

    Uses the module-level ``optimizer``.
    """
    self.model.train()
    for input,target in self.train_dataloader:
      optimizer.zero_grad()
      prediction=self.make_prediction(input).type(torch.float32)
      train_loss,train_acc=self.getLossAndAccuracy(prediction,target)

      train_loss.backward()
      optimizer.step()
      # Detached so the consumer does not keep the autograd graph alive.
      yield train_loss.detach(),train_acc

  def testingLoop(self):
    """Evaluate every batch of the test loader; yield ``(loss, accuracy)`` per batch.

    On every 10th epoch one decoded sample (from the first batch only) is printed.
    """
    self.model.eval()
    show_sample=self.currentEpoch%10 == 0
    with torch.inference_mode():
      for batch_number,(input,target) in enumerate(self.test_dataloader):

        prediction=self.make_prediction(input).type(torch.float32)
        test_loss,test_acc=self.getLossAndAccuracy(prediction,target)

        # Only the first batch, so consuming the whole loop does not flood the output.
        if show_sample and batch_number==0:
          print("\ninput:      ",self.tokinizer.decode(input[0]))
          print("\ntarget:     ",self.tokinizer.decode(target[0]))
          print("\nprediction: ",self.tokinizer.decode(prediction[0].argmax(-1)))

        yield test_loss,test_acc

###Start Training

In [None]:
# Number of epochs handed to the trainer.
EPOCHS=15000
# "encoder" pre-trains the encoder alone; any other value trains the full seq2seq model.
pretrain="encoder"

In [None]:
# Constructing a trainer starts training immediately
# (TrainingUtil.__init__ calls startPreTraining).
if pretrain=="encoder":
  TrainEncoder(EPOCHS=EPOCHS,model=encoder,loss=loss,device=device,savePath="preTrainedEncoder_2",tokinizer=tokinizer,csvFilePath="/content/drive/MyDrive/preTrainingData.csv",trainSplit=.8)
else:
  TrainNN(EPOCHS=EPOCHS,model=seq2seq,loss=loss,device=device,savePath="trainedSeq2seq_2",tokinizer=tokinizer,csvFilePath="/content/drive/MyDrive/shitpostCommentData.csv",trainSplit=.8)

dataset size:  284734


  0%|          | 0/15000 [00:00<?, ?it/s]


 epoch: 1 | train_loss: 1.43, train_acc: 83.0% | test_loss: 1.38, test_acc: 83.6%


  0%|          | 2/15000 [00:00<20:44, 12.05it/s]


 epoch: 2 | train_loss: 1.37, train_acc: 84.0% | test_loss: 1.39, test_acc: 83.5%

 epoch: 3 | train_loss: 1.43, train_acc: 83.4% | test_loss: 1.31, test_acc: 85.2%


  0%|          | 4/15000 [00:00<20:54, 11.95it/s]


 epoch: 4 | train_loss: 1.33, train_acc: 84.7% | test_loss: 1.31, test_acc: 85.1%

 epoch: 5 | train_loss: 1.40, train_acc: 83.8% | test_loss: 1.36, test_acc: 83.9%


  0%|          | 6/15000 [00:00<24:24, 10.24it/s]


 epoch: 6 | train_loss: 1.33, train_acc: 83.3% | test_loss: 1.42, test_acc: 82.6%


  0%|          | 8/15000 [00:00<24:24, 10.24it/s]


 epoch: 7 | train_loss: 1.41, train_acc: 83.7% | test_loss: 1.38, test_acc: 84.0%

 epoch: 8 | train_loss: 1.44, train_acc: 83.4% | test_loss: 1.41, test_acc: 83.0%

 epoch: 9 | train_loss: 1.35, train_acc: 84.1% | test_loss: 1.30, test_acc: 84.5%


  0%|          | 10/15000 [00:00<23:53, 10.45it/s]


input:       after the founding of the roman republic in the th century bc though it did not expand outside the

target:      after the founding of the roman republic in the th century bc though it did not expand outside the italian

prediction:  after the founding of the roman republic in the th century bc though it did not other outside the the

 epoch: 10 | train_loss: 1.29, train_acc: 85.4% | test_loss: 1.38, test_acc: 83.9%

 epoch: 11 | train_loss: 1.39, train_acc: 83.5% | test_loss: 1.33, test_acc: 83.8%


  0%|          | 14/15000 [00:01<21:50, 11.44it/s]


 epoch: 12 | train_loss: 1.38, train_acc: 84.3% | test_loss: 1.31, test_acc: 84.6%

 epoch: 13 | train_loss: 1.41, train_acc: 83.7% | test_loss: 1.34, test_acc: 83.8%

 epoch: 14 | train_loss: 1.34, train_acc: 84.6% | test_loss: 1.42, test_acc: 83.2%


  0%|          | 16/15000 [00:01<21:45, 11.48it/s]


 epoch: 15 | train_loss: 1.38, train_acc: 83.3% | test_loss: 1.39, test_acc: 84.1%

 epoch: 16 | train_loss: 1.34, train_acc: 84.2% | test_loss: 1.41, test_acc: 83.2%

 epoch: 17 | train_loss: 1.35, train_acc: 83.5% | test_loss: 1.25, test_acc: 85.2%


  0%|          | 18/15000 [00:01<21:57, 11.37it/s]


 epoch: 18 | train_loss: 1.31, train_acc: 84.3% | test_loss: 1.35, test_acc: 84.1%

 epoch: 19 | train_loss: 1.36, train_acc: 84.0% | test_loss: 1.29, test_acc: 84.5%

input:       in lebanon and less than live in israel with recently there has been growing druze diaspora the bah faith

target:      in lebanon and less than live in israel with recently there has been growing druze diaspora the bah faith originated

prediction:  in objects and less than live in rapidly with types there has been growing primary formed the again faith the


  0%|          | 22/15000 [00:02<34:15,  7.29it/s]


 epoch: 20 | train_loss: 1.27, train_acc: 84.8% | test_loss: 1.35, test_acc: 84.0%

 epoch: 21 | train_loss: 1.36, train_acc: 84.0% | test_loss: 1.32, test_acc: 84.3%

 epoch: 22 | train_loss: 1.30, train_acc: 84.3% | test_loss: 1.31, test_acc: 84.7%


  0%|          | 24/15000 [00:02<30:13,  8.26it/s]


 epoch: 23 | train_loss: 1.35, train_acc: 84.4% | test_loss: 1.25, test_acc: 84.4%

 epoch: 24 | train_loss: 1.34, train_acc: 84.3% | test_loss: 1.40, test_acc: 83.9%

 epoch: 25 | train_loss: 1.29, train_acc: 84.7% | test_loss: 1.21, test_acc: 86.3%


  0%|          | 28/15000 [00:02<25:21,  9.84it/s]


 epoch: 26 | train_loss: 1.40, train_acc: 83.0% | test_loss: 1.45, test_acc: 83.2%

 epoch: 27 | train_loss: 1.38, train_acc: 84.2% | test_loss: 1.37, test_acc: 83.6%

 epoch: 28 | train_loss: 1.31, train_acc: 84.6% | test_loss: 1.34, test_acc: 84.2%


  0%|          | 30/15000 [00:03<25:21,  9.84it/s]


 epoch: 29 | train_loss: 1.29, train_acc: 85.2% | test_loss: 1.36, test_acc: 84.1%

input:       off the continent coasts principally the arctic archipelago the bahamas turks caicos the greater and lesser antilles the aleutian

target:      off the continent coasts principally the arctic archipelago the bahamas turks caicos the greater and lesser antilles the aleutian islands

prediction:  off the continent in south the arctic archipelago the functions influence home the greater and between was the aleutian the

 epoch: 30 | train_loss: 1.26, train_acc: 85.3% | test_loss: 1.22, test_acc: 85.6%


  0%|          | 32/15000 [00:03<24:26, 10.21it/s]


 epoch: 31 | train_loss: 1.22, train_acc: 85.7% | test_loss: 1.24, test_acc: 84.7%

 epoch: 32 | train_loss: 1.27, train_acc: 85.0% | test_loss: 1.24, test_acc: 84.9%

 epoch: 33 | train_loss: 1.13, train_acc: 86.6% | test_loss: 1.26, test_acc: 84.8%


  0%|          | 36/15000 [00:04<34:32,  7.22it/s]


 epoch: 34 | train_loss: 1.29, train_acc: 84.7% | test_loss: 1.22, test_acc: 86.0%

 epoch: 35 | train_loss: 1.31, train_acc: 84.2% | test_loss: 1.25, test_acc: 84.9%

 epoch: 36 | train_loss: 1.30, train_acc: 83.9% | test_loss: 1.30, test_acc: 84.5%


  0%|          | 38/15000 [00:04<30:32,  8.16it/s]


 epoch: 37 | train_loss: 1.33, train_acc: 84.5% | test_loss: 1.34, test_acc: 83.5%

 epoch: 38 | train_loss: 1.31, train_acc: 84.6% | test_loss: 1.18, test_acc: 85.6%

 epoch: 39 | train_loss: 1.39, train_acc: 83.9% | test_loss: 1.29, test_acc: 85.3%


  0%|          | 40/15000 [00:04<28:42,  8.68it/s]


input:       danish ties due to centuries of colonization by denmark the and canada have major sports teams that compete

target:      danish ties due to centuries of colonization by denmark the and canada have major sports teams that compete against

prediction:  on available due to centuries of colonization by civilization the and canada have major who wide that especially of

 epoch: 40 | train_loss: 1.28, train_acc: 85.0% | test_loss: 1.31, test_acc: 84.3%

 epoch: 41 | train_loss: 1.21, train_acc: 85.6% | test_loss: 1.35, test_acc: 83.6%


  0%|          | 44/15000 [00:04<24:26, 10.20it/s]


 epoch: 42 | train_loss: 1.25, train_acc: 84.7% | test_loss: 1.30, test_acc: 84.5%

 epoch: 43 | train_loss: 1.24, train_acc: 86.0% | test_loss: 1.30, test_acc: 84.7%

 epoch: 44 | train_loss: 1.20, train_acc: 85.7% | test_loss: 1.17, test_acc: 86.5%


  0%|          | 46/15000 [00:04<23:30, 10.60it/s]


 epoch: 45 | train_loss: 1.27, train_acc: 84.6% | test_loss: 1.23, test_acc: 85.4%

 epoch: 46 | train_loss: 1.24, train_acc: 85.4% | test_loss: 1.23, test_acc: 85.3%

 epoch: 47 | train_loss: 1.31, train_acc: 84.4% | test_loss: 1.39, test_acc: 83.6%


  0%|          | 48/15000 [00:05<23:04, 10.80it/s]


 epoch: 48 | train_loss: 1.24, train_acc: 85.5% | test_loss: 1.24, test_acc: 85.2%


  0%|          | 50/15000 [00:05<34:22,  7.25it/s]


 epoch: 49 | train_loss: 1.21, train_acc: 85.7% | test_loss: 1.26, test_acc: 85.3%

input:       cretaceous about ma during the early paleogene antarctica remained connected to south america as well as to southeastern australia

target:      cretaceous about ma during the early paleogene antarctica remained connected to south america as well as to southeastern australia fauna

prediction:  organisms about ma during the early of antarctica remained connected to south america as well as to pacific australia the

 epoch: 50 | train_loss: 1.34, train_acc: 84.5% | test_loss: 1.36, test_acc: 83.8%

 epoch: 51 | train_loss: 1.28, train_acc: 84.6% | test_loss: 1.23, test_acc: 85.0%


  0%|          | 54/15000 [00:05<26:48,  9.29it/s]


 epoch: 52 | train_loss: 1.30, train_acc: 84.4% | test_loss: 1.28, test_acc: 84.2%

 epoch: 53 | train_loss: 1.20, train_acc: 85.1% | test_loss: 1.21, test_acc: 85.1%

 epoch: 54 | train_loss: 1.38, train_acc: 83.6% | test_loss: 1.23, test_acc: 86.0%


  0%|          | 56/15000 [00:06<24:48, 10.04it/s]


 epoch: 55 | train_loss: 1.32, train_acc: 84.3% | test_loss: 1.24, test_acc: 85.7%

 epoch: 56 | train_loss: 1.28, train_acc: 85.2% | test_loss: 1.34, test_acc: 84.6%

 epoch: 57 | train_loss: 1.23, train_acc: 86.5% | test_loss: 1.27, test_acc: 84.8%


  0%|          | 58/15000 [00:06<23:27, 10.62it/s]


 epoch: 58 | train_loss: 1.25, train_acc: 85.1% | test_loss: 1.26, test_acc: 85.4%

 epoch: 59 | train_loss: 1.21, train_acc: 85.3% | test_loss: 1.33, test_acc: 83.8%

input:       political disputes escalated into famines and small scale civil wars yet despite difficult problems local leaders owing no tribute

target:      political disputes escalated into famines and small scale civil wars yet despite difficult problems local leaders owing no tribute to

prediction:  political form as into persson and small scale civil wars yet despite difficult problems local allowed death no minecraft the


  0%|          | 62/15000 [00:06<23:46, 10.47it/s]


 epoch: 60 | train_loss: 1.30, train_acc: 84.0% | test_loss: 1.26, test_acc: 84.7%

 epoch: 61 | train_loss: 1.30, train_acc: 84.5% | test_loss: 1.40, test_acc: 84.1%

 epoch: 62 | train_loss: 1.30, train_acc: 84.9% | test_loss: 1.25, test_acc: 85.6%


  0%|          | 64/15000 [00:07<39:29,  6.30it/s]


 epoch: 63 | train_loss: 1.27, train_acc: 85.0% | test_loss: 1.33, test_acc: 84.3%

 epoch: 64 | train_loss: 1.29, train_acc: 85.4% | test_loss: 1.26, test_acc: 85.0%

 epoch: 65 | train_loss: 1.38, train_acc: 83.5% | test_loss: 1.18, test_acc: 86.0%


  0%|          | 68/15000 [00:07<29:33,  8.42it/s]


 epoch: 66 | train_loss: 1.20, train_acc: 86.0% | test_loss: 1.25, test_acc: 85.0%

 epoch: 67 | train_loss: 1.11, train_acc: 86.7% | test_loss: 1.29, test_acc: 84.7%

 epoch: 68 | train_loss: 1.20, train_acc: 85.7% | test_loss: 1.25, test_acc: 85.5%


  0%|          | 70/15000 [00:07<27:46,  8.96it/s]


 epoch: 69 | train_loss: 1.24, train_acc: 85.9% | test_loss: 1.22, test_acc: 85.3%

input:       of memes include the distracted boyfriend meme and the harlem shake viral videos these memes reflect the cultural references

target:      of memes include the distracted boyfriend meme and the harlem shake viral videos these memes reflect the cultural references and

prediction:  of memes include the help public meme and the peace including year they these memes second the cultural increasing the

 epoch: 70 | train_loss: 1.16, train_acc: 86.3% | test_loss: 1.24, test_acc: 86.0%

 epoch: 71 | train_loss: 1.19, train_acc: 86.1% | test_loss: 1.27, test_acc: 85.2%


  0%|          | 74/15000 [00:08<24:13, 10.27it/s]


 epoch: 72 | train_loss: 1.35, train_acc: 83.7% | test_loss: 1.24, test_acc: 85.6%

 epoch: 73 | train_loss: 1.29, train_acc: 85.5% | test_loss: 1.25, test_acc: 85.4%

 epoch: 74 | train_loss: 1.24, train_acc: 85.1% | test_loss: 1.11, test_acc: 86.8%


  1%|          | 76/15000 [00:08<23:20, 10.65it/s]


 epoch: 75 | train_loss: 1.20, train_acc: 85.6% | test_loss: 1.19, test_acc: 86.1%

 epoch: 76 | train_loss: 1.28, train_acc: 84.3% | test_loss: 1.13, test_acc: 86.3%

 epoch: 77 | train_loss: 1.23, train_acc: 85.5% | test_loss: 1.24, test_acc: 84.8%


  1%|          | 80/15000 [00:09<34:07,  7.29it/s]


 epoch: 78 | train_loss: 1.22, train_acc: 85.8% | test_loss: 1.30, test_acc: 84.4%

 epoch: 79 | train_loss: 1.18, train_acc: 86.4% | test_loss: 1.31, test_acc: 84.3%

input:       also categorize easter island the desventuradas islands and the juan fern ndez islands as being part of region titled

target:      also categorize easter island the desventuradas islands and the juan fern ndez islands as being part of region titled insular

prediction:  also more easter island the background islands and the such level have islands as being part of region titled the

 epoch: 80 | train_loss: 1.26, train_acc: 85.0% | test_loss: 1.13, test_acc: 87.1%


  1%|          | 82/15000 [00:09<29:54,  8.31it/s]


 epoch: 81 | train_loss: 1.26, train_acc: 85.4% | test_loss: 1.19, test_acc: 86.5%

 epoch: 82 | train_loss: 1.26, train_acc: 85.0% | test_loss: 1.18, test_acc: 85.9%

 epoch: 83 | train_loss: 1.26, train_acc: 85.3% | test_loss: 1.32, test_acc: 84.7%


  1%|          | 86/15000 [00:09<24:59,  9.95it/s]


 epoch: 84 | train_loss: 1.22, train_acc: 85.5% | test_loss: 1.26, test_acc: 85.3%

 epoch: 85 | train_loss: 1.29, train_acc: 84.8% | test_loss: 1.34, test_acc: 84.0%

 epoch: 86 | train_loss: 1.24, train_acc: 85.2% | test_loss: 1.18, test_acc: 85.8%


  1%|          | 88/15000 [00:09<23:44, 10.47it/s]


 epoch: 87 | train_loss: 1.15, train_acc: 86.6% | test_loss: 1.12, test_acc: 87.5%

 epoch: 88 | train_loss: 1.21, train_acc: 86.0% | test_loss: 1.20, test_acc: 85.5%

 epoch: 89 | train_loss: 1.27, train_acc: 85.0% | test_loss: 1.27, test_acc: 85.2%


  1%|          | 90/15000 [00:10<26:26,  9.40it/s]


input:       users with their admin privileges and went on mini power trips demonstrating that great power is not always accompanied

target:      users with their admin privileges and went on mini power trips demonstrating that great power is not always accompanied by

prediction:  users with their world major and went on showed power mediterranean physical that great power is not always accompanied the

 epoch: 90 | train_loss: 1.20, train_acc: 86.1% | test_loss: 1.25, test_acc: 85.3%

 epoch: 91 | train_loss: 1.20, train_acc: 85.9% | test_loss: 1.23, test_acc: 85.7%


  1%|          | 93/15000 [00:10<36:34,  6.79it/s]


 epoch: 92 | train_loss: 1.15, train_acc: 85.8% | test_loss: 1.17, test_acc: 85.8%

 epoch: 93 | train_loss: 1.25, train_acc: 85.4% | test_loss: 1.36, test_acc: 84.2%


  1%|          | 95/15000 [00:10<32:18,  7.69it/s]


 epoch: 94 | train_loss: 1.16, train_acc: 86.1% | test_loss: 1.31, test_acc: 84.4%

 epoch: 95 | train_loss: 1.24, train_acc: 85.1% | test_loss: 1.22, test_acc: 85.9%

 epoch: 96 | train_loss: 1.17, train_acc: 86.0% | test_loss: 1.27, test_acc: 84.5%


  1%|          | 98/15000 [00:11<28:51,  8.60it/s]


 epoch: 97 | train_loss: 1.15, train_acc: 86.7% | test_loss: 1.20, test_acc: 85.5%

 epoch: 98 | train_loss: 1.15, train_acc: 86.4% | test_loss: 1.18, test_acc: 86.1%


  1%|          | 100/15000 [00:11<29:10,  8.51it/s]


 epoch: 99 | train_loss: 1.41, train_acc: 83.7% | test_loss: 1.22, test_acc: 85.9%

input:       addition powershell allows formatting definitions to be specified so the text representation of objects can be customized by choosing

target:      addition powershell allows formatting definitions to be specified so the text representation of objects can be customized by choosing which

prediction:  addition powershell allows modern definitions to be rule so the text representation of objects can be online by good the

 epoch: 100 | train_loss: 1.17, train_acc: 85.9% | test_loss: 1.21, test_acc: 86.2%


  1%|          | 102/15000 [00:11<27:48,  8.93it/s]


 epoch: 101 | train_loss: 1.15, train_acc: 86.8% | test_loss: 1.14, test_acc: 87.3%

 epoch: 102 | train_loss: 1.26, train_acc: 85.3% | test_loss: 1.22, test_acc: 85.8%


  1%|          | 104/15000 [00:11<27:06,  9.16it/s]


 epoch: 103 | train_loss: 1.23, train_acc: 85.2% | test_loss: 1.19, test_acc: 86.3%

 epoch: 104 | train_loss: 1.18, train_acc: 85.8% | test_loss: 1.16, test_acc: 86.4%


  1%|          | 105/15000 [00:11<27:36,  8.99it/s]


 epoch: 105 | train_loss: 1.12, train_acc: 86.9% | test_loss: 1.26, test_acc: 84.4%


  1%|          | 107/15000 [00:12<48:37,  5.10it/s]


 epoch: 106 | train_loss: 1.14, train_acc: 86.6% | test_loss: 1.25, test_acc: 85.4%

 epoch: 107 | train_loss: 1.23, train_acc: 86.0% | test_loss: 1.28, test_acc: 85.3%

 epoch: 108 | train_loss: 1.33, train_acc: 84.2% | test_loss: 1.24, test_acc: 85.5%


  1%|          | 110/15000 [00:12<37:34,  6.61it/s]


 epoch: 109 | train_loss: 1.20, train_acc: 86.1% | test_loss: 1.19, test_acc: 86.2%

input:       unit natural language programming has been proposed as way to eliminate the need for specialized language for programming however

target:      unit natural language programming has been proposed as way to eliminate the need for specialized language for programming however this

prediction:  unit natural language programming has been proposed as way to red the need for specialized language for programming however the

 epoch: 110 | train_loss: 1.27, train_acc: 85.3% | test_loss: 1.20, test_acc: 84.8%


  1%|          | 112/15000 [00:13<30:58,  8.01it/s]


 epoch: 111 | train_loss: 1.05, train_acc: 87.8% | test_loss: 1.21, test_acc: 86.2%

 epoch: 112 | train_loss: 1.12, train_acc: 86.3% | test_loss: 1.03, test_acc: 87.9%

 epoch: 113 | train_loss: 1.25, train_acc: 85.3% | test_loss: 1.15, test_acc: 86.6%


  1%|          | 116/15000 [00:13<25:04,  9.90it/s]


 epoch: 114 | train_loss: 1.22, train_acc: 85.3% | test_loss: 1.17, test_acc: 86.4%

 epoch: 115 | train_loss: 1.12, train_acc: 87.0% | test_loss: 1.26, test_acc: 85.7%

 epoch: 116 | train_loss: 1.21, train_acc: 85.9% | test_loss: 1.13, test_acc: 87.1%


  1%|          | 118/15000 [00:13<23:57, 10.36it/s]


 epoch: 117 | train_loss: 1.20, train_acc: 86.5% | test_loss: 1.17, test_acc: 86.3%

 epoch: 118 | train_loss: 1.14, train_acc: 86.6% | test_loss: 1.11, test_acc: 86.7%

 epoch: 119 | train_loss: 1.19, train_acc: 86.0% | test_loss: 1.24, test_acc: 85.8%

input:       in april reddit faced criticism after users wrongly identified number of people as suspects notable among misidentified bombing suspects

target:      in april reddit faced criticism after users wrongly identified number of people as suspects notable among misidentified bombing suspects was

prediction:  in april reddit faced caribbean after users ones identified number of people as than notable among war southeast than the


  1%|          | 122/15000 [00:14<35:07,  7.06it/s]


 epoch: 120 | train_loss: 1.10, train_acc: 86.6% | test_loss: 1.18, test_acc: 85.9%

 epoch: 121 | train_loss: 1.14, train_acc: 86.6% | test_loss: 1.23, test_acc: 85.1%

 epoch: 122 | train_loss: 1.31, train_acc: 84.8% | test_loss: 1.22, test_acc: 85.8%


  1%|          | 124/15000 [00:14<30:43,  8.07it/s]


 epoch: 123 | train_loss: 1.14, train_acc: 86.5% | test_loss: 1.10, test_acc: 87.4%

 epoch: 124 | train_loss: 1.33, train_acc: 84.7% | test_loss: 1.16, test_acc: 86.5%

 epoch: 125 | train_loss: 1.20, train_acc: 86.0% | test_loss: 1.15, test_acc: 86.8%


  1%|          | 128/15000 [00:14<24:55,  9.95it/s]


 epoch: 126 | train_loss: 1.13, train_acc: 86.9% | test_loss: 1.13, test_acc: 86.6%

 epoch: 127 | train_loss: 1.05, train_acc: 87.4% | test_loss: 1.24, test_acc: 85.0%

 epoch: 128 | train_loss: 1.18, train_acc: 86.3% | test_loss: 1.09, test_acc: 86.8%


  1%|          | 130/15000 [00:15<25:08,  9.86it/s]


 epoch: 129 | train_loss: 1.12, train_acc: 86.8% | test_loss: 1.11, test_acc: 87.3%

input:       physics directly or significantly uses the modes of thought such as the approach to problem solving developed in your

target:      physics directly or significantly uses the modes of thought such as the approach to problem solving developed in your education

prediction:  physics directly or significantly uses the modes of thought such as the approach to problem approximately developed in your the

 epoch: 130 | train_loss: 1.25, train_acc: 84.9% | test_loss: 1.11, test_acc: 87.1%


  1%|          | 132/15000 [00:15<24:02, 10.31it/s]


 epoch: 131 | train_loss: 1.16, train_acc: 86.6% | test_loss: 1.16, test_acc: 86.6%

 epoch: 132 | train_loss: 1.12, train_acc: 87.1% | test_loss: 1.28, test_acc: 85.5%

 epoch: 133 | train_loss: 1.23, train_acc: 85.5% | test_loss: 1.19, test_acc: 85.4%


  1%|          | 136/15000 [00:15<23:23, 10.59it/s]


 epoch: 134 | train_loss: 1.19, train_acc: 85.8% | test_loss: 1.13, test_acc: 86.7%

 epoch: 135 | train_loss: 1.18, train_acc: 86.4% | test_loss: 1.04, test_acc: 88.2%

 epoch: 136 | train_loss: 1.26, train_acc: 85.0% | test_loss: 1.15, test_acc: 86.5%


  1%|          | 138/15000 [00:15<22:43, 10.90it/s]


 epoch: 137 | train_loss: 1.25, train_acc: 84.9% | test_loss: 1.25, test_acc: 85.4%

 epoch: 138 | train_loss: 1.09, train_acc: 87.3% | test_loss: 1.13, test_acc: 86.9%

 epoch: 139 | train_loss: 1.21, train_acc: 85.8% | test_loss: 1.02, test_acc: 88.4%


  1%|          | 140/15000 [00:16<23:17, 10.63it/s]


input:       and contained only single tree feet high in the early th century several avant garde movements including the dadaists

target:      and contained only single tree feet high in the early th century several avant garde movements including the dadaists surrealists

prediction:  and contained only single tree southern high in the early th century several transmission when movements including the june the

 epoch: 140 | train_loss: 1.14, train_acc: 86.4% | test_loss: 1.13, test_acc: 86.6%

 epoch: 141 | train_loss: 1.08, train_acc: 87.2% | test_loss: 1.06, test_acc: 87.1%


  1%|          | 144/15000 [00:16<21:35, 11.46it/s]


 epoch: 142 | train_loss: 1.10, train_acc: 86.8% | test_loss: 1.13, test_acc: 86.2%

 epoch: 143 | train_loss: 1.06, train_acc: 87.8% | test_loss: 1.07, test_acc: 87.8%

 epoch: 144 | train_loss: 1.23, train_acc: 85.0% | test_loss: 1.26, test_acc: 85.7%


  1%|          | 146/15000 [00:16<21:15, 11.65it/s]


 epoch: 145 | train_loss: 1.15, train_acc: 86.6% | test_loss: 1.15, test_acc: 86.3%

 epoch: 146 | train_loss: 1.07, train_acc: 87.7% | test_loss: 1.14, test_acc: 86.4%

 epoch: 147 | train_loss: 1.17, train_acc: 85.8% | test_loss: 1.07, test_acc: 86.8%


  1%|          | 148/15000 [00:16<20:57, 11.81it/s]


 epoch: 148 | train_loss: 1.16, train_acc: 86.1% | test_loss: 1.17, test_acc: 86.4%


  1%|          | 150/15000 [00:17<29:52,  8.29it/s]


 epoch: 149 | train_loss: 1.17, train_acc: 86.2% | test_loss: 1.12, test_acc: 86.7%

input:       decian persecution of seriously threatened the church but ultimately strengthened christian defiance diocletian undertook the most severe persecution of

target:      decian persecution of seriously threatened the church but ultimately strengthened christian defiance diocletian undertook the most severe persecution of christians

prediction:  as both of methods threatened the church but ultimately these christian law diocletian of the most severe both of the

 epoch: 150 | train_loss: 1.12, train_acc: 86.8% | test_loss: 1.15, test_acc: 86.3%

 epoch: 151 | train_loss: 1.07, train_acc: 87.1% | test_loss: 1.13, test_acc: 86.5%


  1%|          | 154/15000 [00:17<24:52,  9.95it/s]


 epoch: 152 | train_loss: 1.10, train_acc: 87.3% | test_loss: 1.07, test_acc: 87.4%

 epoch: 153 | train_loss: 1.01, train_acc: 88.0% | test_loss: 1.08, test_acc: 86.9%

 epoch: 154 | train_loss: 1.08, train_acc: 87.0% | test_loss: 1.07, test_acc: 87.6%


  1%|          | 156/15000 [00:17<23:14, 10.65it/s]


 epoch: 155 | train_loss: 1.12, train_acc: 86.6% | test_loss: 1.04, test_acc: 87.6%

 epoch: 156 | train_loss: 1.16, train_acc: 86.6% | test_loss: 1.16, test_acc: 86.6%

 epoch: 157 | train_loss: 1.10, train_acc: 87.2% | test_loss: 1.04, test_acc: 87.8%


  1%|          | 158/15000 [00:17<22:22, 11.06it/s]


 epoch: 158 | train_loss: 1.17, train_acc: 86.4% | test_loss: 1.21, test_acc: 85.9%

 epoch: 159 | train_loss: 1.21, train_acc: 85.7% | test_loss: 1.14, test_acc: 86.6%

input:       order to fully engage with and excel in deep and time consuming hobbies as exemplified in the hacker ethic

target:      order to fully engage with and excel in deep and time consuming hobbies as exemplified in the hacker ethic social

prediction:  order to fully colombia with and methods in originated and time rights regard as african in the years any the


  1%|          | 162/15000 [00:18<22:30, 10.99it/s]


 epoch: 160 | train_loss: 1.17, train_acc: 86.0% | test_loss: 1.16, test_acc: 86.1%

 epoch: 161 | train_loss: 1.05, train_acc: 87.2% | test_loss: 1.11, test_acc: 86.7%

 epoch: 162 | train_loss: 0.99, train_acc: 88.3% | test_loss: 1.09, test_acc: 87.3%


  1%|          | 164/15000 [00:18<38:38,  6.40it/s]


 epoch: 163 | train_loss: 1.09, train_acc: 86.7% | test_loss: 0.99, test_acc: 88.3%

 epoch: 164 | train_loss: 1.19, train_acc: 86.0% | test_loss: 1.08, test_acc: 87.7%

 epoch: 165 | train_loss: 1.07, train_acc: 87.2% | test_loss: 1.30, test_acc: 84.3%


  1%|          | 168/15000 [00:19<29:01,  8.52it/s]


 epoch: 166 | train_loss: 1.03, train_acc: 87.9% | test_loss: 1.01, test_acc: 88.3%

 epoch: 167 | train_loss: 1.05, train_acc: 87.4% | test_loss: 1.08, test_acc: 86.7%

 epoch: 168 | train_loss: 1.09, train_acc: 87.0% | test_loss: 1.15, test_acc: 86.5%


  1%|          | 170/15000 [00:19<27:14,  9.07it/s]


 epoch: 169 | train_loss: 1.16, train_acc: 86.5% | test_loss: 1.13, test_acc: 86.4%

input:       in some countries portuguese is the official language of brazil dutch is the official language of suriname english is

target:      in some countries portuguese is the official language of brazil dutch is the official language of suriname english is the

prediction:  in some countries portuguese is the official language of brazil dutch is the official language of change english is the

 epoch: 170 | train_loss: 1.15, train_acc: 86.0% | test_loss: 1.01, test_acc: 87.8%

 epoch: 171 | train_loss: 1.11, train_acc: 87.0% | test_loss: 1.11, test_acc: 87.6%


  1%|          | 174/15000 [00:19<23:42, 10.42it/s]


 epoch: 172 | train_loss: 1.10, train_acc: 87.1% | test_loss: 1.15, test_acc: 86.7%

 epoch: 173 | train_loss: 1.08, train_acc: 87.7% | test_loss: 1.01, test_acc: 88.1%

 epoch: 174 | train_loss: 1.10, train_acc: 87.2% | test_loss: 1.18, test_acc: 86.6%


  1%|          | 176/15000 [00:19<22:36, 10.93it/s]


 epoch: 175 | train_loss: 1.14, train_acc: 86.6% | test_loss: 1.15, test_acc: 86.9%

 epoch: 176 | train_loss: 1.09, train_acc: 87.9% | test_loss: 1.03, test_acc: 87.6%


  1%|          | 178/15000 [00:20<26:54,  9.18it/s]


 epoch: 177 | train_loss: 1.19, train_acc: 86.2% | test_loss: 1.08, test_acc: 86.9%

 epoch: 178 | train_loss: 0.97, train_acc: 88.5% | test_loss: 1.02, test_acc: 88.0%

 epoch: 179 | train_loss: 1.10, train_acc: 86.8% | test_loss: 1.01, test_acc: 88.3%


  1%|          | 180/15000 [00:20<26:23,  9.36it/s]


input:       with roughly million species threatened with extinction within decades the loss of biodiversity and ecosystem functions over the last

target:      with roughly million species threatened with extinction within decades the loss of biodiversity and ecosystem functions over the last half

prediction:  with roughly million species authority with extinction within decades the loss of april and ecosystem functions over the last the

 epoch: 180 | train_loss: 1.00, train_acc: 88.3% | test_loss: 1.08, test_acc: 87.2%

 epoch: 181 | train_loss: 1.15, train_acc: 86.4% | test_loss: 0.99, test_acc: 88.3%


  1%|          | 184/15000 [00:20<22:51, 10.80it/s]


 epoch: 182 | train_loss: 1.00, train_acc: 88.2% | test_loss: 1.08, test_acc: 87.7%

 epoch: 183 | train_loss: 1.02, train_acc: 87.7% | test_loss: 1.04, test_acc: 88.0%

 epoch: 184 | train_loss: 1.02, train_acc: 88.4% | test_loss: 1.16, test_acc: 86.2%


  1%|          | 186/15000 [00:20<22:02, 11.20it/s]


 epoch: 185 | train_loss: 1.06, train_acc: 87.7% | test_loss: 1.00, test_acc: 88.2%

 epoch: 186 | train_loss: 1.11, train_acc: 86.9% | test_loss: 1.02, test_acc: 87.8%

 epoch: 187 | train_loss: 1.11, train_acc: 87.0% | test_loss: 1.05, test_acc: 87.4%


  1%|▏         | 190/15000 [00:21<22:21, 11.04it/s]


 epoch: 188 | train_loss: 1.07, train_acc: 87.3% | test_loss: 1.00, test_acc: 88.0%

 epoch: 189 | train_loss: 1.06, train_acc: 87.3% | test_loss: 0.99, test_acc: 88.6%

input:       coinage in circulation increased the money supply for trading or saving rome had no central bank and regulation of

target:      coinage in circulation increased the money supply for trading or saving rome had no central bank and regulation of the

prediction:  scale in features increased the money supply for trading or certain rome had no central bank and mode of the

 epoch: 190 | train_loss: 1.05, train_acc: 87.4% | test_loss: 1.11, test_acc: 86.6%

 epoch: 191 | train_loss: 1.01, train_acc: 87.4% | test_loss: 1.04, test_acc: 87.2%


  1%|▏         | 194/15000 [00:21<32:16,  7.65it/s]


 epoch: 192 | train_loss: 1.17, train_acc: 86.6% | test_loss: 1.08, test_acc: 87.4%

 epoch: 193 | train_loss: 1.07, train_acc: 88.4% | test_loss: 1.02, test_acc: 87.8%

 epoch: 194 | train_loss: 1.13, train_acc: 86.2% | test_loss: 1.06, test_acc: 88.1%


  1%|▏         | 196/15000 [00:21<28:19,  8.71it/s]


 epoch: 195 | train_loss: 1.19, train_acc: 85.8% | test_loss: 1.04, test_acc: 87.9%

 epoch: 196 | train_loss: 1.11, train_acc: 86.9% | test_loss: 1.09, test_acc: 87.3%

 epoch: 197 | train_loss: 1.03, train_acc: 88.0% | test_loss: 1.11, test_acc: 87.4%


  1%|▏         | 200/15000 [00:22<25:30,  9.67it/s]


 epoch: 198 | train_loss: 1.02, train_acc: 87.9% | test_loss: 1.00, test_acc: 87.9%

 epoch: 199 | train_loss: 1.06, train_acc: 88.0% | test_loss: 1.05, test_acc: 87.4%

input:       school model for tutoring subject syllabus and governance with minor changes schools in india range from large campuses with

target:      school model for tutoring subject syllabus and governance with minor changes schools in india range from large campuses with thousands

prediction:  school model for themselves subject depend and are with minor changes schools in india range from large trade with the

 epoch: 200 | train_loss: 1.20, train_acc: 86.2% | test_loss: 1.08, test_acc: 87.6%


  1%|▏         | 202/15000 [00:22<24:13, 10.18it/s]


 epoch: 201 | train_loss: 1.16, train_acc: 86.8% | test_loss: 1.01, test_acc: 88.4%

 epoch: 202 | train_loss: 1.05, train_acc: 87.7% | test_loss: 1.19, test_acc: 85.7%

 epoch: 203 | train_loss: 0.99, train_acc: 89.0% | test_loss: 1.06, test_acc: 87.5%


  1%|▏         | 204/15000 [00:22<23:14, 10.61it/s]


 epoch: 204 | train_loss: 1.06, train_acc: 87.8% | test_loss: 1.09, test_acc: 87.4%

 epoch: 205 | train_loss: 1.21, train_acc: 85.1% | test_loss: 1.05, test_acc: 87.7%


  1%|▏         | 208/15000 [00:23<34:37,  7.12it/s]


 epoch: 206 | train_loss: 0.94, train_acc: 88.7% | test_loss: 1.11, test_acc: 86.7%

 epoch: 207 | train_loss: 0.96, train_acc: 89.2% | test_loss: 0.98, test_acc: 88.1%

 epoch: 208 | train_loss: 1.11, train_acc: 86.8% | test_loss: 1.15, test_acc: 86.6%


  1%|▏         | 210/15000 [00:23<33:48,  7.29it/s]


 epoch: 209 | train_loss: 1.06, train_acc: 87.6% | test_loss: 1.02, test_acc: 87.9%

input:       when the assyrian king shalmaneser iii named ahab the israelite among his enemies at the battle of qarqar judah

target:      when the assyrian king shalmaneser iii named ahab the israelite among his enemies at the battle of qarqar judah emerged

prediction:  when the indo king years iii named on the claimed among his enemies at the battle of tomb australian the

 epoch: 210 | train_loss: 1.00, train_acc: 88.4% | test_loss: 1.09, test_acc: 86.9%


  1%|▏         | 213/15000 [00:24<28:33,  8.63it/s]


 epoch: 211 | train_loss: 1.06, train_acc: 87.7% | test_loss: 0.99, test_acc: 88.2%

 epoch: 212 | train_loss: 1.00, train_acc: 88.0% | test_loss: 1.03, test_acc: 87.9%

 epoch: 213 | train_loss: 1.06, train_acc: 87.0% | test_loss: 1.10, test_acc: 87.0%


  1%|▏         | 214/15000 [00:24<27:43,  8.89it/s]


 epoch: 214 | train_loss: 1.01, train_acc: 88.4% | test_loss: 1.07, test_acc: 87.4%

 epoch: 215 | train_loss: 1.11, train_acc: 86.7% | test_loss: 1.06, test_acc: 86.9%


  1%|▏         | 217/15000 [00:24<26:52,  9.17it/s]


 epoch: 216 | train_loss: 1.03, train_acc: 87.7% | test_loss: 1.01, test_acc: 88.0%

 epoch: 217 | train_loss: 0.95, train_acc: 89.2% | test_loss: 1.06, test_acc: 87.6%


  1%|▏         | 219/15000 [00:24<26:57,  9.14it/s]


 epoch: 218 | train_loss: 1.08, train_acc: 87.3% | test_loss: 1.09, test_acc: 86.6%

 epoch: 219 | train_loss: 1.00, train_acc: 88.1% | test_loss: 1.01, test_acc: 87.9%

input:       forces in driving evolutionary change is an area of current research mutation bias is usually conceived as difference in

target:      forces in driving evolutionary change is an area of current research mutation bias is usually conceived as difference in expected

prediction:  forces in implementation evolutionary change is an area of current research mutation bias is usually revolution as difference in the


  1%|▏         | 221/15000 [00:25<51:43,  4.76it/s]  


 epoch: 220 | train_loss: 1.06, train_acc: 87.3% | test_loss: 1.03, test_acc: 88.3%

 epoch: 221 | train_loss: 0.99, train_acc: 87.8% | test_loss: 1.04, test_acc: 87.8%


  1%|▏         | 224/15000 [00:25<35:17,  6.98it/s]


 epoch: 222 | train_loss: 1.03, train_acc: 88.3% | test_loss: 1.12, test_acc: 86.3%

 epoch: 223 | train_loss: 0.98, train_acc: 88.4% | test_loss: 1.03, test_acc: 87.8%

 epoch: 224 | train_loss: 1.06, train_acc: 87.7% | test_loss: 1.05, test_acc: 87.7%


  2%|▏         | 227/15000 [00:26<29:12,  8.43it/s]


 epoch: 225 | train_loss: 1.00, train_acc: 88.3% | test_loss: 0.97, test_acc: 89.1%

 epoch: 226 | train_loss: 0.95, train_acc: 88.7% | test_loss: 1.05, test_acc: 87.6%

 epoch: 227 | train_loss: 1.04, train_acc: 87.4% | test_loss: 1.06, test_acc: 87.5%


  2%|▏         | 228/15000 [00:26<28:13,  8.72it/s]


 epoch: 228 | train_loss: 0.93, train_acc: 89.2% | test_loss: 1.03, test_acc: 87.9%

 epoch: 229 | train_loss: 0.98, train_acc: 88.4% | test_loss: 1.01, test_acc: 87.9%

input:       ottoman empire to the eastern lands and armed resistance by the tribes of siberia synthesized new russian empire extending

target:      ottoman empire to the eastern lands and armed resistance by the tribes of siberia synthesized new russian empire extending to

prediction:  ottoman empire to the eastern lands and ended social by the tribes of mixed poverty new russian empire together the


  2%|▏         | 231/15000 [00:26<26:59,  9.12it/s]


 epoch: 230 | train_loss: 0.98, train_acc: 88.0% | test_loss: 1.00, test_acc: 88.0%

 epoch: 231 | train_loss: 1.06, train_acc: 87.4% | test_loss: 1.04, test_acc: 88.6%


  2%|▏         | 232/15000 [00:26<26:34,  9.26it/s]


 epoch: 232 | train_loss: 1.05, train_acc: 87.6% | test_loss: 1.01, test_acc: 88.1%

 epoch: 233 | train_loss: 1.02, train_acc: 88.9% | test_loss: 1.03, test_acc: 87.8%


  2%|▏         | 236/15000 [00:27<35:57,  6.84it/s]


 epoch: 234 | train_loss: 1.11, train_acc: 87.2% | test_loss: 1.06, test_acc: 87.5%

 epoch: 235 | train_loss: 1.03, train_acc: 87.3% | test_loss: 1.03, test_acc: 87.5%

 epoch: 236 | train_loss: 1.00, train_acc: 87.9% | test_loss: 1.07, test_acc: 87.3%


  2%|▏         | 238/15000 [00:27<31:14,  7.87it/s]


 epoch: 237 | train_loss: 1.00, train_acc: 88.6% | test_loss: 1.04, test_acc: 87.7%

 epoch: 238 | train_loss: 1.03, train_acc: 87.9% | test_loss: 1.04, test_acc: 87.8%

 epoch: 239 | train_loss: 1.06, train_acc: 87.3% | test_loss: 1.06, test_acc: 87.7%


  2%|▏         | 240/15000 [00:27<28:58,  8.49it/s]


input:       factors on safety and accidents and interventions designed to improve protect worker health ohp grew out of health psychology

target:      factors on safety and accidents and interventions designed to improve protect worker health ohp grew out of health psychology industrial

prediction:  factors on safety and and and interventions designed to improve protect largest health economy grew out of health psychology the

 epoch: 240 | train_loss: 0.99, train_acc: 88.4% | test_loss: 1.05, test_acc: 87.5%

 epoch: 241 | train_loss: 1.05, train_acc: 88.0% | test_loss: 0.84, test_acc: 90.2%


  2%|▏         | 244/15000 [00:28<24:48,  9.92it/s]


 epoch: 242 | train_loss: 1.09, train_acc: 86.7% | test_loss: 1.00, test_acc: 88.6%

 epoch: 243 | train_loss: 1.05, train_acc: 87.5% | test_loss: 1.06, test_acc: 87.7%

 epoch: 244 | train_loss: 0.95, train_acc: 89.1% | test_loss: 1.02, test_acc: 88.3%


  2%|▏         | 246/15000 [00:28<23:37, 10.41it/s]


 epoch: 245 | train_loss: 1.10, train_acc: 86.8% | test_loss: 0.92, test_acc: 89.3%

 epoch: 246 | train_loss: 1.07, train_acc: 86.9% | test_loss: 0.93, test_acc: 89.2%

 epoch: 247 | train_loss: 1.05, train_acc: 87.1% | test_loss: 0.96, test_acc: 89.2%


  2%|▏         | 250/15000 [00:29<34:44,  7.08it/s]


 epoch: 248 | train_loss: 0.99, train_acc: 88.2% | test_loss: 1.14, test_acc: 86.6%

 epoch: 249 | train_loss: 0.94, train_acc: 89.2% | test_loss: 1.06, test_acc: 87.6%

input:       falklands war began and days later argentine forces surrendered colombia has had an ongoing though diminished internal conflict which

target:      falklands war began and days later argentine forces surrendered colombia has had an ongoing though diminished internal conflict which started

prediction:  classical war began and days later argentine forces age colombia has had an ongoing though imperial internal conflict which the

 epoch: 250 | train_loss: 1.00, train_acc: 87.8% | test_loss: 1.14, test_acc: 86.5%


  2%|▏         | 252/15000 [00:29<30:21,  8.10it/s]


 epoch: 251 | train_loss: 1.01, train_acc: 88.3% | test_loss: 0.90, test_acc: 89.6%

 epoch: 252 | train_loss: 0.95, train_acc: 88.5% | test_loss: 0.98, test_acc: 88.8%

 epoch: 253 | train_loss: 1.02, train_acc: 88.0% | test_loss: 0.98, test_acc: 88.2%


  2%|▏         | 256/15000 [00:29<25:11,  9.76it/s]


 epoch: 254 | train_loss: 1.00, train_acc: 88.4% | test_loss: 1.04, test_acc: 88.1%

 epoch: 255 | train_loss: 0.97, train_acc: 88.7% | test_loss: 0.94, test_acc: 88.7%

 epoch: 256 | train_loss: 0.99, train_acc: 88.2% | test_loss: 0.96, test_acc: 88.4%


  2%|▏         | 258/15000 [00:29<23:55, 10.27it/s]


 epoch: 257 | train_loss: 1.02, train_acc: 87.5% | test_loss: 1.02, test_acc: 87.8%

 epoch: 258 | train_loss: 0.98, train_acc: 87.9% | test_loss: 1.14, test_acc: 86.3%

 epoch: 259 | train_loss: 0.95, train_acc: 88.4% | test_loss: 1.02, test_acc: 88.3%


  2%|▏         | 260/15000 [00:29<24:09, 10.17it/s]


input:       follow up study by collecting samples from six different excavation sites along the entire length of the nile valley

target:      follow up study by collecting samples from six different excavation sites along the entire length of the nile valley spanning

prediction:  follow up study by another samples from six different south sites along the entire length of the nile valley the

 epoch: 260 | train_loss: 1.07, train_acc: 87.5% | test_loss: 1.11, test_acc: 87.2%

 epoch: 261 | train_loss: 0.96, train_acc: 88.9% | test_loss: 0.95, test_acc: 89.3%


  2%|▏         | 262/15000 [00:30<23:26, 10.48it/s]


 epoch: 262 | train_loss: 0.95, train_acc: 88.6% | test_loss: 1.00, test_acc: 88.8%


  2%|▏         | 264/15000 [00:30<38:16,  6.42it/s]


 epoch: 263 | train_loss: 1.04, train_acc: 88.0% | test_loss: 0.99, test_acc: 88.6%

 epoch: 264 | train_loss: 1.10, train_acc: 87.5% | test_loss: 0.85, test_acc: 89.5%

 epoch: 265 | train_loss: 0.96, train_acc: 88.6% | test_loss: 0.96, test_acc: 88.4%


  2%|▏         | 268/15000 [00:30<28:51,  8.51it/s]


 epoch: 266 | train_loss: 1.07, train_acc: 87.0% | test_loss: 0.99, test_acc: 88.6%

 epoch: 267 | train_loss: 1.09, train_acc: 87.4% | test_loss: 1.03, test_acc: 87.8%

 epoch: 268 | train_loss: 1.04, train_acc: 87.9% | test_loss: 0.99, test_acc: 88.9%


  2%|▏         | 270/15000 [00:31<27:15,  9.01it/s]


 epoch: 269 | train_loss: 1.02, train_acc: 88.1% | test_loss: 0.90, test_acc: 89.9%

input:       social philosopher ivan illich book tools for conviviality the term is also applied in philosophy to designate the fading

target:      social philosopher ivan illich book tools for conviviality the term is also applied in philosophy to designate the fading of

prediction:  social even such nature book tools for specific the term is also applied in philosophy to design the social the

 epoch: 270 | train_loss: 1.07, train_acc: 87.2% | test_loss: 0.90, test_acc: 89.4%

 epoch: 271 | train_loss: 0.99, train_acc: 88.1% | test_loss: 0.93, test_acc: 88.8%


  2%|▏         | 274/15000 [00:31<24:01, 10.22it/s]


 epoch: 272 | train_loss: 1.01, train_acc: 88.4% | test_loss: 1.03, test_acc: 88.0%

 epoch: 273 | train_loss: 0.93, train_acc: 88.6% | test_loss: 0.94, test_acc: 89.1%

 epoch: 274 | train_loss: 1.00, train_acc: 88.4% | test_loss: 1.03, test_acc: 88.1%


  2%|▏         | 276/15000 [00:31<23:23, 10.49it/s]


 epoch: 275 | train_loss: 0.99, train_acc: 88.4% | test_loss: 0.93, test_acc: 89.2%

 epoch: 276 | train_loss: 0.97, train_acc: 89.1% | test_loss: 1.03, test_acc: 87.4%


  2%|▏         | 278/15000 [00:32<38:49,  6.32it/s]


 epoch: 277 | train_loss: 0.98, train_acc: 88.5% | test_loss: 0.97, test_acc: 88.2%

 epoch: 278 | train_loss: 0.91, train_acc: 89.4% | test_loss: 0.95, test_acc: 88.5%

 epoch: 279 | train_loss: 1.00, train_acc: 88.4% | test_loss: 0.92, test_acc: 88.9%


  2%|▏         | 280/15000 [00:32<34:26,  7.12it/s]


input:       suicide has been most notable among chinese and indian women the elderly and middle aged russian men the entire

target:      suicide has been most notable among chinese and indian women the elderly and middle aged russian men the entire written

prediction:  suicide has been most notable among chinese and indian women the lies and middle future russian men the entire the

 epoch: 280 | train_loss: 0.99, train_acc: 87.8% | test_loss: 0.97, test_acc: 88.2%

 epoch: 281 | train_loss: 1.03, train_acc: 87.6% | test_loss: 1.00, test_acc: 88.5%


  2%|▏         | 284/15000 [00:32<27:16,  8.99it/s]


 epoch: 282 | train_loss: 1.08, train_acc: 87.1% | test_loss: 1.08, test_acc: 86.9%

 epoch: 283 | train_loss: 0.95, train_acc: 88.0% | test_loss: 1.08, test_acc: 87.5%

 epoch: 284 | train_loss: 0.95, train_acc: 88.4% | test_loss: 0.96, test_acc: 88.9%


  2%|▏         | 286/15000 [00:32<25:21,  9.67it/s]


 epoch: 285 | train_loss: 0.89, train_acc: 89.5% | test_loss: 0.94, test_acc: 88.9%

 epoch: 286 | train_loss: 0.99, train_acc: 88.3% | test_loss: 1.06, test_acc: 86.9%

 epoch: 287 | train_loss: 0.93, train_acc: 88.2% | test_loss: 1.00, test_acc: 88.7%


  2%|▏         | 288/15000 [00:33<24:24, 10.05it/s]


 epoch: 288 | train_loss: 1.04, train_acc: 87.4% | test_loss: 0.99, test_acc: 88.6%

 epoch: 289 | train_loss: 0.97, train_acc: 88.5% | test_loss: 1.07, test_acc: 87.6%

input:       differential equations linear algebra complex analysis etc and in computer science any physics oriented career position requires at least

target:      differential equations linear algebra complex analysis etc and in computer science any physics oriented career position requires at least an

prediction:  states equations linear their complex analysis etc and in computer science any physics oriented career position requires at least the


  2%|▏         | 290/15000 [00:33<24:37,  9.95it/s]


 epoch: 290 | train_loss: 0.98, train_acc: 88.3% | test_loss: 1.00, test_acc: 88.2%


  2%|▏         | 292/15000 [00:34<40:47,  6.01it/s]


 epoch: 291 | train_loss: 0.96, train_acc: 88.6% | test_loss: 0.96, test_acc: 88.4%

 epoch: 292 | train_loss: 0.80, train_acc: 90.9% | test_loss: 0.92, test_acc: 89.2%

 epoch: 293 | train_loss: 0.96, train_acc: 88.8% | test_loss: 0.97, test_acc: 88.6%


  2%|▏         | 296/15000 [00:34<30:05,  8.15it/s]


 epoch: 294 | train_loss: 0.88, train_acc: 89.2% | test_loss: 0.95, test_acc: 88.5%

 epoch: 295 | train_loss: 1.03, train_acc: 88.1% | test_loss: 0.91, test_acc: 89.6%

 epoch: 296 | train_loss: 0.92, train_acc: 88.8% | test_loss: 1.03, test_acc: 87.8%


  2%|▏         | 298/15000 [00:34<27:09,  9.02it/s]


 epoch: 297 | train_loss: 0.96, train_acc: 89.3% | test_loss: 0.91, test_acc: 89.9%

 epoch: 298 | train_loss: 1.08, train_acc: 87.3% | test_loss: 0.89, test_acc: 89.6%

 epoch: 299 | train_loss: 1.02, train_acc: 87.9% | test_loss: 0.98, test_acc: 88.4%


  2%|▏         | 300/15000 [00:34<26:15,  9.33it/s]


input:       and the hyksos nubian allies the kushites to the south after years of vassalage thebes gathered enough strength to

target:      and the hyksos nubian allies the kushites to the south after years of vassalage thebes gathered enough strength to challenge

prediction:  and the hyksos believed allies the psychologists to the south after years of do thebes title enough strength to of

 epoch: 300 | train_loss: 0.88, train_acc: 89.7% | test_loss: 0.98, test_acc: 88.5%

 epoch: 301 | train_loss: 0.93, train_acc: 89.1% | test_loss: 0.98, test_acc: 87.7%


  2%|▏         | 304/15000 [00:35<23:42, 10.33it/s]


 epoch: 302 | train_loss: 1.03, train_acc: 88.4% | test_loss: 0.91, test_acc: 88.4%

 epoch: 303 | train_loss: 0.96, train_acc: 87.7% | test_loss: 0.95, test_acc: 89.3%

 epoch: 304 | train_loss: 0.97, train_acc: 88.7% | test_loss: 0.91, test_acc: 89.5%

 epoch: 305 | train_loss: 0.88, train_acc: 90.0% | test_loss: 0.91, test_acc: 89.4%


  2%|▏         | 308/15000 [00:35<33:24,  7.33it/s]


 epoch: 306 | train_loss: 0.99, train_acc: 88.9% | test_loss: 0.94, test_acc: 89.0%

 epoch: 307 | train_loss: 0.96, train_acc: 88.7% | test_loss: 1.03, test_acc: 88.1%

 epoch: 308 | train_loss: 0.87, train_acc: 89.5% | test_loss: 1.01, test_acc: 88.5%


  2%|▏         | 310/15000 [00:36<30:35,  8.00it/s]


 epoch: 309 | train_loss: 0.96, train_acc: 88.7% | test_loss: 0.92, test_acc: 89.3%

input:       and there is speculation that since one of the accused lionardo de tornabuoni was related to lorenzo de medici

target:      and there is speculation that since one of the accused lionardo de tornabuoni was related to lorenzo de medici the

prediction:  and there is speculation that since one of the southern equivalent de generally was related to american de medici the

 epoch: 310 | train_loss: 0.95, train_acc: 89.3% | test_loss: 0.94, test_acc: 88.9%

 epoch: 311 | train_loss: 1.00, train_acc: 87.9% | test_loss: 0.94, test_acc: 88.8%


  2%|▏         | 314/15000 [00:36<26:37,  9.19it/s]


 epoch: 312 | train_loss: 0.94, train_acc: 89.5% | test_loss: 0.84, test_acc: 90.0%

 epoch: 313 | train_loss: 0.96, train_acc: 88.7% | test_loss: 1.00, test_acc: 88.5%

 epoch: 314 | train_loss: 0.93, train_acc: 88.7% | test_loss: 0.97, test_acc: 88.8%


  2%|▏         | 316/15000 [00:36<27:02,  9.05it/s]


 epoch: 315 | train_loss: 0.88, train_acc: 90.0% | test_loss: 1.04, test_acc: 87.0%

 epoch: 316 | train_loss: 0.90, train_acc: 89.5% | test_loss: 0.94, test_acc: 89.2%


  2%|▏         | 318/15000 [00:36<26:42,  9.16it/s]


 epoch: 317 | train_loss: 0.82, train_acc: 90.3% | test_loss: 0.94, test_acc: 88.9%

 epoch: 318 | train_loss: 0.90, train_acc: 89.4% | test_loss: 0.95, test_acc: 89.2%


  2%|▏         | 319/15000 [00:36<26:52,  9.10it/s]


 epoch: 319 | train_loss: 1.01, train_acc: 87.5% | test_loss: 0.87, test_acc: 89.8%

input:       is aung san suu kyi from burma for her peaceful and non violent struggle under military dictatorship in burma

target:      is aung san suu kyi from burma for her peaceful and non violent struggle under military dictatorship in burma she

prediction:  is ii san they leonardo from burma for her peaceful and non work struggle under military russian in burma the


  2%|▏         | 321/15000 [00:37<49:46,  4.92it/s]


 epoch: 320 | train_loss: 0.90, train_acc: 89.3% | test_loss: 0.86, test_acc: 90.2%

 epoch: 321 | train_loss: 0.83, train_acc: 90.8% | test_loss: 0.90, test_acc: 89.8%


  2%|▏         | 323/15000 [00:37<37:56,  6.45it/s]


 epoch: 322 | train_loss: 0.95, train_acc: 89.3% | test_loss: 0.93, test_acc: 89.1%

 epoch: 323 | train_loss: 0.87, train_acc: 90.2% | test_loss: 1.00, test_acc: 88.3%

 epoch: 324 | train_loss: 0.92, train_acc: 89.0% | test_loss: 0.90, test_acc: 89.7%


  2%|▏         | 326/15000 [00:38<30:08,  8.11it/s]


 epoch: 325 | train_loss: 1.07, train_acc: 87.9% | test_loss: 0.92, test_acc: 88.9%

 epoch: 326 | train_loss: 0.87, train_acc: 89.9% | test_loss: 0.93, test_acc: 89.3%

 epoch: 327 | train_loss: 0.94, train_acc: 89.2% | test_loss: 0.89, test_acc: 90.0%

  2%|▏         | 328/15000 [00:38<28:16,  8.65it/s]



 epoch: 328 | train_loss: 0.93, train_acc: 89.3% | test_loss: 0.96, test_acc: 88.6%


  2%|▏         | 330/15000 [00:38<31:06,  7.86it/s]


 epoch: 329 | train_loss: 0.96, train_acc: 88.7% | test_loss: 0.96, test_acc: 89.3%

input:       conquest of northern egypt by the hyksos around bc the hyksos were expelled from egypt and the land was

target:      conquest of northern egypt by the hyksos around bc the hyksos were expelled from egypt and the land was reunited

prediction:  conquest of northern egypt by the hyksos around bc the hyksos were sites from egypt and the land was the

 epoch: 330 | train_loss: 0.94, train_acc: 88.7% | test_loss: 0.96, test_acc: 88.8%


  2%|▏         | 332/15000 [00:38<28:00,  8.73it/s]


 epoch: 331 | train_loss: 0.88, train_acc: 90.1% | test_loss: 0.86, test_acc: 89.3%

 epoch: 332 | train_loss: 0.98, train_acc: 87.7% | test_loss: 0.87, test_acc: 90.1%


  2%|▏         | 334/15000 [00:39<27:29,  8.89it/s]


 epoch: 333 | train_loss: 0.91, train_acc: 89.0% | test_loss: 0.90, test_acc: 89.7%

 epoch: 334 | train_loss: 0.89, train_acc: 90.1% | test_loss: 0.90, test_acc: 89.3%


  2%|▏         | 337/15000 [00:39<34:59,  6.98it/s]


 epoch: 335 | train_loss: 0.83, train_acc: 90.1% | test_loss: 0.84, test_acc: 91.1%

 epoch: 336 | train_loss: 1.14, train_acc: 86.5% | test_loss: 0.92, test_acc: 89.4%

 epoch: 337 | train_loss: 0.92, train_acc: 88.8% | test_loss: 0.82, test_acc: 91.2%


  2%|▏         | 340/15000 [00:39<29:00,  8.42it/s]


 epoch: 338 | train_loss: 0.95, train_acc: 88.3% | test_loss: 0.88, test_acc: 89.7%

 epoch: 339 | train_loss: 0.98, train_acc: 88.7% | test_loss: 0.93, test_acc: 88.4%

input:       also generated half of the continent gdp and has become the continent first regional power most of the population

target:      also generated half of the continent gdp and has become the continent first regional power most of the population lives

prediction:  also generated half of the continent gdp and has become the continent first regional power most of the population of

 epoch: 340 | train_loss: 0.84, train_acc: 90.1% | test_loss: 0.83, test_acc: 90.3%


  2%|▏         | 342/15000 [00:40<25:37,  9.54it/s]


 epoch: 341 | train_loss: 0.91, train_acc: 89.2% | test_loss: 0.84, test_acc: 90.1%

 epoch: 342 | train_loss: 0.89, train_acc: 89.2% | test_loss: 1.00, test_acc: 88.8%

 epoch: 343 | train_loss: 0.94, train_acc: 89.2% | test_loss: 0.91, test_acc: 88.7%


  2%|▏         | 346/15000 [00:40<22:57, 10.64it/s]


 epoch: 344 | train_loss: 0.85, train_acc: 90.2% | test_loss: 0.94, test_acc: 88.7%

 epoch: 345 | train_loss: 0.93, train_acc: 88.3% | test_loss: 0.89, test_acc: 89.8%

 epoch: 346 | train_loss: 0.93, train_acc: 89.5% | test_loss: 0.88, test_acc: 90.0%

 epoch: 347 | train_loss: 0.87, train_acc: 89.6% | test_loss: 0.92, test_acc: 89.8%


  2%|▏         | 348/15000 [00:40<35:49,  6.82it/s]


 epoch: 348 | train_loss: 0.91, train_acc: 89.5% | test_loss: 0.86, test_acc: 89.6%

 epoch: 349 | train_loss: 0.99, train_acc: 87.8% | test_loss: 0.92, test_acc: 88.9%

input:       student adam mari he defined the elements of national economy products are offered at natural price generated by the

target:      student adam mari he defined the elements of national economy products are offered at natural price generated by the use

prediction:  student vary behavior he defined the elements of national economy products are offered at natural price generated by the the


  2%|▏         | 352/15000 [00:41<28:27,  8.58it/s]


 epoch: 350 | train_loss: 0.91, train_acc: 89.4% | test_loss: 0.78, test_acc: 90.9%

 epoch: 351 | train_loss: 0.80, train_acc: 90.9% | test_loss: 0.88, test_acc: 90.1%

 epoch: 352 | train_loss: 0.90, train_acc: 88.9% | test_loss: 1.02, test_acc: 88.1%


  2%|▏         | 354/15000 [00:41<25:57,  9.41it/s]


 epoch: 353 | train_loss: 0.85, train_acc: 90.3% | test_loss: 0.91, test_acc: 89.1%

 epoch: 354 | train_loss: 1.00, train_acc: 88.1% | test_loss: 0.88, test_acc: 89.6%

 epoch: 355 | train_loss: 0.84, train_acc: 89.8% | test_loss: 0.88, test_acc: 89.9%


  2%|▏         | 358/15000 [00:41<22:50, 10.68it/s]


 epoch: 356 | train_loss: 0.92, train_acc: 89.0% | test_loss: 0.88, test_acc: 89.9%

 epoch: 357 | train_loss: 0.90, train_acc: 89.2% | test_loss: 0.88, test_acc: 89.5%

 epoch: 358 | train_loss: 0.87, train_acc: 89.7% | test_loss: 0.93, test_acc: 89.7%


  2%|▏         | 360/15000 [00:42<23:25, 10.41it/s]


 epoch: 359 | train_loss: 0.91, train_acc: 89.0% | test_loss: 0.85, test_acc: 90.1%

input:       in mesopotamia taking the cities of babylon seleucia and finally the capital of ctesiphon in while suppressing the kitos

target:      in mesopotamia taking the cities of babylon seleucia and finally the capital of ctesiphon in while suppressing the kitos war

prediction:  in mesopotamia taking the cities of different methods and finally the capital of darwin in while northern the nature the

 epoch: 360 | train_loss: 0.90, train_acc: 89.4% | test_loss: 0.93, test_acc: 88.8%


  2%|▏         | 362/15000 [00:42<22:52, 10.67it/s]


 epoch: 361 | train_loss: 0.84, train_acc: 89.5% | test_loss: 0.88, test_acc: 90.0%

 epoch: 362 | train_loss: 0.87, train_acc: 89.8% | test_loss: 0.89, test_acc: 89.6%


  2%|▏         | 364/15000 [00:42<39:37,  6.16it/s]


 epoch: 363 | train_loss: 0.82, train_acc: 90.6% | test_loss: 0.87, test_acc: 89.5%

 epoch: 364 | train_loss: 0.89, train_acc: 89.5% | test_loss: 0.85, test_acc: 89.7%

 epoch: 365 | train_loss: 0.87, train_acc: 90.1% | test_loss: 0.84, test_acc: 89.9%


  2%|▏         | 368/15000 [00:43<29:24,  8.29it/s]


 epoch: 366 | train_loss: 0.90, train_acc: 89.7% | test_loss: 0.93, test_acc: 88.9%

 epoch: 367 | train_loss: 0.92, train_acc: 89.6% | test_loss: 0.87, test_acc: 90.2%

 epoch: 368 | train_loss: 0.88, train_acc: 89.4% | test_loss: 0.87, test_acc: 89.9%


  2%|▏         | 370/15000 [00:43<27:27,  8.88it/s]


 epoch: 369 | train_loss: 0.92, train_acc: 88.8% | test_loss: 0.88, test_acc: 89.9%

input:       set up and punchline often adding twist topper or tagline for an intensified or extra laugh delivery relies on

target:      set up and punchline often adding twist topper or tagline for an intensified or extra laugh delivery relies on the

prediction:  set up and

 epoch: 370 | train_loss: 0.88, train_acc: 90.0% | test_loss: 1.02, test_acc: 87.7%

 epoch: 371 | train_loss: 0.90, train_acc: 89.6% | test_loss: 0.87, test_acc: 89.7%


  2%|▏         | 374/15000 [00:43<23:59, 10.16it/s]


 epoch: 372 | train_loss: 0.86, train_acc: 89.9% | test_loss: 0.92, test_acc: 89.0%

 epoch: 373 | train_loss: 0.88, train_acc: 90.0% | test_loss: 0.90, test_acc: 89.5%

 epoch: 374 | train_loss: 0.87, train_acc: 89.8% | test_loss: 0.90, test_acc: 89.4%


  3%|▎         | 376/15000 [00:43<23:00, 10.59it/s]


 epoch: 375 | train_loss: 0.92, train_acc: 89.2% | test_loss: 0.92, test_acc: 89.1%

 epoch: 376 | train_loss: 0.95, train_acc: 89.1% | test_loss: 0.88, test_acc: 89.7%

 epoch: 377 | train_loss: 0.80, train_acc: 90.8% | test_loss: 0.89, test_acc: 89.6%


  3%|▎         | 380/15000 [00:44<33:36,  7.25it/s]


 epoch: 378 | train_loss: 0.91, train_acc: 89.4% | test_loss: 0.89, test_acc: 89.6%

 epoch: 379 | train_loss: 0.89, train_acc: 90.0% | test_loss: 0.86, test_acc: 89.9%

input:       the specified period and the incidence of default is very low the dalai lama has received approximately eighty four

target:      the specified period and the incidence of default is very low the dalai lama has received approximately eighty four awards

prediction:  the specified period and the computer of default is very low the rome posts has received approximately studied four the

 epoch: 380 | train_loss: 0.80, train_acc: 90.5% | test_loss: 0.91, test_acc: 89.6%


  3%|▎         | 382/15000 [00:44<29:35,  8.23it/s]


 epoch: 381 | train_loss: 0.83, train_acc: 89.9% | test_loss: 0.88, test_acc: 89.0%

 epoch: 382 | train_loss: 0.99, train_acc: 88.5% | test_loss: 0.88, test_acc: 89.8%

 epoch: 383 | train_loss: 0.88, train_acc: 89.5% | test_loss: 0.89, test_acc: 89.3%


  3%|▎         | 386/15000 [00:45<24:03, 10.13it/s]


 epoch: 384 | train_loss: 0.92, train_acc: 89.6% | test_loss: 0.90, test_acc: 89.9%

 epoch: 385 | train_loss: 0.88, train_acc: 89.8% | test_loss: 0.84, test_acc: 90.0%

 epoch: 386 | train_loss: 0.89, train_acc: 89.7% | test_loss: 0.99, test_acc: 88.3%


  3%|▎         | 388/15000 [00:45<23:02, 10.57it/s]


 epoch: 387 | train_loss: 0.92, train_acc: 89.3% | test_loss: 0.90, test_acc: 89.3%

 epoch: 388 | train_loss: 0.88, train_acc: 89.8% | test_loss: 0.85, test_acc: 89.8%

 epoch: 389 | train_loss: 0.82, train_acc: 90.4% | test_loss: 0.84, test_acc: 89.9%


  3%|▎         | 390/15000 [00:45<23:33, 10.34it/s]


input:       started in the with extreme poverty now below the threefold division of the old world into europe asia and

target:      started in the with extreme poverty now below the threefold division of the old world into europe asia and africa

prediction:  started in the with extreme poverty now below the covers division of the old world into europe asia and the

 epoch: 390 | train_loss: 1.02, train_acc: 88.5% | test_loss: 0.96, test_acc: 88.7%


  3%|▎         | 392/15000 [00:46<37:29,  6.49it/s]


 epoch: 391 | train_loss: 0.83, train_acc: 90.5% | test_loss: 0.82, test_acc: 90.6%

 epoch: 392 | train_loss: 0.92, train_acc: 89.4% | test_loss: 0.88, test_acc: 90.0%

 epoch: 393 | train_loss: 0.83, train_acc: 90.4% | test_loss: 0.85, test_acc: 90.2%


  3%|▎         | 396/15000 [00:46<27:56,  8.71it/s]


 epoch: 394 | train_loss: 0.86, train_acc: 90.0% | test_loss: 0.77, test_acc: 90.5%

 epoch: 395 | train_loss: 0.89, train_acc: 89.5% | test_loss: 0.83, test_acc: 90.8%

 epoch: 396 | train_loss: 0.89, train_acc: 89.2% | test_loss: 0.95, test_acc: 88.9%


  3%|▎         | 398/15000 [00:46<25:07,  9.69it/s]


 epoch: 397 | train_loss: 0.88, train_acc: 89.7% | test_loss: 0.83, test_acc: 90.6%

 epoch: 398 | train_loss: 0.95, train_acc: 88.9% | test_loss: 0.91, test_acc: 89.5%

 epoch: 399 | train_loss: 0.86, train_acc: 90.0% | test_loss: 0.79, test_acc: 90.8%

input:       formation of the transantarctic mountains and antarctopelta trinisaura morrosaurus and imperobator from late cretaceous of the antarctic peninsula have


  3%|▎         | 400/15000 [00:46<24:51,  9.79it/s]


target:      formation of the transantarctic mountains and antarctopelta trinisaura morrosaurus and imperobator from late cretaceous of the antarctic peninsula have been

prediction:  formation of the transantarctic mountains and morphemes ended resulting and therapy from late cretaceous of the antarctic peninsula have the

 epoch: 400 | train_loss: 0.92, train_acc: 89.0% | test_loss: 0.85, test_acc: 89.8%

 epoch: 401 | train_loss: 0.96, train_acc: 88.9% | test_loss: 0.82, test_acc: 90.4%


  3%|▎         | 404/15000 [00:47<22:41, 10.72it/s]


 epoch: 402 | train_loss: 0.87, train_acc: 89.6% | test_loss: 0.85, test_acc: 89.4%

 epoch: 403 | train_loss: 0.91, train_acc: 89.0% | test_loss: 0.88, test_acc: 89.8%

 epoch: 404 | train_loss: 0.82, train_acc: 90.8% | test_loss: 0.81, test_acc: 90.2%


  3%|▎         | 406/15000 [00:47<21:59, 11.06it/s]


 epoch: 405 | train_loss: 0.92, train_acc: 89.6% | test_loss: 0.73, test_acc: 91.8%

 epoch: 406 | train_loss: 0.78, train_acc: 90.6% | test_loss: 0.88, test_acc: 89.1%


  3%|▎         | 408/15000 [00:47<37:00,  6.57it/s]


 epoch: 407 | train_loss: 0.85, train_acc: 89.6% | test_loss: 0.82, test_acc: 90.2%

 epoch: 408 | train_loss: 0.85, train_acc: 90.3% | test_loss: 0.80, test_acc: 90.8%

 epoch: 409 | train_loss: 0.87, train_acc: 89.9% | test_loss: 0.92, test_acc: 89.2%

input:      

  3%|▎         | 412/15000 [00:48<28:20,  8.58it/s]

 latin american guyana was initially colonized by the dutch before coming under british control though there was brief period

target:      latin american guyana was initially colonized by the dutch before coming under british control though there was brief period during

prediction:  latin american guyana was initially area by the dutch before coming under british control though there was brief period the

 epoch: 410 | train_loss: 0.90, train_acc: 89.5% | test_loss: 0.78, test_acc: 91.1%

 epoch: 411 | train_loss: 0.89, train_acc: 89.4% | test_loss: 0.85, test_acc: 90.7%

 epoch: 412 | train_loss: 0.92, train_acc: 89.0% | test_loss: 0.86, test_acc: 89.5%


  3%|▎         | 414/15000 [00:48<25:38,  9.48it/s]


 epoch: 413 | train_loss: 0.92, train_acc: 89.5% | test_loss: 0.96, test_acc: 88.4%

 epoch: 414 | train_loss: 0.80, train_acc: 89.9% | test_loss: 0.98, test_acc: 88.7%

 epoch: 415 | train_loss: 0.85, train_acc: 90.0% | test_loss: 0.83, test_acc: 89.8%


  3%|▎         | 418/15000 [00:48<22:45, 10.68it/s]


 epoch: 416 | train_loss: 0.86, train_acc: 90.1% | test_loss: 0.90, test_acc: 89.1%

 epoch: 417 | train_loss: 0.84, train_acc: 90.0% | test_loss: 0.85, test_acc: 90.1%

 epoch: 418 | train_loss: 0.83, train_acc: 90.2% | test_loss: 0.77, test_acc: 90.8%


  3%|▎         | 420/15000 [00:48<23:21, 10.40it/s]


 epoch: 419 | train_loss: 0.87, train_acc: 89.8% | test_loss: 0.88, test_acc: 90.1%

input:       years ago continents formed then broke up and reformed as the surface of earth reshaped over hundreds of millions

target:      years ago continents formed then broke up and reformed as the surface of earth reshaped over hundreds of millions of

prediction:  years ago continents formed then broke up and diocletian as the surface of earth experimental over hundreds of millions the

 epoch: 420 | train_loss: 0.86, train_acc: 90.3% | test_loss: 0.93, test_acc: 89.1%


  3%|▎         | 422/15000 [00:49<38:28,  6.31it/s]


 epoch: 421 | train_loss: 0.88, train_acc: 89.2% | test_loss: 0.89, test_acc: 89.3%

 epoch: 422 | train_loss: 0.86, train_acc: 89.6% | test_loss: 0.93, test_acc: 88.7%

 epoch: 423 | train_loss: 0.76, train_acc: 91.0% | test_loss: 0.85, test_acc: 89.6%


  3%|▎         | 426/15000 [00:49<30:58,  7.84it/s]


 epoch: 424 | train_loss: 0.84, train_acc: 89.3% | test_loss: 0.91, test_acc: 89.1%

 epoch: 425 | train_loss: 0.83, train_acc: 90.2% | test_loss: 0.84, test_acc: 90.0%

 epoch: 426 | train_loss: 0.88, train_acc: 89.6% | test_loss: 0.81, test_acc: 90.3%


  3%|▎         | 429/15000 [00:50<28:00,  8.67it/s]


 epoch: 427 | train_loss: 0.81, train_acc: 90.6% | test_loss: 0.76, test_acc: 91.0%

 epoch: 428 | train_loss: 0.84, train_acc: 90.2% | test_loss: 0.88, test_acc: 89.6%

 epoch: 429 | train_loss: 0.90, train_acc: 89.3% | test_loss: 0.79, test_acc: 90.9%


  3%|▎         | 431/15000 [00:50<28:42,  8.46it/s]


input:       those parts that programmer will find useful and keeping them consistent even if the internal details change later an

target:      those parts that programmer will find useful and keeping them consistent even if the internal details change later an api

prediction:  those parts that programmer will find useful and them them consistent even if the internal details change later an the

 epoch: 430 | train_loss: 0.90, train_acc: 89.5% | test_loss: 0.90, test_acc: 89.6%

 epoch: 431 | train_loss: 0.80, train_acc: 90.2% | test_loss: 0.91, test_acc: 89.4%


  3%|▎         | 433/15000 [00:50<27:09,  8.94it/s]


 epoch: 432 | train_loss: 0.76, train_acc: 91.1% | test_loss: 0.85, test_acc: 90.2%

 epoch: 433 | train_loss: 0.79, train_acc: 90.8% | test_loss: 0.97, test_acc: 87.8%


  3%|▎         | 434/15000 [00:50<26:57,  9.01it/s]


 epoch: 434 | train_loss: 0.82, train_acc: 90.2% | test_loss: 0.85, test_acc: 90.2%


  3%|▎         | 436/15000 [00:51<37:05,  6.54it/s]


 epoch: 435 | train_loss: 0.83, train_acc: 89.9% | test_loss: 0.87, test_acc: 89.6%

 epoch: 436 | train_loss: 0.90, train_acc: 89.4% | test_loss: 0.86, test_acc: 90.1%

 epoch: 437 | train_loss: 0.79, train_acc: 91.0% | test_loss: 0.88, test_acc: 89.5%


  3%|▎         | 438/15000 [00:51<30:50,  7.87it/s]


 epoch: 438 | train_loss: 0.83, train_acc: 90.4% | test_loss: 0.88, test_acc: 89.5%

 epoch: 439 | train_loss: 0.77, train_acc: 91.0% | test_loss: 0.82, test_acc: 89.9%

input:       andr deluc in and introduced as fixed term by horace dict de saussure in the word is derived

target:      andr deluc in and introduced as fixed term by horace dict de saussure in the word is derived from

prediction:  big than in and introduced as fixed term by alexander multiple de saussure in the word is derived the


  3%|▎         | 442/15000 [00:51<27:41,  8.76it/s]


 epoch: 440 | train_loss: 0.90, train_acc: 89.2% | test_loss: 0.92, test_acc: 88.7%

 epoch: 441 | train_loss: 0.82, train_acc: 90.2% | test_loss: 0.79, test_acc: 90.8%

 epoch: 442 | train_loss: 0.88, train_acc: 89.7% | test_loss: 0.88, test_acc: 89.3%


  3%|▎         | 445/15000 [00:52<25:40,  9.45it/s]


 epoch: 443 | train_loss: 0.79, train_acc: 91.2% | test_loss: 0.87, test_acc: 89.6%

 epoch: 444 | train_loss: 0.94, train_acc: 88.5% | test_loss: 0.85, test_acc: 89.7%

 epoch: 445 | train_loss: 0.79, train_acc: 90.8% | test_loss: 0.91, test_acc: 89.5%


  3%|▎         | 447/15000 [00:52<25:19,  9.58it/s]


 epoch: 446 | train_loss: 0.79, train_acc: 90.7% | test_loss: 0.82, test_acc: 90.5%

 epoch: 447 | train_loss: 0.84, train_acc: 90.2% | test_loss: 0.85, test_acc: 89.9%


  3%|▎         | 448/15000 [00:52<25:21,  9.56it/s]


 epoch: 448 | train_loss: 0.90, train_acc: 89.3% | test_loss: 0.80, test_acc: 90.4%


  3%|▎         | 450/15000 [00:53<48:12,  5.03it/s]


 epoch: 449 | train_loss: 0.89, train_acc: 89.4% | test_loss: 0.73, test_acc: 92.0%

input:       ancient pangaea supercontinent along with landmasses such as the americas and afro eurasia volcanic islands which are geologically associated

target:      ancient pangaea supercontinent along with landmasses such as the americas and afro eurasia volcanic islands which are geologically associated with

prediction:  ancient specifically praetorian along with landmasses such as the americas and afro eurasia volcanic islands which are geologically associated the

 epoch: 450 | train_loss: 0.89, train_acc: 89.3% | test_loss: 0.84, test_acc: 90.3%


  3%|▎         | 452/15000 [00:53<35:55,  6.75it/s]


 epoch: 451 | train_loss: 0.85, train_acc: 90.1% | test_loss: 0.81, test_acc: 90.8%

 epoch: 452 | train_loss: 0.83, train_acc: 90.7% | test_loss: 0.76, test_acc: 90.8%

 epoch: 453 | train_loss: 0.87, train_acc: 89.7% | test_loss: 0.75, test_acc: 90.9%


  3%|▎         | 456/15000 [00:53<26:10,  9.26it/s]


 epoch: 454 | train_loss: 0.86, train_acc: 89.7% | test_loss: 0.84, test_acc: 90.2%

 epoch: 455 | train_loss: 0.95, train_acc: 88.7% | test_loss: 0.84, test_acc: 90.7%

 epoch: 456 | train_loss: 0.80, train_acc: 90.2% | test_loss: 0.84, test_acc: 90.5%


  3%|▎         | 458/15000 [00:53<23:53, 10.14it/s]


 epoch: 457 | train_loss: 0.81, train_acc: 90.7% | test_loss: 0.86, test_acc: 90.3%

 epoch: 458 | train_loss: 0.83, train_acc: 90.3% | test_loss: 0.87, test_acc: 89.9%

 epoch: 459 | train_loss: 0.86, train_acc: 90.1% | test_loss: 0.93, test_acc: 89.6%


  3%|▎         | 460/15000 [00:53<24:16,  9.98it/s]


input:       evolve among active and frequent internet users it remains distinct from other previously offline cultures and subcultures which now

target:      evolve among active and frequent internet users it remains distinct from other previously offline cultures and subcultures which now have

prediction:  evolve among active and frequent internet users it remains distinct from other previously offline cultures and subcultures which now the

 epoch: 460 | train_loss: 0.79, train_acc: 90.9% | test_loss: 0.67, test_acc: 92.5%

 epoch: 461 | train_loss: 0.77, train_acc: 90.8% | test_loss: 0.81, test_acc: 90.6%


  3%|▎         | 462/15000 [00:54<23:36, 10.27it/s]


 epoch: 462 | train_loss: 0.76, train_acc: 90.9% | test_loss: 0.82, test_acc: 90.6%

 epoch: 463 | train_loss: 0.83, train_acc: 90.1% | test_loss: 0.88, test_acc: 89.7%


  3%|▎         | 466/15000 [00:54<33:17,  7.28it/s]


 epoch: 464 | train_loss: 0.83, train_acc: 90.0% | test_loss: 0.86, test_acc: 89.9%

 epoch: 465 | train_loss: 0.69, train_acc: 92.3% | test_loss: 0.79, test_acc: 90.2%

 epoch: 466 | train_loss: 0.84, train_acc: 90.4% | test_loss: 0.87, test_acc: 89.7%


  3%|▎         | 469/15000 [00:55<27:27,  8.82it/s]


 epoch: 467 | train_loss: 0.77, train_acc: 91.2% | test_loss: 0.79, test_acc: 90.6%

 epoch: 468 | train_loss: 0.71, train_acc: 92.0% | test_loss: 0.80, test_acc: 90.8%

 epoch: 469 | train_loss: 0.81, train_acc: 90.7% | test_loss: 0.74, test_acc: 91.3%


  3%|▎         | 471/15000 [00:55<26:13,  9.23it/s]


input:       the caterina buti del vacca who married the local artisan antonio di piero buti del vacca nicknamed accattabriga the

target:      the caterina buti del vacca who married the local artisan antonio di piero buti del vacca nicknamed accattabriga the quarrelsome

prediction:  the game fishing del lived who married the local augustus antonio di piero fishing in lived class equivalent the the

 epoch: 470 | train_loss: 0.83, train_acc: 90.0% | test_loss: 0.82, test_acc: 90.5%

 epoch: 471 | train_loss: 0.77, train_acc: 91.2% | test_loss: 0.79, test_acc: 90.6%


  3%|▎         | 473/15000 [00:55<24:25,  9.91it/s]


 epoch: 472 | train_loss: 0.86, train_acc: 90.0% | test_loss: 0.83, test_acc: 90.5%

 epoch: 473 | train_loss: 0.78, train_acc: 91.0% | test_loss: 0.84, test_acc: 90.5%

 epoch: 474 | train_loss: 0.84, train_acc: 89.8% | test_loss: 0.88, test_acc: 89.8%


  3%|▎         | 477/15000 [00:55<22:07, 10.94it/s]


 epoch: 475 | train_loss: 0.79, train_acc: 90.9% | test_loss: 0.84, test_acc: 90.5%

 epoch: 476 | train_loss: 0.76, train_acc: 91.2% | test_loss: 0.87, test_acc: 89.7%

 epoch: 477 | train_loss: 0.78, train_acc: 90.8% | test_loss: 0.83, test_acc: 90.0%


  3%|▎         | 479/15000 [00:56<22:18, 10.85it/s]


 epoch: 478 | train_loss: 0.75, train_acc: 91.3% | test_loss: 0.86, test_acc: 89.6%

 epoch: 479 | train_loss: 0.83, train_acc: 89.7% | test_loss: 0.76, test_acc: 91.1%

input:       significant difference between the three groups there are also potential relationships between humour and having healthy immune system siga

target:      significant difference between the three groups there are also potential relationships between humour and having healthy immune system siga is

prediction:  significant difference between the three groups there are also potential relationships between humour and having healthy immune system expansion the


  3%|▎         | 481/15000 [00:56<22:39, 10.68it/s]


 epoch: 480 | train_loss: 0.83, train_acc: 89.9% | test_loss: 0.75, test_acc: 91.3%

 epoch: 481 | train_loss: 0.82, train_acc: 90.0% | test_loss: 0.75, test_acc: 90.6%

 epoch: 482 | train_loss: 0.79, train_acc: 91.1% | test_loss: 0.85, test_acc: 90.0%


  3%|▎         | 485/15000 [00:56<20:59, 11.52it/s]


 epoch: 483 | train_loss: 0.80, train_acc: 90.6% | test_loss: 0.80, test_acc: 90.2%

 epoch: 484 | train_loss: 0.72, train_acc: 91.3% | test_loss: 0.80, test_acc: 90.5%

 epoch: 485 | train_loss: 0.87, train_acc: 89.7% | test_loss: 0.85, test_acc: 89.5%


  3%|▎         | 487/15000 [00:56<20:32, 11.77it/s]


 epoch: 486 | train_loss: 0.87, train_acc: 89.5% | test_loss: 0.85, test_acc: 89.9%

 epoch: 487 | train_loss: 0.75, train_acc: 90.9% | test_loss: 0.81, test_acc: 89.9%

 epoch: 488 | train_loss: 0.77, train_acc: 90.7% | test_loss: 0.76, test_acc: 91.3%


  3%|▎         | 489/15000 [00:56<20:34, 11.76it/s]


 epoch: 489 | train_loss: 0.76, train_acc: 91.6% | test_loss: 0.81, test_acc: 90.3%

input:       the young leonardo was the first to suggest making the arno river navigable channel between florence and pisa in

target:      the young leonardo was the first to suggest making the arno river navigable channel between florence and pisa in january

prediction:  the young leonardo was the first to suggest making the arno river typical channel between florence and english in the

 epoch: 490 | train_loss: 0.77, train_acc: 90.8% | test_loss: 0.79, test_acc: 91.1%


  3%|▎         | 491/15000 [00:57<22:10, 10.91it/s]


 epoch: 491 | train_loss: 0.78, train_acc: 90.9% | test_loss: 0.79, test_acc: 90.8%


  3%|▎         | 493/15000 [00:57<38:52,  6.22it/s]


 epoch: 492 | train_loss: 0.80, train_acc: 89.9% | test_loss: 0.82, test_acc: 90.9%

 epoch: 493 | train_loss: 0.84, train_acc: 90.8% | test_loss: 0.75, test_acc: 91.5%

 epoch: 494 | train_loss: 0.75, train_acc: 90.7% | test_loss: 0.70, test_acc: 91.6%


  3%|▎         | 497/15000 [00:58<28:56,  8.35it/s]


 epoch: 495 | train_loss: 0.77, train_acc: 91.1% | test_loss: 0.78, test_acc: 90.9%

 epoch: 496 | train_loss: 0.85, train_acc: 89.9% | test_loss: 0.82, test_acc: 90.5%

 epoch: 497 | train_loss: 0.71, train_acc: 92.0% | test_loss: 0.80, test_acc: 90.5%


  3%|▎         | 499/15000 [00:58<26:07,  9.25it/s]


 epoch: 498 | train_loss: 0.80, train_acc: 90.2% | test_loss: 0.80, test_acc: 90.4%

 epoch: 499 | train_loss: 0.71, train_acc: 91.3% | test_loss: 0.78, test_acc: 91.0%

input:       and global threats notably pollution the invasion of non native species and the various effects of climate change captain

target:      and global threats notably pollution the invasion of non native species and the various effects of climate change captain james

prediction:  and global threats notably pollution the invasion of non native species and the various effects of climate change practical the

 epoch: 500 | train_loss: 0.83, train_acc: 90.0% | test_loss: 0.79, test_acc: 90.8%


  3%|▎         | 503/15000 [00:58<23:48, 10.15it/s]


 epoch: 501 | train_loss: 0.80, train_acc: 90.7% | test_loss: 0.75, test_acc: 91.1%

 epoch: 502 | train_loss: 0.76, train_acc: 91.0% | test_loss: 0.80, test_acc: 89.9%

 epoch: 503 | train_loss: 0.74, train_acc: 91.0% | test_loss: 0.83, test_acc: 90.1%


  3%|▎         | 505/15000 [00:58<22:57, 10.52it/s]


 epoch: 504 | train_loss: 0.78, train_acc: 90.8% | test_loss: 0.74, test_acc: 91.2%

 epoch: 505 | train_loss: 0.84, train_acc: 90.0% | test_loss: 0.89, test_acc: 89.8%

 epoch: 506 | train_loss: 0.74, train_acc: 91.5% | test_loss: 0.81, test_acc: 90.1%


  3%|▎         | 509/15000 [00:59<32:49,  7.36it/s]


 epoch: 507 | train_loss: 0.73, train_acc: 91.9% | test_loss: 0.77, test_acc: 91.1%

 epoch: 508 | train_loss: 0.80, train_acc: 90.2% | test_loss: 0.90, test_acc: 89.0%

 epoch: 509 | train_loss: 0.74, train_acc: 91.4% | test_loss: 0.94, test_acc: 89.3%


  3%|▎         | 510/15000 [00:59<31:50,  7.58it/s]


input:       well as standardized tests to assess the students progress it also affected teachers by setting in place institutions and

target:      well as standardized tests to assess the students progress it also affected teachers by setting in place institutions and norms

prediction:  well as functionality tests to assess the students progress it also affected teachers by setting in place institutions and the

 epoch: 510 | train_loss: 0.76, train_acc: 91.0% | test_loss: 0.78, test_acc: 91.3%

 epoch: 511 | train_loss: 0.73, train_acc: 91.3% | test_loss: 0.78, test_acc: 90.6%


  3%|▎         | 514/15000 [01:00<24:45,  9.75it/s]


 epoch: 512 | train_loss: 0.75, train_acc: 91.1% | test_loss: 0.83, test_acc: 90.0%

 epoch: 513 | train_loss: 0.83, train_acc: 89.9% | test_loss: 0.81, test_acc: 90.6%

 epoch: 514 | train_loss: 0.74, train_acc: 91.1% | test_loss: 0.80, test_acc: 90.7%


  3%|▎         | 516/15000 [01:00<23:20, 10.34it/s]


 epoch: 515 | train_loss: 0.81, train_acc: 90.5% | test_loss: 0.70, test_acc: 91.7%

 epoch: 516 | train_loss: 0.69, train_acc: 91.8% | test_loss: 0.79, test_acc: 91.1%

 epoch: 517 | train_loss: 0.81, train_acc: 90.3% | test_loss: 0.80, test_acc: 90.4%


  3%|▎         | 518/15000 [01:00<22:33, 10.70it/s]


 epoch: 518 | train_loss: 0.81, train_acc: 90.5% | test_loss: 0.85, test_acc: 90.5%

 epoch: 519 | train_loss: 0.80, train_acc: 90.4% | test_loss: 0.82, test_acc: 90.5%

input:       between the three groups there are also potential relationships between humour and having healthy immune system siga is type

target:      between the three groups there are also potential relationships between humour and having healthy immune system siga is type of

prediction:  between the three groups there are also potential relationships between humour and having healthy immune system expansion is type the


  3%|▎         | 520/15000 [01:00<23:05, 10.45it/s]


 epoch: 520 | train_loss: 0.79, train_acc: 90.8% | test_loss: 0.82, test_acc: 90.6%


  3%|▎         | 522/15000 [01:01<38:35,  6.25it/s]


 epoch: 521 | train_loss: 0.78, train_acc: 90.7% | test_loss: 0.79, test_acc: 90.9%

 epoch: 522 | train_loss: 0.72, train_acc: 91.9% | test_loss: 0.68, test_acc: 92.1%

 epoch: 523 | train_loss: 0.85, train_acc: 90.1% | test_loss: 0.78, test_acc: 90.9%


  4%|▎         | 526/15000 [01:01<28:27,  8.48it/s]


 epoch: 524 | train_loss: 0.74, train_acc: 91.1% | test_loss: 0.79, test_acc: 90.9%

 epoch: 525 | train_loss: 0.83, train_acc: 90.0% | test_loss: 0.87, test_acc: 89.8%

 epoch: 526 | train_loss: 0.79, train_acc: 90.4% | test_loss: 0.76, test_acc: 90.9%


  4%|▎         | 528/15000 [01:01<25:38,  9.40it/s]


 epoch: 527 | train_loss: 0.71, train_acc: 91.6% | test_loss: 0.81, test_acc: 90.5%

 epoch: 528 | train_loss: 0.72, train_acc: 92.0% | test_loss: 0.78, test_acc: 91.1%

 epoch: 529 | train_loss: 0.74, train_acc: 90.9% | test_loss: 0.70, test_acc: 91.7%


  4%|▎         | 530/15000 [01:01<25:03,  9.63it/s]


input:       emperor adoptive father julius caesar rome had begun expanding shortly after the founding of the roman republic in the

target:      emperor adoptive father julius caesar rome had begun expanding shortly after the founding of the roman republic in the th

prediction:  emperor adoptive father julius caesar rome had begun expanding shortly after the founding of the roman republic in the the

 epoch: 530 | train_loss: 0.81, train_acc: 90.4% | test_loss: 0.75, test_acc: 91.1%

 epoch: 531 | train_loss: 0.71, train_acc: 91.7% | test_loss: 0.73, test_acc: 91.5%


  4%|▎         | 534/15000 [01:02<22:42, 10.62it/s]


 epoch: 532 | train_loss: 0.88, train_acc: 89.5% | test_loss: 0.74, test_acc: 91.1%

 epoch: 533 | train_loss: 0.75, train_acc: 91.1% | test_loss: 0.77, test_acc: 90.7%

 epoch: 534 | train_loss: 0.73, train_acc: 91.4% | test_loss: 0.71, test_acc: 91.8%

 epoch: 535 | train_loss: 0.75, train_acc: 91.2% | test_loss: 0.75, test_acc: 91.5%


  4%|▎         | 538/15000 [01:02<32:21,  7.45it/s]


 epoch: 536 | train_loss: 0.70, train_acc: 92.0% | test_loss: 0.81, test_acc: 90.4%

 epoch: 537 | train_loss: 0.76, train_acc: 90.7% | test_loss: 0.80, test_acc: 90.3%

 epoch: 538 | train_loss: 0.70, train_acc: 91.7% | test_loss: 0.78, test_acc: 90.7%


  4%|▎         | 540/15000 [01:03<31:36,  7.62it/s]


 epoch: 539 | train_loss: 0.81, train_acc: 90.2% | test_loss: 0.74, test_acc: 91.3%

input:       centered states and writing the earliest attested form of greek with the linear script the mycenaeans gradually absorbed the

target:      centered states and writing the earliest attested form of greek with the linear script the mycenaeans gradually absorbed the minoans

prediction:  centered states and writing the earliest attested form of greek with the linear script the post gradually internet the the

 epoch: 540 | train_loss: 0.72, train_acc: 91.9% | test_loss: 0.78, test_acc: 90.9%


  4%|▎         | 542/15000 [01:03<28:53,  8.34it/s]


 epoch: 541 | train_loss: 0.66, train_acc: 92.4% | test_loss: 0.74, test_acc: 91.1%

 epoch: 542 | train_loss: 0.80, train_acc: 90.2% | test_loss: 0.78, test_acc: 90.8%


  4%|▎         | 545/15000 [01:03<25:59,  9.27it/s]


 epoch: 543 | train_loss: 0.75, train_acc: 90.8% | test_loss: 0.79, test_acc: 90.2%

 epoch: 544 | train_loss: 0.72, train_acc: 91.5% | test_loss: 0.72, test_acc: 90.9%

 epoch: 545 | train_loss: 0.84, train_acc: 89.9% | test_loss: 0.79, test_acc: 90.6%


  4%|▎         | 547/15000 [01:03<25:42,  9.37it/s]


 epoch: 546 | train_loss: 0.83, train_acc: 90.3% | test_loss: 0.73, test_acc: 91.4%

 epoch: 547 | train_loss: 0.76, train_acc: 91.5% | test_loss: 0.72, test_acc: 91.5%


  4%|▎         | 549/15000 [01:04<25:32,  9.43it/s]


 epoch: 548 | train_loss: 0.78, train_acc: 90.8% | test_loss: 0.77, test_acc: 91.2%

 epoch: 549 | train_loss: 0.74, train_acc: 91.2% | test_loss: 0.79, test_acc: 91.1%

input:       culture and general usage islands at the geographic extremes of oceania are generally considered to be the bonin islands

target:      culture and general usage islands at the geographic extremes of oceania are generally considered to be the bonin islands politically

prediction:  culture and general usage islands at the geographic extremes of oceania are generally considered to be the bonin islands the


  4%|▎         | 551/15000 [01:04<50:55,  4.73it/s]  


 epoch: 550 | train_loss: 0.72, train_acc: 91.1% | test_loss: 0.76, test_acc: 91.0%

 epoch: 551 | train_loss: 0.82, train_acc: 90.4% | test_loss: 0.84, test_acc: 90.2%


  4%|▎         | 553/15000 [01:05<38:03,  6.33it/s]


 epoch: 552 | train_loss: 0.75, train_acc: 91.1% | test_loss: 0.79, test_acc: 90.5%

 epoch: 553 | train_loss: 0.73, train_acc: 91.3% | test_loss: 0.69, test_acc: 91.9%


  4%|▎         | 555/15000 [01:05<31:31,  7.64it/s]


 epoch: 554 | train_loss: 0.82, train_acc: 90.2% | test_loss: 0.68, test_acc: 92.0%

 epoch: 555 | train_loss: 0.83, train_acc: 90.4% | test_loss: 0.72, test_acc: 91.4%


  4%|▎         | 558/15000 [01:05<27:10,  8.86it/s]


 epoch: 556 | train_loss: 0.82, train_acc: 90.4% | test_loss: 0.77, test_acc: 90.8%

 epoch: 557 | train_loss: 0.78, train_acc: 90.8% | test_loss: 0.77, test_acc: 91.3%

 epoch: 558 | train_loss: 0.82, train_acc: 90.0% | test_loss: 0.73, test_acc: 91.4%


  4%|▎         | 560/15000 [01:05<29:08,  8.26it/s]


 epoch: 559 | train_loss: 0.70, train_acc: 91.9% | test_loss: 0.86, test_acc: 90.5%

input:       largest and second most populous continent after asia in both aspects at about million km million square miles including

target:      largest and second most populous continent after asia in both aspects at about million km million square miles including adjacent

prediction:  largest and second most populous continent after asia in both aspects at about million km million square miles including the

 epoch: 560 | train_loss: 0.76, train_acc: 90.8% | test_loss: 0.81, test_acc: 90.6%


  4%|▎         | 562/15000 [01:06<27:07,  8.87it/s]


 epoch: 561 | train_loss: 0.68, train_acc: 92.1% | test_loss: 0.73, test_acc: 92.0%

 epoch: 562 | train_loss: 0.74, train_acc: 91.4% | test_loss: 0.69, test_acc: 92.0%


  4%|▍         | 563/15000 [01:06<26:40,  9.02it/s]


 epoch: 563 | train_loss: 0.78, train_acc: 90.5% | test_loss: 0.76, test_acc: 91.4%


  4%|▍         | 566/15000 [01:06<34:40,  6.94it/s]


 epoch: 564 | train_loss: 0.77, train_acc: 91.1% | test_loss: 0.77, test_acc: 91.2%

 epoch: 565 | train_loss: 0.66, train_acc: 92.1% | test_loss: 0.76, test_acc: 91.1%

 epoch: 566 | train_loss: 0.77, train_acc: 91.1% | test_loss: 0.74, test_acc: 91.0%


  4%|▍         | 568/15000 [01:06<28:53,  8.33it/s]


 epoch: 567 | train_loss: 0.79, train_acc: 90.5% | test_loss: 0.72, test_acc: 91.4%

 epoch: 568 | train_loss: 0.77, train_acc: 91.1% | test_loss: 0.78, test_acc: 90.8%

 epoch: 569 | train_loss: 0.73, train_acc: 91.4% | test_loss: 0.65, test_acc: 92.5%


  4%|▍         | 570/15000 [01:07<27:18,  8.81it/s]


input:       and whose associates ralph alpher and robert herman predicted the cmb ironically it was hoyle who coined the phrase

target:      and whose associates ralph alpher and robert herman predicted the cmb ironically it was hoyle who coined the phrase that

prediction:  and whose associates behaviour process and robert migrated predicted the cmb moderators it was hoyle who coined the phrase the

 epoch: 570 | train_loss: 0.70, train_acc: 92.2% | test_loss: 0.79, test_acc: 90.7%

 epoch: 571 | train_loss: 0.72, train_acc: 91.4% | test_loss: 0.73, test_acc: 91.4%


  4%|▍         | 574/15000 [01:07<23:44, 10.13it/s]


 epoch: 572 | train_loss: 0.74, train_acc: 91.1% | test_loss: 0.83, test_acc: 90.2%

 epoch: 573 | train_loss: 0.65, train_acc: 92.7% | test_loss: 0.72, test_acc: 91.2%

 epoch: 574 | train_loss: 0.78, train_acc: 90.5% | test_loss: 0.67, test_acc: 92.5%


  4%|▍         | 576/15000 [01:07<22:50, 10.52it/s]


 epoch: 575 | train_loss: 0.77, train_acc: 90.9% | test_loss: 0.70, test_acc: 91.2%

 epoch: 576 | train_loss: 0.69, train_acc: 91.7% | test_loss: 0.79, test_acc: 90.8%

 epoch: 577 | train_loss: 0.75, train_acc: 91.1% | test_loss: 0.72, test_acc: 92.0%


  4%|▍         | 580/15000 [01:08<33:50,  7.10it/s]


 epoch: 578 | train_loss: 0.80, train_acc: 90.5% | test_loss: 0.74, test_acc: 91.0%

 epoch: 579 | train_loss: 0.77, train_acc: 90.4% | test_loss: 0.77, test_acc: 90.5%

input:       as are the languages intended for execution he also argues that textual and even graphical input formats that affect

target:      as are the languages intended for execution he also argues that textual and even graphical input formats that affect the

prediction:  as are the languages intended for execution he also argues that textual and even graphical input zealand that affect the

 epoch: 580 | train_loss: 0.81, train_acc: 90.2% | test_loss: 0.72, test_acc: 91.3%


  4%|▍         | 582/15000 [01:08<29:23,  8.17it/s]


 epoch: 581 | train_loss: 0.79, train_acc: 90.2% | test_loss: 0.77, test_acc: 90.8%

 epoch: 582 | train_loss: 0.78, train_acc: 90.5% | test_loss: 0.63, test_acc: 92.7%

 epoch: 583 | train_loss: 0.72, train_acc: 91.6% | test_loss: 0.65, test_acc: 92.5%


  4%|▍         | 586/15000 [01:08<24:11,  9.93it/s]


 epoch: 584 | train_loss: 0.80, train_acc: 90.6% | test_loss: 0.69, test_acc: 91.4%

 epoch: 585 | train_loss: 0.68, train_acc: 92.1% | test_loss: 0.77, test_acc: 90.5%

 epoch: 586 | train_loss: 0.73, train_acc: 91.3% | test_loss: 0.80, test_acc: 90.7%


  4%|▍         | 588/15000 [01:09<23:10, 10.36it/s]


 epoch: 587 | train_loss: 0.72, train_acc: 91.4% | test_loss: 0.78, test_acc: 90.9%

 epoch: 588 | train_loss: 0.72, train_acc: 91.5% | test_loss: 0.78, test_acc: 91.0%

 epoch: 589 | train_loss: 0.74, train_acc: 91.3% | test_loss: 0.78, test_acc: 90.5%


  4%|▍         | 590/15000 [01:09<23:35, 10.18it/s]


input:       port argentina has ports such as port of buenos aires and port of rosario chile has important ports in

target:      port argentina has ports such as port of buenos aires and port of rosario chile has important ports in valpara

prediction:  port argentina has ports such as port of buenos aires and port of varying chile has important ports in the

 epoch: 590 | train_loss: 0.78, train_acc: 90.3% | test_loss: 0.73, test_acc: 91.2%

 epoch: 591 | train_loss: 0.73, train_acc: 91.4% | test_loss: 0.75, test_acc: 90.9%


  4%|▍         | 594/15000 [01:10<33:17,  7.21it/s]


 epoch: 592 | train_loss: 0.80, train_acc: 90.6% | test_loss: 0.68, test_acc: 92.4%

 epoch: 593 | train_loss: 0.80, train_acc: 91.5% | test_loss: 0.74, test_acc: 91.8%

 epoch: 594 | train_loss: 0.64, train_acc: 93.0% | test_loss: 0.74, test_acc: 91.2%


  4%|▍         | 596/15000 [01:10<29:09,  8.24it/s]


 epoch: 595 | train_loss: 0.78, train_acc: 90.7% | test_loss: 0.72, test_acc: 91.0%

 epoch: 596 | train_loss: 0.74, train_acc: 91.2% | test_loss: 0.76, test_acc: 90.9%

 epoch: 597 | train_loss: 0.77, train_acc: 90.8% | test_loss: 0.66, test_acc: 92.4%


  4%|▍         | 600/15000 [01:10<25:32,  9.39it/s]


 epoch: 598 | train_loss: 0.86, train_acc: 90.0% | test_loss: 0.80, test_acc: 90.5%

 epoch: 599 | train_loss: 0.72, train_acc: 91.6% | test_loss: 0.80, test_acc: 90.8%

input:       mind and educere meaning bring out lead forth and refers to the bodily level some theorists provide precise definitions

target:      mind and educere meaning bring out lead forth and refers to the bodily level some theorists provide precise definitions by

prediction:  mind and sedimentary meaning bring out lead forth and refers to the bodily level some theorists provide precise definitions the

 epoch: 600 | train_loss: 0.75, train_acc: 91.5% | test_loss: 0.73, test_acc: 92.0%


  4%|▍         | 602/15000 [01:10<23:43, 10.11it/s]


 epoch: 601 | train_loss: 0.65, train_acc: 92.4% | test_loss: 0.72, test_acc: 91.5%

 epoch: 602 | train_loss: 0.73, train_acc: 90.5% | test_loss: 0.81, test_acc: 90.2%

 epoch: 603 | train_loss: 0.77, train_acc: 90.4% | test_loss: 0.74, test_acc: 91.0%


  4%|▍         | 606/15000 [01:11<22:32, 10.64it/s]


 epoch: 604 | train_loss: 0.69, train_acc: 92.0% | test_loss: 0.70, test_acc: 91.6%

 epoch: 605 | train_loss: 0.74, train_acc: 90.9% | test_loss: 0.70, test_acc: 92.5%

 epoch: 606 | train_loss: 0.72, train_acc: 91.7% | test_loss: 0.71, test_acc: 91.9%


  4%|▍         | 608/15000 [01:11<35:23,  6.78it/s]


 epoch: 607 | train_loss: 0.69, train_acc: 92.2% | test_loss: 0.81, test_acc: 90.7%

 epoch: 608 | train_loss: 0.68, train_acc: 92.2% | test_loss: 0.78, test_acc: 90.5%

 epoch: 609 | train_loss: 0.79, train_acc: 90.5% | test_loss: 0.76, test_acc: 90.9%


  4%|▍         | 610/15000 [01:11<31:57,  7.50it/s]


input:       concordant allele results because the relationships of these two mummies with the kv mummy had previously been confirmed in

target:      concordant allele results because the relationships of these two mummies with the kv mummy had previously been confirmed in an

prediction:  wide allele results because the relationships of these two mummies with the attacks mummy had previously been confirmed in of

 epoch: 610 | train_loss: 0.69, train_acc: 92.1% | test_loss: 0.75, test_acc: 90.8%

 epoch: 611 | train_loss: 0.74, train_acc: 91.3% | test_loss: 0.78, test_acc: 91.1%


  4%|▍         | 614/15000 [01:12<25:18,  9.48it/s]


 epoch: 612 | train_loss: 0.73, train_acc: 91.5% | test_loss: 0.64, test_acc: 92.5%

 epoch: 613 | train_loss: 0.66, train_acc: 92.5% | test_loss: 0.74, test_acc: 91.3%

 epoch: 614 | train_loss: 0.72, train_acc: 91.3% | test_loss: 0.75, test_acc: 91.1%


  4%|▍         | 616/15000 [01:12<23:47, 10.08it/s]


 epoch: 615 | train_loss: 0.67, train_acc: 92.2% | test_loss: 0.73, test_acc: 91.6%

 epoch: 616 | train_loss: 0.74, train_acc: 91.6% | test_loss: 0.67, test_acc: 92.5%

 epoch: 617 | train_loss: 0.78, train_acc: 91.2% | test_loss: 0.79, test_acc: 90.9%


  4%|▍         | 618/15000 [01:12<23:05, 10.38it/s]


 epoch: 618 | train_loss: 0.69, train_acc: 91.6% | test_loss: 0.76, test_acc: 90.8%

 epoch: 619 | train_loss: 0.69, train_acc: 92.1% | test_loss: 0.66, test_acc: 92.0%

input:       or even desirable outcome in matches between these highly skilled fighters whose training was costly and time consuming by

target:      or even desirable outcome in matches between these highly skilled fighters whose training was costly and time consuming by contrast

prediction:  or even desirable outcome in matches between these highly skilled two whose training was sound and time purchase by the


  4%|▍         | 620/15000 [01:12<23:45, 10.09it/s]


 epoch: 620 | train_loss: 0.70, train_acc: 92.4% | test_loss: 0.63, test_acc: 92.6%


  4%|▍         | 622/15000 [01:13<38:46,  6.18it/s]


 epoch: 621 | train_loss: 0.66, train_acc: 92.6% | test_loss: 0.68, test_acc: 92.2%

 epoch: 622 | train_loss: 0.65, train_acc: 92.6% | test_loss: 0.75, test_acc: 91.0%

 epoch: 623 | train_loss: 0.67, train_acc: 92.1% | test_loss: 0.68, test_acc: 92.2%


  4%|▍         | 626/15000 [01:13<28:47,  8.32it/s]


 epoch: 624 | train_loss: 0.71, train_acc: 91.7% | test_loss: 0.67, test_acc: 92.4%

 epoch: 625 | train_loss: 0.74, train_acc: 91.7% | test_loss: 0.73, test_acc: 91.5%

 epoch: 626 | train_loss: 0.68, train_acc: 92.1% | test_loss: 0.73, test_acc: 91.4%


  4%|▍         | 628/15000 [01:13<25:51,  9.26it/s]


 epoch: 627 | train_loss: 0.72, train_acc: 91.4% | test_loss: 0.74, test_acc: 91.4%

 epoch: 628 | train_loss: 0.69, train_acc: 91.5% | test_loss: 0.69, test_acc: 91.9%

 epoch: 629 | train_loss: 0.79, train_acc: 90.8% | test_loss: 0.64, test_acc: 93.1%


  4%|▍         | 630/15000 [01:13<25:17,  9.47it/s]


input:       each have populations higher than million greenland has small population of for its massive size km or mi and

target:      each have populations higher than million greenland has small population of for its massive size km or mi and therefore

prediction:  each have populations higher than million greenland has small population of for its massive size km or mi and the

 epoch: 630 | train_loss: 0.77, train_acc: 90.8% | test_loss: 0.79, test_acc: 90.7%

 epoch: 631 | train_loss: 0.70, train_acc: 92.0% | test_loss: 0.75, test_acc: 92.0%


  4%|▍         | 634/15000 [01:14<22:55, 10.44it/s]


 epoch: 632 | train_loss: 0.76, train_acc: 90.9% | test_loss: 0.71, test_acc: 91.4%

 epoch: 633 | train_loss: 0.73, train_acc: 91.6% | test_loss: 0.74, test_acc: 91.5%

 epoch: 634 | train_loss: 0.68, train_acc: 91.8% | test_loss: 0.73, test_acc: 91.8%

 epoch: 635 | train_loss: 0.69, train_acc: 92.2% | test_loss: 0.69, test_acc: 91.8%


  4%|▍         | 638/15000 [01:15<32:05,  7.46it/s]


 epoch: 636 | train_loss: 0.73, train_acc: 91.3% | test_loss: 0.66, test_acc: 92.3%

 epoch: 637 | train_loss: 0.76, train_acc: 90.9% | test_loss: 0.65, test_acc: 92.2%

 epoch: 638 | train_loss: 0.77, train_acc: 91.1% | test_loss: 0.68, test_acc: 92.1%


  4%|▍         | 640/15000 [01:15<29:13,  8.19it/s]


 epoch: 639 | train_loss: 0.63, train_acc: 93.1% | test_loss: 0.66, test_acc: 92.4%

input:       government in athens the population was divided into four social classes based on wealth people could change classes if

target:      government in athens the population was divided into four social classes based on wealth people could change classes if they

prediction:  government in athens the population was divided into four social classes based on wealth people could change classes if the

 epoch: 640 | train_loss: 0.71, train_acc: 91.8% | test_loss: 0.70, test_acc: 91.6%

 epoch: 641 | train_loss: 0.76, train_acc: 90.4% | test_loss: 0.66, test_acc: 92.1%


  4%|▍         | 644/15000 [01:15<24:34,  9.74it/s]


 epoch: 642 | train_loss: 0.77, train_acc: 90.9% | test_loss: 0.65, test_acc: 92.3%

 epoch: 643 | train_loss: 0.65, train_acc: 91.9% | test_loss: 0.70, test_acc: 91.7%

 epoch: 644 | train_loss: 0.70, train_acc: 91.2% | test_loss: 0.72, test_acc: 92.3%


  4%|▍         | 646/15000 [01:15<22:59, 10.40it/s]


 epoch: 645 | train_loss: 0.75, train_acc: 91.4% | test_loss: 0.66, test_acc: 92.5%

 epoch: 646 | train_loss: 0.73, train_acc: 91.7% | test_loss: 0.62, test_acc: 93.4%

 epoch: 647 | train_loss: 0.70, train_acc: 91.8% | test_loss: 0.69, test_acc: 91.6%


  4%|▍         | 648/15000 [01:15<21:53, 10.93it/s]


 epoch: 648 | train_loss: 0.69, train_acc: 91.8% | test_loss: 0.61, test_acc: 93.0%

 epoch: 649 | train_loss: 0.75, train_acc: 90.7% | test_loss: 0.63, test_acc: 93.0%

input:       usage was used to project power and higher social class different emperors up until justinian would attempt to require

target:      usage was used to project power and higher social class different emperors up until justinian would attempt to require the

prediction:  usage was used to project power and higher social class different emperors up until justinian would attempt to require the


  4%|▍         | 652/15000 [01:16<23:15, 10.28it/s]


 epoch: 650 | train_loss: 0.71, train_acc: 91.6% | test_loss: 0.64, test_acc: 92.0%

 epoch: 651 | train_loss: 0.71, train_acc: 91.2% | test_loss: 0.74, test_acc: 91.6%

 epoch: 652 | train_loss: 0.70, train_acc: 92.2% | test_loss: 0.75, test_acc: 91.5%


  4%|▍         | 654/15000 [01:16<23:26, 10.20it/s]


 epoch: 653 | train_loss: 0.75, train_acc: 90.8% | test_loss: 0.80, test_acc: 90.8%

 epoch: 654 | train_loss: 0.64, train_acc: 92.8% | test_loss: 0.79, test_acc: 90.7%

 epoch: 655 | train_loss: 0.72, train_acc: 91.2% | test_loss: 0.74, test_acc: 91.1%


  4%|▍         | 658/15000 [01:16<22:52, 10.45it/s]


 epoch: 656 | train_loss: 0.61, train_acc: 92.8% | test_loss: 0.75, test_acc: 91.4%

 epoch: 657 | train_loss: 0.69, train_acc: 91.5% | test_loss: 0.75, test_acc: 91.2%

 epoch: 658 | train_loss: 0.68, train_acc: 91.8% | test_loss: 0.74, test_acc: 91.4%


  4%|▍         | 660/15000 [01:17<24:57,  9.58it/s]


 epoch: 659 | train_loss: 0.72, train_acc: 91.8% | test_loss: 0.69, test_acc: 92.0%

input:       s to have large scale manufacturing of sewing machines bicycles and other items in the late th century in

target:      s to have large scale manufacturing of sewing machines bicycles and other items in the late th century in the

prediction:  s to have large scale manufacturing of some machines particles and other items in the late th century in the

 epoch: 660 | train_loss: 0.70, train_acc: 91.7% | test_loss: 0.72, test_acc: 91.7%


  4%|▍         | 662/15000 [01:17<24:53,  9.60it/s]


 epoch: 661 | train_loss: 0.72, train_acc: 91.5% | test_loss: 0.78, test_acc: 90.9%

 epoch: 662 | train_loss: 0.68, train_acc: 92.0% | test_loss: 0.67, test_acc: 92.2%

 epoch: 663 | train_loss: 0.61, train_acc: 92.7% | test_loss: 0.71, test_acc: 91.5%


  4%|▍         | 665/15000 [01:18<40:37,  5.88it/s]


 epoch: 664 | train_loss: 0.73, train_acc: 91.2% | test_loss: 0.65, test_acc: 92.0%

 epoch: 665 | train_loss: 0.77, train_acc: 90.4% | test_loss: 0.80, test_acc: 90.3%


  4%|▍         | 669/15000 [01:18<29:59,  7.96it/s]


 epoch: 666 | train_loss: 0.71, train_acc: 91.9% | test_loss: 0.66, test_acc: 92.1%

 epoch: 667 | train_loss: 0.71, train_acc: 91.6% | test_loss: 0.81, test_acc: 90.6%

 epoch: 668 | train_loss: 0.62, train_acc: 92.9% | test_loss: 0.66, test_acc: 92.4%

 epoch: 669 | train_loss: 0.70, train_acc: 91.7% | test_loss: 0.72, test_acc: 91.4%


  4%|▍         | 671/15000 [01:18<30:57,  7.71it/s]


input:       building community fyreuk to help render the environments into minecraft the first pilot project began in kibera one of

target:      building community fyreuk to help render the environments into minecraft the first pilot project began in kibera one of nairobi

prediction:  building community old to help wealth the environments into minecraft the first fertile project began in conquest one of the

 epoch: 670 | train_loss: 0.71, train_acc: 91.5% | test_loss: 0.72, test_acc: 91.3%

 epoch: 671 | train_loss: 0.69, train_acc: 91.7% | test_loss: 0.68, test_acc: 92.3%


  4%|▍         | 673/15000 [01:19<28:34,  8.35it/s]


 epoch: 672 | train_loss: 0.76, train_acc: 90.9% | test_loss: 0.65, test_acc: 92.4%

 epoch: 673 | train_loss: 0.71, train_acc: 91.8% | test_loss: 0.69, test_acc: 92.0%


  5%|▍         | 676/15000 [01:19<27:35,  8.65it/s]


 epoch: 674 | train_loss: 0.74, train_acc: 91.2% | test_loss: 0.68, test_acc: 92.0%

 epoch: 675 | train_loss: 0.62, train_acc: 92.9% | test_loss: 0.75, test_acc: 91.2%

 epoch: 676 | train_loss: 0.66, train_acc: 91.9% | test_loss: 0.80, test_acc: 90.5%


  5%|▍         | 677/15000 [01:19<27:15,  8.76it/s]


 epoch: 677 | train_loss: 0.77, train_acc: 90.6% | test_loss: 0.72, test_acc: 91.9%


  5%|▍         | 680/15000 [01:20<42:27,  5.62it/s]


 epoch: 678 | train_loss: 0.69, train_acc: 91.7% | test_loss: 0.58, test_acc: 93.0%

 epoch: 679 | train_loss: 0.66, train_acc: 92.3% | test_loss: 0.66, test_acc: 91.9%

input:       called syntax grammar can be described as system of categories and set of rules that determine how categories combine

target:      called syntax grammar can be described as system of categories and set of rules that determine how categories combine to

prediction:  called syntax grammar can be described as system of categories and set of rules that determine how categories combine the

 epoch: 680 | train_loss: 0.64, train_acc: 92.3% | test_loss: 0.69, test_acc: 92.1%


  5%|▍         | 682/15000 [01:20<33:37,  7.10it/s]


 epoch: 681 | train_loss: 0.64, train_acc: 92.4% | test_loss: 0.64, test_acc: 92.5%

 epoch: 682 | train_loss: 0.64, train_acc: 92.2% | test_loss: 0.70, test_acc: 91.9%

 epoch: 683 | train_loss: 0.78, train_acc: 90.8% | test_loss: 0.72, test_acc: 91.7%


  5%|▍         | 686/15000 [01:20<25:56,  9.20it/s]


 epoch: 684 | train_loss: 0.75, train_acc: 91.1% | test_loss: 0.66, test_acc: 92.3%

 epoch: 685 | train_loss: 0.66, train_acc: 92.2% | test_loss: 0.68, test_acc: 92.0%

 epoch: 686 | train_loss: 0.71, train_acc: 91.5% | test_loss: 0.65, test_acc: 92.1%


  5%|▍         | 688/15000 [01:20<24:14,  9.84it/s]


 epoch: 687 | train_loss: 0.63, train_acc: 92.5% | test_loss: 0.74, test_acc: 91.2%

 epoch: 688 | train_loss: 0.63, train_acc: 92.5% | test_loss: 0.63, test_acc: 92.7%

 epoch: 689 | train_loss: 0.66, train_acc: 92.4% | test_loss: 0.66, test_acc: 92.0%


  5%|▍         | 690/15000 [01:21<24:53,  9.58it/s]


input:       proper with anatomically modern homo sapiens with the upper paleolithic revolution less than years ago chomsky is one prominent

target:      proper with anatomically modern homo sapiens with the upper paleolithic revolution less than years ago chomsky is one prominent proponent

prediction:  proper with standard modern homo sapiens with the upper paleolithic revolution less than years ago chomsky is one prominent the

 epoch: 690 | train_loss: 0.63, train_acc: 92.5% | test_loss: 0.67, test_acc: 92.0%

 epoch: 691 | train_loss: 0.75, train_acc: 91.1% | test_loss: 0.83, test_acc: 90.4%


  5%|▍         | 694/15000 [01:21<25:40,  9.29it/s]


 epoch: 692 | train_loss: 0.64, train_acc: 92.6% | test_loss: 0.63, test_acc: 92.8%

 epoch: 693 | train_loss: 0.63, train_acc: 93.1% | test_loss: 0.71, test_acc: 92.0%

 epoch: 694 | train_loss: 0.73, train_acc: 91.3% | test_loss: 0.72, test_acc: 91.7%


  5%|▍         | 696/15000 [01:21<23:39, 10.07it/s]


 epoch: 695 | train_loss: 0.62, train_acc: 92.4% | test_loss: 0.75, test_acc: 91.1%

 epoch: 696 | train_loss: 0.67, train_acc: 92.3% | test_loss: 0.77, test_acc: 90.6%

 epoch: 697 | train_loss: 0.75, train_acc: 91.3% | test_loss: 0.68, test_acc: 92.4%


  5%|▍         | 700/15000 [01:22<22:44, 10.48it/s]


 epoch: 698 | train_loss: 0.68, train_acc: 91.7% | test_loss: 0.73, test_acc: 91.3%

 epoch: 699 | train_loss: 0.62, train_acc: 92.7% | test_loss: 0.66, test_acc: 92.0%

input:       word typology in english transitive clauses the subjects of both intransitive sentences run and transitive sentences love you are

target:      word typology in english transitive clauses the subjects of both intransitive sentences run and transitive sentences love you are treated

prediction:  word artists in english possible numerous the subjects of both wilson sentences run and possible sentences love you are the

 epoch: 700 | train_loss: 0.65, train_acc: 92.7% | test_loss: 0.71, test_acc: 91.7%


  5%|▍         | 702/15000 [01:22<22:03, 10.80it/s]


 epoch: 701 | train_loss: 0.72, train_acc: 91.6% | test_loss: 0.72, test_acc: 91.7%

 epoch: 702 | train_loss: 0.70, train_acc: 91.6% | test_loss: 0.65, test_acc: 92.4%

 epoch: 703 | train_loss: 0.68, train_acc: 92.1% | test_loss: 0.77, test_acc: 91.1%


  5%|▍         | 706/15000 [01:22<21:32, 11.06it/s]


 epoch: 704 | train_loss: 0.68, train_acc: 92.0% | test_loss: 0.65, test_acc: 92.7%

 epoch: 705 | train_loss: 0.73, train_acc: 91.2% | test_loss: 0.79, test_acc: 90.4%

 epoch: 706 | train_loss: 0.63, train_acc: 92.8% | test_loss: 0.63, test_acc: 92.8%


  5%|▍         | 708/15000 [01:23<35:57,  6.62it/s]


 epoch: 707 | train_loss: 0.71, train_acc: 91.0% | test_loss: 0.71, test_acc: 91.6%

 epoch: 708 | train_loss: 0.71, train_acc: 92.0% | test_loss: 0.72, test_acc: 91.6%

 epoch: 709 | train_loss: 0.62, train_acc: 92.3% | test_loss: 0.65, test_acc: 92.7%


  5%|▍         | 710/15000 [01:23<32:08,  7.41it/s]


input:       representation for each state many policy issues are decentralized at state or local level with widely differing laws by

target:      representation for each state many policy issues are decentralized at state or local level with widely differing laws by jurisdiction

prediction:  representation for each state many policy issues are decentralized at state or local level with widely children laws by the

 epoch: 710 | train_loss: 0.67, train_acc: 92.0% | test_loss: 0.70, test_acc: 92.0%

 epoch: 711 | train_loss: 0.67, train_acc: 91.9% | test_loss: 0.68, test_acc: 92.1%


  5%|▍         | 714/15000 [01:23<26:21,  9.03it/s]


 epoch: 712 | train_loss: 0.64, train_acc: 92.1% | test_loss: 0.63, test_acc: 92.7%

 epoch: 713 | train_loss: 0.70, train_acc: 91.8% | test_loss: 0.65, test_acc: 92.6%

 epoch: 714 | train_loss: 0.64, train_acc: 92.8% | test_loss: 0.67, test_acc: 92.2%


  5%|▍         | 716/15000 [01:24<24:25,  9.74it/s]


 epoch: 715 | train_loss: 0.67, train_acc: 91.7% | test_loss: 0.70, test_acc: 91.8%

 epoch: 716 | train_loss: 0.70, train_acc: 91.8% | test_loss: 0.70, test_acc: 91.9%

 epoch: 717 | train_loss: 0.61, train_acc: 93.0% | test_loss: 0.69, test_acc: 91.6%


  5%|▍         | 718/15000 [01:24<23:19, 10.20it/s]


 epoch: 718 | train_loss: 0.59, train_acc: 93.3% | test_loss: 0.67, test_acc: 92.0%

 epoch: 719 | train_loss: 0.64, train_acc: 92.3% | test_loss: 0.70, test_acc: 91.6%

input:       high precision spectrum measurements showed that the cmb frequency spectrum is an almost perfect blackbody with no deviations at

target:      high precision spectrum measurements showed that the cmb frequency spectrum is an almost perfect blackbody with no deviations at level

prediction:  high precision spectrum measurements showed that the cmb frequency spectrum is an almost perfect blackbody with no confederation at the


  5%|▍         | 720/15000 [01:24<24:02,  9.90it/s]


 epoch: 720 | train_loss: 0.68, train_acc: 92.1% | test_loss: 0.73, test_acc: 91.7%


  5%|▍         | 722/15000 [01:25<38:47,  6.14it/s]


 epoch: 721 | train_loss: 0.65, train_acc: 92.7% | test_loss: 0.71, test_acc: 92.0%

 epoch: 722 | train_loss: 0.67, train_acc: 92.0% | test_loss: 0.66, test_acc: 92.4%

 epoch: 723 | train_loss: 0.69, train_acc: 91.5% | test_loss: 0.67, test_acc: 92.2%


  5%|▍         | 726/15000 [01:25<28:39,  8.30it/s]


 epoch: 724 | train_loss: 0.60, train_acc: 92.8% | test_loss: 0.69, test_acc: 91.2%

 epoch: 725 | train_loss: 0.62, train_acc: 93.0% | test_loss: 0.65, test_acc: 91.9%

 epoch: 726 | train_loss: 0.69, train_acc: 92.0% | test_loss: 0.65, test_acc: 92.4%


  5%|▍         | 728/15000 [01:25<25:59,  9.15it/s]


 epoch: 727 | train_loss: 0.65, train_acc: 92.3% | test_loss: 0.72, test_acc: 91.6%

 epoch: 728 | train_loss: 0.67, train_acc: 92.1% | test_loss: 0.66, test_acc: 92.7%

 epoch: 729 | train_loss: 0.75, train_acc: 91.3% | test_loss: 0.68, test_acc: 92.2%


  5%|▍         | 730/15000 [01:25<25:02,  9.49it/s]


input:       horacio pi ero at the university of buenos aires in russia too researchers placed greater emphasis on the biological

target:      horacio pi ero at the university of buenos aires in russia too researchers placed greater emphasis on the biological basis

prediction:  smallest pi iii at the university of buenos aires in russia too researchers placed greater emphasis on the biological the

 epoch: 730 | train_loss: 0.65, train_acc: 92.4% | test_loss: 0.71, test_acc: 91.7%

 epoch: 731 | train_loss: 0.72, train_acc: 91.5% | test_loss: 0.59, test_acc: 93.4%


  5%|▍         | 734/15000 [01:26<22:47, 10.43it/s]


 epoch: 732 | train_loss: 0.74, train_acc: 91.5% | test_loss: 0.66, test_acc: 92.3%

 epoch: 733 | train_loss: 0.67, train_acc: 92.0% | test_loss: 0.63, test_acc: 92.5%

 epoch: 734 | train_loss: 0.66, train_acc: 92.7% | test_loss: 0.70, test_acc: 91.3%

 epoch: 735 | train_loss: 0.55, train_acc: 93.7% | test_loss: 0.68, test_acc: 92.0%


  5%|▍         | 738/15000 [01:26<29:54,  7.95it/s]


 epoch: 736 | train_loss: 0.70, train_acc: 91.6% | test_loss: 0.63, test_acc: 92.8%

 epoch: 737 | train_loss: 0.65, train_acc: 92.7% | test_loss: 0.63, test_acc: 92.9%

 epoch: 738 | train_loss: 0.60, train_acc: 93.0% | test_loss: 0.70, test_acc: 91.7%


  5%|▍         | 740/15000 [01:26<28:00,  8.48it/s]


 epoch: 739 | train_loss: 0.78, train_acc: 91.0% | test_loss: 0.74, test_acc: 91.0%

input:       union driven by ideological differences the two countries led military affairs of europe with the and its nato

target:      union driven by ideological differences the two countries led military affairs of europe with the and its nato allies

prediction:  union driven by ideological differences the two countries led military affairs of europe with the and its nato the

 epoch: 740 | train_loss: 0.68, train_acc: 92.4% | test_loss: 0.64, test_acc: 92.6%

 epoch: 741 | train_loss: 0.61, train_acc: 93.3% | test_loss: 0.68, test_acc: 91.9%


  5%|▍         | 744/15000 [01:27<23:07, 10.28it/s]


 epoch: 742 | train_loss: 0.62, train_acc: 92.5% | test_loss: 0.56, test_acc: 93.6%

 epoch: 743 | train_loss: 0.68, train_acc: 92.4% | test_loss: 0.75, test_acc: 90.8%

 epoch: 744 | train_loss: 0.70, train_acc: 91.7% | test_loss: 0.58, test_acc: 93.1%


  5%|▍         | 746/15000 [01:27<22:10, 10.71it/s]


 epoch: 745 | train_loss: 0.59, train_acc: 93.2% | test_loss: 0.56, test_acc: 93.4%

 epoch: 746 | train_loss: 0.75, train_acc: 91.1% | test_loss: 0.63, test_acc: 92.8%

 epoch: 747 | train_loss: 0.61, train_acc: 92.9% | test_loss: 0.73, test_acc: 91.2%


  5%|▍         | 748/15000 [01:27<21:50, 10.87it/s]


 epoch: 748 | train_loss: 0.60, train_acc: 93.2% | test_loss: 0.66, test_acc: 92.3%

 epoch: 749 | train_loss: 0.72, train_acc: 91.7% | test_loss: 0.62, test_acc: 92.8%

input:       statements and odd images intended to provoke amusement such as the following after time they saw some land at

target:      statements and odd images intended to provoke amusement such as the following after time they saw some land at distance

prediction:  statements and odd images intended to cooperation amusement such as the following after time they saw some land at the


  5%|▌         | 752/15000 [01:28<32:21,  7.34it/s]


 epoch: 750 | train_loss: 0.63, train_acc: 92.4% | test_loss: 0.60, test_acc: 93.1%

 epoch: 751 | train_loss: 0.63, train_acc: 92.4% | test_loss: 0.61, test_acc: 93.0%

 epoch: 752 | train_loss: 0.66, train_acc: 92.1% | test_loss: 0.65, test_acc: 92.8%


  5%|▌         | 754/15000 [01:28<28:28,  8.34it/s]


 epoch: 753 | train_loss: 0.66, train_acc: 92.4% | test_loss: 0.60, test_acc: 93.0%

 epoch: 754 | train_loss: 0.65, train_acc: 92.5% | test_loss: 0.68, test_acc: 92.1%

 epoch: 755 | train_loss: 0.64, train_acc: 92.7% | test_loss: 0.61, test_acc: 92.8%


  5%|▌         | 758/15000 [01:28<23:46,  9.99it/s]


 epoch: 756 | train_loss: 0.61, train_acc: 92.6% | test_loss: 0.70, test_acc: 91.1%

 epoch: 757 | train_loss: 0.59, train_acc: 93.2% | test_loss: 0.63, test_acc: 92.0%

 epoch: 758 | train_loss: 0.63, train_acc: 92.8% | test_loss: 0.69, test_acc: 92.0%


  5%|▌         | 760/15000 [01:29<23:14, 10.21it/s]


 epoch: 759 | train_loss: 0.59, train_acc: 92.9% | test_loss: 0.64, test_acc: 92.1%

input:       throughout mesopotamia afghanistan and iran chickens were domesticated in addition to the earlier crops and animals they developed their

target:      throughout mesopotamia afghanistan and iran chickens were domesticated in addition to the earlier crops and animals they developed their own

prediction:  throughout mesopotamia afghanistan and iran chickens were domesticated in addition to the earlier crops and animals they developed their the

 epoch: 760 | train_loss: 0.66, train_acc: 91.6% | test_loss: 0.58, test_acc: 93.6%

 epoch: 761 | train_loss: 0.69, train_acc: 91.8% | test_loss: 0.56, test_acc: 93.9%


  5%|▌         | 762/15000 [01:29<22:17, 10.65it/s]


 epoch: 762 | train_loss: 0.71, train_acc: 91.8% | test_loss: 0.62, test_acc: 92.6%

 epoch: 763 | train_loss: 0.68, train_acc: 91.8% | test_loss: 0.68, test_acc: 91.8%


  5%|▌         | 765/15000 [01:29<33:40,  7.04it/s]


 epoch: 764 | train_loss: 0.63, train_acc: 92.7% | test_loss: 0.66, test_acc: 92.2%

 epoch: 765 | train_loss: 0.67, train_acc: 92.1% | test_loss: 0.68, test_acc: 92.2%


  5%|▌         | 767/15000 [01:30<31:02,  7.64it/s]


 epoch: 766 | train_loss: 0.67, train_acc: 91.3% | test_loss: 0.64, test_acc: 92.2%

 epoch: 767 | train_loss: 0.70, train_acc: 91.8% | test_loss: 0.65, test_acc: 92.6%


  5%|▌         | 769/15000 [01:30<28:42,  8.26it/s]


 epoch: 768 | train_loss: 0.64, train_acc: 92.6% | test_loss: 0.60, test_acc: 93.1%

 epoch: 769 | train_loss: 0.59, train_acc: 92.9% | test_loss: 0.64, test_acc: 92.6%

input:       western halves ruled by multiple emperors constantine the great began the process of christianizing the empire and established new

target:      western halves ruled by multiple emperors constantine the great began the process of christianizing the empire and established new capital


  5%|▌         | 770/15000 [01:30<30:51,  7.69it/s]


prediction:  western halves ruled by multiple emperors constantine the great began the process of gas the empire and established new the

 epoch: 770 | train_loss: 0.67, train_acc: 91.9% | test_loss: 0.59, test_acc: 93.1%

 epoch: 771 | train_loss: 0.71, train_acc: 91.4% | test_loss: 0.63, test_acc: 92.6%


  5%|▌         | 773/15000 [01:30<27:13,  8.71it/s]


 epoch: 772 | train_loss: 0.63, train_acc: 92.6% | test_loss: 0.62, test_acc: 93.0%

 epoch: 773 | train_loss: 0.73, train_acc: 91.4% | test_loss: 0.59, test_acc: 93.4%


  5%|▌         | 775/15000 [01:31<27:11,  8.72it/s]


 epoch: 774 | train_loss: 0.64, train_acc: 92.6% | test_loss: 0.62, test_acc: 92.9%

 epoch: 775 | train_loss: 0.67, train_acc: 92.3% | test_loss: 0.67, test_acc: 92.4%


  5%|▌         | 777/15000 [01:31<26:09,  9.06it/s]


 epoch: 776 | train_loss: 0.61, train_acc: 93.0% | test_loss: 0.58, test_acc: 92.2%

 epoch: 777 | train_loss: 0.69, train_acc: 92.0% | test_loss: 0.62, test_acc: 92.5%


  5%|▌         | 779/15000 [01:31<33:54,  6.99it/s]


 epoch: 778 | train_loss: 0.72, train_acc: 91.9% | test_loss: 0.69, test_acc: 91.9%

 epoch: 779 | train_loss: 0.61, train_acc: 92.8% | test_loss: 0.64, test_acc: 92.3%

input:       pericles stepped in and was elected general or strategos in bc post he held continuously until his death in


  5%|▌         | 781/15000 [01:31<33:18,  7.11it/s]


target:      pericles stepped in and was elected general or strategos in bc post he held continuously until his death in bc

prediction:  pericles city in and was elected general or st in bc post he held continuously until his death in the

 epoch: 780 | train_loss: 0.54, train_acc: 93.8% | test_loss: 0.62, test_acc: 92.8%

 epoch: 781 | train_loss: 0.63, train_acc: 92.7% | test_loss: 0.62, test_acc: 92.7%


  5%|▌         | 783/15000 [01:32<29:32,  8.02it/s]


 epoch: 782 | train_loss: 0.62, train_acc: 92.9% | test_loss: 0.72, test_acc: 91.4%

 epoch: 783 | train_loss: 0.63, train_acc: 92.7% | test_loss: 0.57, test_acc: 93.5%


  5%|▌         | 785/15000 [01:32<27:52,  8.50it/s]


 epoch: 784 | train_loss: 0.59, train_acc: 93.0% | test_loss: 0.59, test_acc: 93.1%

 epoch: 785 | train_loss: 0.66, train_acc: 92.4% | test_loss: 0.70, test_acc: 92.0%


  5%|▌         | 787/15000 [01:32<27:33,  8.60it/s]


 epoch: 786 | train_loss: 0.63, train_acc: 92.5% | test_loss: 0.66, test_acc: 92.2%

 epoch: 787 | train_loss: 0.54, train_acc: 93.8% | test_loss: 0.68, test_acc: 92.2%


  5%|▌         | 789/15000 [01:32<27:23,  8.64it/s]


 epoch: 788 | train_loss: 0.55, train_acc: 93.8% | test_loss: 0.63, test_acc: 92.6%

 epoch: 789 | train_loss: 0.59, train_acc: 93.0% | test_loss: 0.57, test_acc: 93.2%

input:       the royal navy and settling on pitcairn islands which later became british colony britain also established colonies in australia

target:     

  5%|▌         | 791/15000 [01:33<28:53,  8.20it/s]

 the royal navy and settling on pitcairn islands which later became british colony britain also established colonies in australia in

prediction:  the royal navy and settling on pitcairn islands which later became british colony britain also established colonies in australia the

 epoch: 790 | train_loss: 0.60, train_acc: 93.1% | test_loss: 0.66, test_acc: 92.3%

 epoch: 791 | train_loss: 0.69, train_acc: 91.7% | test_loss: 0.68, test_acc: 92.0%


  5%|▌         | 792/15000 [01:33<28:44,  8.24it/s]


 epoch: 792 | train_loss: 0.56, train_acc: 93.5% | test_loss: 0.60, test_acc: 93.4%


  5%|▌         | 795/15000 [01:33<39:36,  5.98it/s]


 epoch: 793 | train_loss: 0.56, train_acc: 94.1% | test_loss: 0.67, test_acc: 92.0%

 epoch: 794 | train_loss: 0.55, train_acc: 93.7% | test_loss: 0.52, test_acc: 93.9%

 epoch: 795 | train_loss: 0.64, train_acc: 92.5% | test_loss: 0.67, test_acc: 91.8%


  5%|▌         | 797/15000 [01:34<31:41,  7.47it/s]


 epoch: 796 | train_loss: 0.64, train_acc: 92.6% | test_loss: 0.60, test_acc: 92.9%

 epoch: 797 | train_loss: 0.60, train_acc: 93.3% | test_loss: 0.70, test_acc: 92.0%

 epoch: 798 | train_loss: 0.55, train_acc: 93.6% | test_loss: 0.68, test_acc: 92.7%


  5%|▌         | 800/15000 [01:34<27:36,  8.57it/s]


 epoch: 799 | train_loss: 0.71, train_acc: 91.7% | test_loss: 0.62, test_acc: 92.9%

input:       within population for trait that can vary across range of values such as height can be categorised into three

target:      within population for trait that can vary across range of values such as height can be categorised into three different

prediction:  within population for trait that can vary across range of values such as height can be philosopher into three the

 epoch: 800 | train_loss: 0.63, train_acc: 92.7% | test_loss: 0.68, test_acc: 92.5%


  5%|▌         | 802/15000 [01:34<25:39,  9.22it/s]


 epoch: 801 | train_loss: 0.63, train_acc: 91.9% | test_loss: 0.60, test_acc: 93.2%

 epoch: 802 | train_loss: 0.67, train_acc: 91.7% | test_loss: 0.63, test_acc: 92.6%

 epoch: 803 | train_loss: 0.61, train_acc: 92.4% | test_loss: 0.59, test_acc: 93.2%


  5%|▌         | 806/15000 [01:34<23:10, 10.21it/s]


 epoch: 804 | train_loss: 0.55, train_acc: 93.4% | test_loss: 0.65, test_acc: 92.4%

 epoch: 805 | train_loss: 0.65, train_acc: 92.1% | test_loss: 0.60, test_acc: 92.9%

 epoch: 806 | train_loss: 0.65, train_acc: 92.2% | test_loss: 0.58, test_acc: 93.0%


  5%|▌         | 808/15000 [01:35<39:17,  6.02it/s]


 epoch: 807 | train_loss: 0.59, train_acc: 93.1% | test_loss: 0.61, test_acc: 92.8%

 epoch: 808 | train_loss: 0.69, train_acc: 92.2% | test_loss: 0.62, test_acc: 93.1%

 epoch: 809 | train_loss: 0.69, train_acc: 92.1% | test_loss: 0.62, test_acc: 93.0%


  5%|▌         | 810/15000 [01:35<34:36,  6.84it/s]


input:       pc versions since its release it has drawn in an estimated million players per month and remains one of

target:      pc versions since its release it has drawn in an estimated million players per month and remains one of the

prediction:  pc versions since its release it has drawn in an estimated million players per month and remains one of the

 epoch: 810 | train_loss: 0.64, train_acc: 92.9% | test_loss: 0.63, test_acc: 92.9%

 epoch: 811 | train_loss: 0.60, train_acc: 92.7% | test_loss: 0.62, test_acc: 92.8%


  5%|▌         | 814/15000 [01:36<26:44,  8.84it/s]


 epoch: 812 | train_loss: 0.64, train_acc: 92.7% | test_loss: 0.61, test_acc: 93.0%

 epoch: 813 | train_loss: 0.60, train_acc: 93.0% | test_loss: 0.70, test_acc: 91.8%

 epoch: 814 | train_loss: 0.62, train_acc: 92.9% | test_loss: 0.65, test_acc: 92.6%


  5%|▌         | 816/15000 [01:36<24:46,  9.54it/s]


 epoch: 815 | train_loss: 0.56, train_acc: 93.6% | test_loss: 0.54, test_acc: 94.3%

 epoch: 816 | train_loss: 0.56, train_acc: 93.4% | test_loss: 0.70, test_acc: 91.5%

 epoch: 817 | train_loss: 0.64, train_acc: 92.4% | test_loss: 0.56, test_acc: 93.5%


  5%|▌         | 820/15000 [01:36<23:46,  9.94it/s]


 epoch: 818 | train_loss: 0.62, train_acc: 92.6% | test_loss: 0.56, test_acc: 93.1%

 epoch: 819 | train_loss: 0.66, train_acc: 92.1% | test_loss: 0.60, test_acc: 92.9%

input:       plantations now cover vast areas of land but offer poorer habitats for many european forest dwelling species which require

target:      plantations now cover vast areas of land but offer poorer habitats for many european forest dwelling species which require mixture

prediction:  plantations now cover vast areas of land but offer poorer habitats for many european forest dwelling species which require the

 epoch: 820 | train_loss: 0.59, train_acc: 93.1% | test_loss: 0.63, test_acc: 93.0%


  5%|▌         | 822/15000 [01:36<23:42,  9.97it/s]


 epoch: 821 | train_loss: 0.55, train_acc: 93.8% | test_loss: 0.57, test_acc: 93.6%

 epoch: 822 | train_loss: 0.76, train_acc: 91.3% | test_loss: 0.67, test_acc: 92.0%

 epoch: 823 | train_loss: 0.59, train_acc: 93.1% | test_loss: 0.66, test_acc: 92.0%


  6%|▌         | 826/15000 [01:37<21:16, 11.10it/s]


 epoch: 824 | train_loss: 0.63, train_acc: 92.9% | test_loss: 0.64, test_acc: 92.7%

 epoch: 825 | train_loss: 0.68, train_acc: 92.1% | test_loss: 0.63, test_acc: 92.8%

 epoch: 826 | train_loss: 0.62, train_acc: 92.9% | test_loss: 0.62, test_acc: 92.5%


  6%|▌         | 828/15000 [01:37<20:53, 11.31it/s]


 epoch: 827 | train_loss: 0.64, train_acc: 92.5% | test_loss: 0.62, test_acc: 92.7%

 epoch: 828 | train_loss: 0.66, train_acc: 91.6% | test_loss: 0.61, test_acc: 93.0%

 epoch: 829 | train_loss: 0.57, train_acc: 93.2% | test_loss: 0.60, test_acc: 93.1%


  6%|▌         | 830/15000 [01:37<21:23, 11.04it/s]


input:       the southern cone located in the middle latitudes the continent cultural and ethnic outlook has its origin with the

target:      the southern cone located in the middle latitudes the continent cultural and ethnic outlook has its origin with the interaction

prediction:  the southern relative located in the middle latitudes the continent cultural and ethnic instruments has its origin with the the

 epoch: 830 | train_loss: 0.65, train_acc: 92.5% | test_loss: 0.57, test_acc: 93.4%

 epoch: 831 | train_loss: 0.58, train_acc: 93.6% | test_loss: 0.59, test_acc: 93.3%


  6%|▌         | 834/15000 [01:37<21:02, 11.22it/s]


 epoch: 832 | train_loss: 0.59, train_acc: 92.7% | test_loss: 0.65, test_acc: 92.1%

 epoch: 833 | train_loss: 0.60, train_acc: 92.9% | test_loss: 0.64, test_acc: 92.3%

 epoch: 834 | train_loss: 0.60, train_acc: 92.8% | test_loss: 0.57, test_acc: 93.3%

 epoch: 835 | train_loss: 0.62, train_acc: 93.0% | test_loss: 0.66, test_acc: 92.2%


  6%|▌         | 838/15000 [01:38<31:11,  7.57it/s]


 epoch: 836 | train_loss: 0.70, train_acc: 91.6% | test_loss: 0.71, test_acc: 91.6%

 epoch: 837 | train_loss: 0.59, train_acc: 93.1% | test_loss: 0.66, test_acc: 91.8%

 epoch: 838 | train_loss: 0.62, train_acc: 92.9% | test_loss: 0.58, test_acc: 93.3%


  6%|▌         | 840/15000 [01:38<28:44,  8.21it/s]


 epoch: 839 | train_loss: 0.62, train_acc: 92.5% | test_loss: 0.70, test_acc: 92.0%

input:       transitioning from an authoritarian regime to democracy the history of education examines the evolution of educational practices systems and

target:      transitioning from an authoritarian regime to democracy the history of education examines the evolution of educational practices systems and institutions

prediction:  files from an authoritarian regime to democracy the history of education examines the evolution of educational practices systems and the

 epoch: 840 | train_loss: 0.63, train_acc: 92.7% | test_loss: 0.61, test_acc: 93.0%

 epoch: 841 | train_loss: 0.63, train_acc: 92.5% | test_loss: 0.70, test_acc: 92.0%


  6%|▌         | 844/15000 [01:39<23:56,  9.85it/s]


 epoch: 842 | train_loss: 0.65, train_acc: 92.1% | test_loss: 0.61, test_acc: 92.9%

 epoch: 843 | train_loss: 0.64, train_acc: 92.8% | test_loss: 0.65, test_acc: 92.4%

 epoch: 844 | train_loss: 0.55, train_acc: 93.5% | test_loss: 0.60, test_acc: 93.1%


  6%|▌         | 846/15000 [01:39<23:02, 10.24it/s]


 epoch: 845 | train_loss: 0.63, train_acc: 92.6% | test_loss: 0.55, test_acc: 93.8%

 epoch: 846 | train_loss: 0.53, train_acc: 93.7% | test_loss: 0.61, test_acc: 92.9%

 epoch: 847 | train_loss: 0.60, train_acc: 92.8% | test_loss: 0.70, test_acc: 91.7%


  6%|▌         | 848/15000 [01:39<22:11, 10.63it/s]


 epoch: 848 | train_loss: 0.56, train_acc: 93.3% | test_loss: 0.63, test_acc: 92.5%

 epoch: 849 | train_loss: 0.61, train_acc: 92.9% | test_loss: 0.58, test_acc: 93.3%

input:       as with type inferred languages dynamically typed languages do not require the programmer to write explicit type annotations on

target:      as with type inferred languages dynamically typed languages do not require the programmer to write explicit type annotations on expressions

prediction:  as with type inferred languages dynamically typed languages do not require the programmer to write explicit type annotations on of


  6%|▌         | 852/15000 [01:40<31:29,  7.49it/s]


 epoch: 850 | train_loss: 0.61, train_acc: 93.1% | test_loss: 0.59, test_acc: 93.3%

 epoch: 851 | train_loss: 0.71, train_acc: 91.3% | test_loss: 0.64, test_acc: 92.5%

 epoch: 852 | train_loss: 0.67, train_acc: 92.3% | test_loss: 0.68, test_acc: 91.7%


  6%|▌         | 854/15000 [01:40<27:50,  8.47it/s]


 epoch: 853 | train_loss: 0.68, train_acc: 91.5% | test_loss: 0.68, test_acc: 91.9%

 epoch: 854 | train_loss: 0.61, train_acc: 92.8% | test_loss: 0.53, test_acc: 94.0%

 epoch: 855 | train_loss: 0.55, train_acc: 93.5% | test_loss: 0.60, test_acc: 93.1%


  6%|▌         | 858/15000 [01:40<23:33, 10.01it/s]


 epoch: 856 | train_loss: 0.54, train_acc: 93.8% | test_loss: 0.66, test_acc: 92.3%

 epoch: 857 | train_loss: 0.60, train_acc: 92.8% | test_loss: 0.59, test_acc: 92.8%

 epoch: 858 | train_loss: 0.56, train_acc: 93.5% | test_loss: 0.57, test_acc: 93.6%


  6%|▌         | 860/15000 [01:40<23:25, 10.06it/s]


 epoch: 859 | train_loss: 0.63, train_acc: 92.6% | test_loss: 0.64, test_acc: 92.4%

input:       human race has allowed the greater exploitation of natural resources and has helped to alleviate some of the risk

target:      human race has allowed the greater exploitation of natural resources and has helped to alleviate some of the risk from

prediction:  human race has allowed the greater exploitation of natural resources and has helped to years some of the risk the

 epoch: 860 | train_loss: 0.62, train_acc: 92.7% | test_loss: 0.59, test_acc: 93.1%

 epoch: 861 | train_loss: 0.56, train_acc: 93.6% | test_loss: 0.60, test_acc: 92.8%


  6%|▌         | 864/15000 [01:41<21:53, 10.76it/s]


 epoch: 862 | train_loss: 0.58, train_acc: 93.4% | test_loss: 0.63, test_acc: 93.0%

 epoch: 863 | train_loss: 0.60, train_acc: 93.4% | test_loss: 0.62, test_acc: 92.6%

 epoch: 864 | train_loss: 0.58, train_acc: 92.9% | test_loss: 0.57, test_acc: 93.3%


  6%|▌         | 866/15000 [01:41<28:44,  8.20it/s]


 epoch: 865 | train_loss: 0.57, train_acc: 94.0% | test_loss: 0.56, test_acc: 93.7%

 epoch: 866 | train_loss: 0.63, train_acc: 92.5% | test_loss: 0.64, test_acc: 92.7%

 epoch: 867 | train_loss: 0.54, train_acc: 93.9% | test_loss: 0.59, test_acc: 93.2%


  6%|▌         | 870/15000 [01:42<24:49,  9.49it/s]


 epoch: 868 | train_loss: 0.59, train_acc: 92.9% | test_loss: 0.60, test_acc: 93.0%

 epoch: 869 | train_loss: 0.63, train_acc: 92.7% | test_loss: 0.60, test_acc: 92.9%

input:       in the amphitheatre although certain dances were seen at times as non roman or unmanly dancing was embedded in

target:      in the amphitheatre although certain dances were seen at times as non roman or unmanly dancing was embedded in religious

prediction:  in the amphitheatre although certain quebec were seen at times as non roman or comedies dancing was embedded in the

 epoch: 870 | train_loss: 0.58, train_acc: 93.2% | test_loss: 0.62, test_acc: 93.1%


  6%|▌         | 872/15000 [01:42<23:06, 10.19it/s]


 epoch: 871 | train_loss: 0.52, train_acc: 93.7% | test_loss: 0.56, test_acc: 93.4%

 epoch: 872 | train_loss: 0.65, train_acc: 92.0% | test_loss: 0.67, test_acc: 92.3%

 epoch: 873 | train_loss: 0.60, train_acc: 92.8% | test_loss: 0.64, test_acc: 92.9%


  6%|▌         | 876/15000 [01:42<21:32, 10.93it/s]


 epoch: 874 | train_loss: 0.59, train_acc: 93.5% | test_loss: 0.59, test_acc: 92.9%

 epoch: 875 | train_loss: 0.55, train_acc: 93.2% | test_loss: 0.54, test_acc: 93.8%

 epoch: 876 | train_loss: 0.62, train_acc: 92.9% | test_loss: 0.56, test_acc: 93.4%


  6%|▌         | 878/15000 [01:42<21:14, 11.08it/s]


 epoch: 877 | train_loss: 0.57, train_acc: 93.3% | test_loss: 0.53, test_acc: 93.7%

 epoch: 878 | train_loss: 0.58, train_acc: 93.3% | test_loss: 0.57, test_acc: 93.5%


  6%|▌         | 880/15000 [01:43<38:00,  6.19it/s]


 epoch: 879 | train_loss: 0.59, train_acc: 92.9% | test_loss: 0.63, test_acc: 92.5%

input:       co emperor zeno who was then sole augustus faced numerous rebellions because of his tenuous claim to the throne

target:      co emperor zeno who was then sole augustus faced numerous rebellions because of his tenuous claim to the throne including

prediction:  co emperor zeno who was then sole augustus faced numerous rebellions because of his english claim to the throne the

 epoch: 880 | train_loss: 0.54, train_acc: 93.5% | test_loss: 0.59, test_acc: 93.4%

 epoch: 881 | train_loss: 0.64, train_acc: 92.6% | test_loss: 0.64, test_acc: 93.1%

  6%|▌         | 883/15000 [01:43<31:15,  7.53it/s]



 epoch: 882 | train_loss: 0.54, train_acc: 93.7% | test_loss: 0.54, test_acc: 93.7%

 epoch: 883 | train_loss: 0.58, train_acc: 93.0% | test_loss: 0.64, test_acc: 92.3%


  6%|▌         | 885/15000 [01:43<28:39,  8.21it/s]


 epoch: 884 | train_loss: 0.63, train_acc: 92.4% | test_loss: 0.54, test_acc: 93.5%

 epoch: 885 | train_loss: 0.55, train_acc: 93.4% | test_loss: 0.65, test_acc: 92.1%

 epoch: 886 | train_loss: 0.62, train_acc: 92.2% | test_loss: 0.63, test_acc: 92.2%


  6%|▌         | 888/15000 [01:44<26:06,  9.01it/s]


 epoch: 887 | train_loss: 0.57, train_acc: 93.2% | test_loss: 0.59, test_acc: 93.4%

 epoch: 888 | train_loss: 0.71, train_acc: 91.8% | test_loss: 0.56, test_acc: 93.4%


  6%|▌         | 890/15000 [01:44<27:41,  8.49it/s]


 epoch: 889 | train_loss: 0.62, train_acc: 92.6% | test_loss: 0.53, test_acc: 94.0%

input:       the ship the city could afford such large fleet it had over oarsmen because it owned lot of silver

target:      the ship the city could afford such large fleet it had over oarsmen because it owned lot of silver mines

prediction:  the ship the city could afford such large fleet it had over nobility because it owned lot of silver the

 epoch: 890 | train_loss: 0.62, train_acc: 92.4% | test_loss: 0.67, test_acc: 92.1%


  6%|▌         | 892/15000 [01:44<26:35,  8.84it/s]


 epoch: 891 | train_loss: 0.63, train_acc: 92.3% | test_loss: 0.54, test_acc: 93.3%

 epoch: 892 | train_loss: 0.58, train_acc: 93.4% | test_loss: 0.63, test_acc: 93.1%


  6%|▌         | 894/15000 [01:44<26:31,  8.86it/s]


 epoch: 893 | train_loss: 0.55, train_acc: 93.5% | test_loss: 0.61, test_acc: 92.7%

 epoch: 894 | train_loss: 0.61, train_acc: 92.3% | test_loss: 0.54, test_acc: 93.8%


  6%|▌         | 896/15000 [01:45<25:51,  9.09it/s]


 epoch: 895 | train_loss: 0.63, train_acc: 92.3% | test_loss: 0.55, test_acc: 93.8%

 epoch: 896 | train_loss: 0.56, train_acc: 93.5% | test_loss: 0.61, test_acc: 92.6%


  6%|▌         | 899/15000 [01:45<24:24,  9.63it/s]


 epoch: 897 | train_loss: 0.56, train_acc: 93.8% | test_loss: 0.56, test_acc: 93.2%

 epoch: 898 | train_loss: 0.52, train_acc: 94.2% | test_loss: 0.54, test_acc: 93.6%

 epoch: 899 | train_loss: 0.61, train_acc: 92.8% | test_loss: 0.59, test_acc: 92.9%


  6%|▌         | 901/15000 [01:45<26:53,  8.74it/s]


input:       disciplines such as culture sciences music and the arts the schooling ended at age followed by military training in

target:      disciplines such as culture sciences music and the arts the schooling ended at age followed by military training in the

prediction:  disciplines such as culture sciences music and the arts the schooling ended at age followed by military training in the

 epoch: 900 | train_loss: 0.64, train_acc: 92.5% | test_loss: 0.58, test_acc: 93.3%

 epoch: 901 | train_loss: 0.57, train_acc: 93.4% | test_loss: 0.59, test_acc: 93.1%


  6%|▌         | 903/15000 [01:45<26:13,  8.96it/s]


 epoch: 902 | train_loss: 0.62, train_acc: 92.9% | test_loss: 0.58, test_acc: 93.3%

 epoch: 903 | train_loss: 0.59, train_acc: 93.2% | test_loss: 0.54, test_acc: 93.8%


  6%|▌         | 905/15000 [01:46<26:19,  8.93it/s]


 epoch: 904 | train_loss: 0.64, train_acc: 92.8% | test_loss: 0.55, test_acc: 93.8%

 epoch: 905 | train_loss: 0.61, train_acc: 92.7% | test_loss: 0.63, test_acc: 92.5%


  6%|▌         | 907/15000 [01:46<26:22,  8.91it/s]


 epoch: 906 | train_loss: 0.57, train_acc: 92.7% | test_loss: 0.59, test_acc: 93.2%

 epoch: 907 | train_loss: 0.56, train_acc: 93.6% | test_loss: 0.67, test_acc: 91.8%


  6%|▌         | 909/15000 [01:46<25:51,  9.08it/s]


 epoch: 908 | train_loss: 0.57, train_acc: 93.2% | test_loss: 0.58, test_acc: 93.1%

 epoch: 909 | train_loss: 0.54, train_acc: 93.8% | test_loss: 0.62, test_acc: 93.0%

input:       ad the french port royal grammarians developed the idea that the grammars of all languages were reflection of the

target:      ad the french port royal grammarians developed the idea that the grammars of all languages were reflection of the universal


  6%|▌         | 910/15000 [01:46<29:03,  8.08it/s]


prediction:  ad the french port royal house developed the idea that the grammars of all languages were reflection of the the

 epoch: 910 | train_loss: 0.67, train_acc: 92.2% | test_loss: 0.55, test_acc: 93.8%

 epoch: 911 | train_loss: 0.65, train_acc: 92.8% | test_loss: 0.62, test_acc: 93.0%


  6%|▌         | 913/15000 [01:46<26:10,  8.97it/s]


 epoch: 912 | train_loss: 0.55, train_acc: 93.8% | test_loss: 0.58, test_acc: 93.4%

 epoch: 913 | train_loss: 0.57, train_acc: 93.7% | test_loss: 0.60, test_acc: 93.1%


  6%|▌         | 915/15000 [01:47<25:08,  9.33it/s]


 epoch: 914 | train_loss: 0.60, train_acc: 93.0% | test_loss: 0.64, test_acc: 92.7%

 epoch: 915 | train_loss: 0.55, train_acc: 93.2% | test_loss: 0.62, test_acc: 92.6%


  6%|▌         | 917/15000 [01:47<25:21,  9.26it/s]


 epoch: 916 | train_loss: 0.53, train_acc: 93.5% | test_loss: 0.61, test_acc: 92.8%

 epoch: 917 | train_loss: 0.55, train_acc: 93.7% | test_loss: 0.53, test_acc: 93.7%


  6%|▌         | 919/15000 [01:47<25:54,  9.06it/s]


 epoch: 918 | train_loss: 0.65, train_acc: 92.7% | test_loss: 0.56, test_acc: 93.5%

 epoch: 919 | train_loss: 0.63, train_acc: 92.4% | test_loss: 0.62, test_acc: 92.9%

input:       chile uruguay and paraguay were overthrown or displaced by military dictatorships in the and to curtail opposition their governments

target:      chile uruguay and paraguay were overthrown or displaced by military dictatorships in the and to curtail opposition their governments detained

prediction:  chile uruguay and paraguay were overthrown or displaced by military dictatorships in the and to stated opposition their governments the


  6%|▌         | 920/15000 [01:47<27:36,  8.50it/s]


 epoch: 920 | train_loss: 0.50, train_acc: 94.2% | test_loss: 0.66, test_acc: 92.0%


  6%|▌         | 923/15000 [01:48<40:02,  5.86it/s]


 epoch: 921 | train_loss: 0.66, train_acc: 92.3% | test_loss: 0.57, test_acc: 93.2%

 epoch: 922 | train_loss: 0.56, train_acc: 93.3% | test_loss: 0.50, test_acc: 94.4%

 epoch: 923 | train_loss: 0.59, train_acc: 92.9% | test_loss: 0.51, test_acc: 94.3%


  6%|▌         | 925/15000 [01:48<32:24,  7.24it/s]


 epoch: 924 | train_loss: 0.50, train_acc: 94.4% | test_loss: 0.55, test_acc: 93.6%

 epoch: 925 | train_loss: 0.58, train_acc: 92.9% | test_loss: 0.60, test_acc: 92.8%

 epoch: 926 | train_loss: 0.57, train_acc: 93.5% | test_loss: 0.59, test_acc: 93.1%


  6%|▌         | 929/15000 [01:48<24:54,  9.41it/s]


 epoch: 927 | train_loss: 0.61, train_acc: 93.0% | test_loss: 0.64, test_acc: 92.5%

 epoch: 928 | train_loss: 0.59, train_acc: 92.8% | test_loss: 0.60, test_acc: 92.7%

 epoch: 929 | train_loss: 0.57, train_acc: 93.6% | test_loss: 0.59, test_acc: 93.2%


  6%|▌         | 931/15000 [01:49<24:15,  9.67it/s]


input:       curtain the united states and western europe established the nato alliance and later the soviet union and central europe

target:      curtain the united states and western europe established the nato alliance and later the soviet union and central europe established

prediction:  civilizations the united states and western europe established the nato alliance and later the soviet union and central europe the

 epoch: 930 | train_loss: 0.70, train_acc: 91.5% | test_loss: 0.59, test_acc: 92.9%

 epoch: 931 | train_loss: 0.67, train_acc: 92.4% | test_loss: 0.55, test_acc: 93.5%


  6%|▌         | 933/15000 [01:49<23:28,  9.99it/s]


 epoch: 932 | train_loss: 0.55, train_acc: 93.4% | test_loss: 0.54, test_acc: 93.6%

 epoch: 933 | train_loss: 0.57, train_acc: 93.5% | test_loss: 0.62, test_acc: 93.0%

 epoch: 934 | train_loss: 0.56, train_acc: 93.6% | test_loss: 0.55, test_acc: 93.3%


  6%|▌         | 937/15000 [01:50<32:38,  7.18it/s]


 epoch: 935 | train_loss: 0.66, train_acc: 92.2% | test_loss: 0.57, test_acc: 93.2%

 epoch: 936 | train_loss: 0.56, train_acc: 93.1% | test_loss: 0.56, test_acc: 93.4%

 epoch: 937 | train_loss: 0.55, train_acc: 93.3% | test_loss: 0.51, test_acc: 93.6%


  6%|▋         | 939/15000 [01:50<28:29,  8.22it/s]


 epoch: 938 | train_loss: 0.54, train_acc: 93.6% | test_loss: 0.53, test_acc: 93.8%

 epoch: 939 | train_loss: 0.56, train_acc: 93.6% | test_loss: 0.62, test_acc: 92.9%

input:       by polygon as quietly dominant although in thanks in part to pewdiepie playthroughs of the game minecraft experienced visible

target:      by polygon as quietly dominant although in thanks in part to pewdiepie playthroughs of the game minecraft experienced visible uptick

prediction:  by legions as organize dominant although in institutions in part to keeping gods of the game minecraft experienced visible the

 epoch: 940 | train_loss: 0.52, train_acc: 94.0% | test_loss: 0.58, test_acc: 93.8%


  6%|▋         | 943/15000 [01:50<24:54,  9.40it/s]


 epoch: 941 | train_loss: 0.63, train_acc: 92.9% | test_loss: 0.70, test_acc: 92.1%

 epoch: 942 | train_loss: 0.54, train_acc: 93.9% | test_loss: 0.59, test_acc: 92.8%

 epoch: 943 | train_loss: 0.59, train_acc: 92.9% | test_loss: 0.63, test_acc: 92.6%


  6%|▋         | 945/15000 [01:50<23:37,  9.92it/s]


 epoch: 944 | train_loss: 0.57, train_acc: 93.3% | test_loss: 0.51, test_acc: 93.9%

 epoch: 945 | train_loss: 0.54, train_acc: 93.8% | test_loss: 0.60, test_acc: 92.8%

 epoch: 946 | train_loss: 0.55, train_acc: 93.8% | test_loss: 0.50, test_acc: 94.2%


  6%|▋         | 949/15000 [01:51<22:11, 10.55it/s]


 epoch: 947 | train_loss: 0.53, train_acc: 93.7% | test_loss: 0.55, test_acc: 93.3%

 epoch: 948 | train_loss: 0.48, train_acc: 94.4% | test_loss: 0.55, test_acc: 93.8%

 epoch: 949 | train_loss: 0.58, train_acc: 93.1% | test_loss: 0.61, test_acc: 93.0%

input:       of eurasia into asia and europe as residue of eurocentrism in physical cultural and historical diversity china and india

target:      of eurasia into asia and europe as residue of eurocentrism in physical cultural and historical diversity china and india are

prediction:  of eurasia into asia and europe as rice of peloponnesian in physical cultural and historical diversity china and india the


  6%|▋         | 951/15000 [01:51<35:11,  6.65it/s]


 epoch: 950 | train_loss: 0.60, train_acc: 92.8% | test_loss: 0.54, test_acc: 93.8%

 epoch: 951 | train_loss: 0.52, train_acc: 94.2% | test_loss: 0.57, test_acc: 93.6%

 epoch: 952 | train_loss: 0.61, train_acc: 92.5% | test_loss: 0.54, test_acc: 93.8%


  6%|▋         | 955/15000 [01:52<26:59,  8.67it/s]


 epoch: 953 | train_loss: 0.54, train_acc: 93.4% | test_loss: 0.56, test_acc: 93.7%

 epoch: 954 | train_loss: 0.49, train_acc: 94.3% | test_loss: 0.54, test_acc: 93.9%

 epoch: 955 | train_loss: 0.57, train_acc: 93.2% | test_loss: 0.57, test_acc: 93.4%


  6%|▋         | 957/15000 [01:52<24:50,  9.42it/s]


 epoch: 956 | train_loss: 0.55, train_acc: 93.5% | test_loss: 0.58, test_acc: 93.1%

 epoch: 957 | train_loss: 0.51, train_acc: 93.8% | test_loss: 0.60, test_acc: 92.8%

 epoch: 958 | train_loss: 0.53, train_acc: 93.8% | test_loss: 0.57, test_acc: 93.7%


  6%|▋         | 959/15000 [01:52<23:37,  9.90it/s]


 epoch: 959 | train_loss: 0.54, train_acc: 93.4% | test_loss: 0.63, test_acc: 92.1%

input:       canal this makes egypt transcontinental country with the sinai peninsula in asia and the remainder of the country in

target:      canal this makes egypt transcontinental country with the sinai peninsula in asia and the remainder of the country in africa

prediction:  canal this makes egypt transcontinental country with the sinai peninsula in asia and the remainder of the country in the

 epoch: 960 | train_loss: 0.56, train_acc: 93.3% | test_loss: 0.63, test_acc: 92.3%


  6%|▋         | 963/15000 [01:52<23:26,  9.98it/s]


 epoch: 961 | train_loss: 0.54, train_acc: 93.7% | test_loss: 0.59, test_acc: 93.3%

 epoch: 962 | train_loss: 0.55, train_acc: 93.2% | test_loss: 0.49, test_acc: 94.3%

 epoch: 963 | train_loss: 0.63, train_acc: 92.9% | test_loss: 0.55, test_acc: 93.7%


  6%|▋         | 965/15000 [01:53<38:14,  6.12it/s]


 epoch: 964 | train_loss: 0.51, train_acc: 93.9% | test_loss: 0.59, test_acc: 93.2%

 epoch: 965 | train_loss: 0.58, train_acc: 93.5% | test_loss: 0.60, test_acc: 92.9%

 epoch: 966 | train_loss: 0.58, train_acc: 93.3% | test_loss: 0.55, test_acc: 93.1%


  6%|▋         | 969/15000 [01:53<28:58,  8.07it/s]


 epoch: 967 | train_loss: 0.55, train_acc: 93.2% | test_loss: 0.58, test_acc: 93.3%

 epoch: 968 | train_loss: 0.55, train_acc: 93.6% | test_loss: 0.58, test_acc: 92.9%

 epoch: 969 | train_loss: 0.60, train_acc: 93.1% | test_loss: 0.57, test_acc: 93.0%


  6%|▋         | 971/15000 [01:54<27:34,  8.48it/s]


input:       elected augustus in nicaea bithynia the army had been left leaderless twice in less than year and the officers

target:      elected augustus in nicaea bithynia the army had been left leaderless twice in less than year and the officers demanded

prediction:  elected augustus in hope during the army had been left leads twice in less than year and the officers the

 epoch: 970 | train_loss: 0.52, train_acc: 93.9% | test_loss: 0.58, test_acc: 93.3%

 epoch: 971 | train_loss: 0.52, train_acc: 93.6% | test_loss: 0.58, test_acc: 93.0%


  6%|▋         | 973/15000 [01:54<25:13,  9.27it/s]


 epoch: 972 | train_loss: 0.54, train_acc: 93.4% | test_loss: 0.57, test_acc: 93.6%

 epoch: 973 | train_loss: 0.58, train_acc: 93.0% | test_loss: 0.68, test_acc: 91.5%

 epoch: 974 | train_loss: 0.50, train_acc: 93.8% | test_loss: 0.52, test_acc: 94.0%


  7%|▋         | 977/15000 [01:54<22:47, 10.26it/s]


 epoch: 975 | train_loss: 0.51, train_acc: 94.1% | test_loss: 0.69, test_acc: 91.6%

 epoch: 976 | train_loss: 0.57, train_acc: 93.0% | test_loss: 0.59, test_acc: 92.4%

 epoch: 977 | train_loss: 0.56, train_acc: 93.4% | test_loss: 0.53, test_acc: 94.2%


  7%|▋         | 980/15000 [01:55<36:18,  6.44it/s]


 epoch: 978 | train_loss: 0.57, train_acc: 93.2% | test_loss: 0.61, test_acc: 92.9%

 epoch: 979 | train_loss: 0.56, train_acc: 93.3% | test_loss: 0.58, test_acc: 93.2%

input:       the middle of root affixes serve to modify or elaborate the meaning of the root some languages change the

target:      the middle of root affixes serve to modify or elaborate the meaning of the root some languages change the meaning

prediction:  the middle of root opposite serve to modify or elaborate the meaning of the root some languages change the the

 epoch: 980 | train_loss: 0.59, train_acc: 92.9% | test_loss: 0.57, test_acc: 93.2%


  7%|▋         | 982/15000 [01:55<30:30,  7.66it/s]


 epoch: 981 | train_loss: 0.63, train_acc: 92.6% | test_loss: 0.53, test_acc: 93.8%

 epoch: 982 | train_loss: 0.52, train_acc: 94.2% | test_loss: 0.54, test_acc: 93.8%

 epoch: 983 | train_loss: 0.53, train_acc: 94.0% | test_loss: 0.49, test_acc: 94.3%


  7%|▋         | 986/15000 [01:55<24:35,  9.50it/s]


 epoch: 984 | train_loss: 0.53, train_acc: 93.5% | test_loss: 0.56, test_acc: 93.5%

 epoch: 985 | train_loss: 0.55, train_acc: 93.3% | test_loss: 0.50, test_acc: 93.9%

 epoch: 986 | train_loss: 0.54, train_acc: 93.1% | test_loss: 0.53, test_acc: 94.0%


  7%|▋         | 988/15000 [01:55<22:50, 10.23it/s]


 epoch: 987 | train_loss: 0.58, train_acc: 93.1% | test_loss: 0.59, test_acc: 92.9%

 epoch: 988 | train_loss: 0.54, train_acc: 92.9% | test_loss: 0.55, test_acc: 93.4%

 epoch: 989 | train_loss: 0.53, train_acc: 94.1% | test_loss: 0.56, test_acc: 93.2%


  7%|▋         | 990/15000 [01:56<23:14, 10.05it/s]


input:       of the world population similar number of people speak the afroasiatic languages which include the populous semitic languages such

target:      of the world population similar number of people speak the afroasiatic languages which include the populous semitic languages such as

prediction:  of the world population similar number of people speak the normally languages which include the populous semitic languages such the

 epoch: 990 | train_loss: 0.65, train_acc: 92.2% | test_loss: 0.55, test_acc: 93.4%

 epoch: 991 | train_loss: 0.53, train_acc: 94.0% | test_loss: 0.51, test_acc: 94.0%


  7%|▋         | 992/15000 [01:56<22:24, 10.42it/s]


 epoch: 992 | train_loss: 0.60, train_acc: 92.6% | test_loss: 0.56, test_acc: 93.5%


  7%|▋         | 994/15000 [01:56<31:36,  7.39it/s]


 epoch: 993 | train_loss: 0.56, train_acc: 93.0% | test_loss: 0.58, test_acc: 92.9%

 epoch: 994 | train_loss: 0.49, train_acc: 94.5% | test_loss: 0.57, test_acc: 93.4%

 epoch: 995 | train_loss: 0.55, train_acc: 93.8% | test_loss: 0.61, test_acc: 93.0%


  7%|▋         | 998/15000 [01:57<25:13,  9.25it/s]


 epoch: 996 | train_loss: 0.55, train_acc: 93.8% | test_loss: 0.58, test_acc: 93.4%

 epoch: 997 | train_loss: 0.53, train_acc: 93.6% | test_loss: 0.57, test_acc: 93.4%

 epoch: 998 | train_loss: 0.51, train_acc: 93.6% | test_loss: 0.59, test_acc: 92.9%


  7%|▋         | 1000/15000 [01:57<24:16,  9.61it/s]


 epoch: 999 | train_loss: 0.58, train_acc: 92.9% | test_loss: 0.52, test_acc: 93.8%

input:       stated no religion in recent australian and new zealand censuses large proportions of the population say they belong to

target:      stated no religion in recent australian and new zealand censuses large proportions of the population say they belong to no

prediction:  stated no religion in recent australian and new zealand minority large proportions of the population say they belong to the

 epoch: 1000 | train_loss: 0.55, train_acc: 93.2% | test_loss: 0.60, test_acc: 93.5%

 epoch: 1001 | train_loss: 0.52, train_acc: 93.8% | test_loss: 0.51, test_acc: 94.1%


  7%|▋         | 1004/15000 [01:57<23:01, 10.13it/s]


 epoch: 1002 | train_loss: 0.58, train_acc: 93.0% | test_loss: 0.55, test_acc: 93.5%

 epoch: 1003 | train_loss: 0.57, train_acc: 93.5% | test_loss: 0.56, test_acc: 93.6%

 epoch: 1004 | train_loss: 0.54, train_acc: 93.8% | test_loss: 0.55, test_acc: 93.3%


  7%|▋         | 1006/15000 [01:57<23:24,  9.97it/s]


 epoch: 1005 | train_loss: 0.57, train_acc: 93.1% | test_loss: 0.55, test_acc: 93.2%

 epoch: 1006 | train_loss: 0.59, train_acc: 92.8% | test_loss: 0.54, test_acc: 93.4%


  7%|▋         | 1009/15000 [01:58<34:59,  6.66it/s]


 epoch: 1007 | train_loss: 0.59, train_acc: 93.0% | test_loss: 0.58, test_acc: 92.8%

 epoch: 1008 | train_loss: 0.59, train_acc: 93.1% | test_loss: 0.56, test_acc: 93.9%

 epoch: 1009 | train_loss: 0.52, train_acc: 94.2% | test_loss: 0.52, test_acc: 94.2%


  7%|▋         | 1011/15000 [01:58<33:07,  7.04it/s]


input:       has historically been extremely variable with its area rapidly fluctuating and at times disappearing depending on global climatic conditions

target:      has historically been extremely variable with its area rapidly fluctuating and at times disappearing depending on global climatic conditions at

prediction:  has historically been extremely variable with its area rapidly strabo and at times disappearing depending on global climatic conditions the

 epoch: 1010 | train_loss: 0.52, train_acc: 94.3% | test_loss: 0.57, test_acc: 93.3%

 epoch: 1011 | train_loss: 0.57, train_acc: 93.4% | test_loss: 0.57, test_acc: 93.0%


  7%|▋         | 1014/15000 [01:59<27:39,  8.43it/s]


 epoch: 1012 | train_loss: 0.56, train_acc: 93.3% | test_loss: 0.52, test_acc: 94.0%

 epoch: 1013 | train_loss: 0.53, train_acc: 93.7% | test_loss: 0.57, test_acc: 93.1%

 epoch: 1014 | train_loss: 0.51, train_acc: 94.2% | test_loss: 0.58, test_acc: 92.9%


  7%|▋         | 1016/15000 [01:59<26:54,  8.66it/s]


 epoch: 1015 | train_loss: 0.53, train_acc: 93.6% | test_loss: 0.46, test_acc: 94.1%

 epoch: 1016 | train_loss: 0.53, train_acc: 93.8% | test_loss: 0.49, test_acc: 94.4%


  7%|▋         | 1018/15000 [01:59<26:52,  8.67it/s]


 epoch: 1017 | train_loss: 0.60, train_acc: 92.9% | test_loss: 0.56, test_acc: 93.7%

 epoch: 1018 | train_loss: 0.61, train_acc: 93.1% | test_loss: 0.56, test_acc: 93.4%


  7%|▋         | 1019/15000 [01:59<26:38,  8.75it/s]


 epoch: 1019 | train_loss: 0.52, train_acc: 94.0% | test_loss: 0.50, test_acc: 94.4%

input:       were later termed first generation programming languages gl the next step was the development of the so called second

target:      were later termed first generation programming languages gl the next step was the development of the so called second generation

prediction:  were later termed first generation programming languages gl the next step was the development of the so called second the


  7%|▋         | 1021/15000 [02:00<50:01,  4.66it/s]


 epoch: 1020 | train_loss: 0.54, train_acc: 93.8% | test_loss: 0.56, test_acc: 93.2%

 epoch: 1021 | train_loss: 0.49, train_acc: 94.9% | test_loss: 0.51, test_acc: 94.1%


  7%|▋         | 1023/15000 [02:00<37:49,  6.16it/s]


 epoch: 1022 | train_loss: 0.61, train_acc: 92.7% | test_loss: 0.63, test_acc: 92.4%

 epoch: 1023 | train_loss: 0.55, train_acc: 93.1% | test_loss: 0.50, test_acc: 94.0%


  7%|▋         | 1025/15000 [02:00<31:57,  7.29it/s]


 epoch: 1024 | train_loss: 0.53, train_acc: 93.7% | test_loss: 0.57, test_acc: 93.1%

 epoch: 1025 | train_loss: 0.64, train_acc: 92.5% | test_loss: 0.51, test_acc: 94.5%


  7%|▋         | 1028/15000 [02:01<25:00,  9.31it/s]


 epoch: 1026 | train_loss: 0.54, train_acc: 93.5% | test_loss: 0.47, test_acc: 94.8%

 epoch: 1027 | train_loss: 0.56, train_acc: 93.3% | test_loss: 0.54, test_acc: 93.6%

 epoch: 1028 | train_loss: 0.50, train_acc: 94.2% | test_loss: 0.59, test_acc: 93.4%


  7%|▋         | 1030/15000 [02:01<26:14,  8.87it/s]


 epoch: 1029 | train_loss: 0.54, train_acc: 93.7% | test_loss: 0.53, test_acc: 93.8%

input:       have existed alongside hunter gatherer cultures it is speculated that by bc cattle were domesticated in north africa in

target:      have existed alongside hunter gatherer cultures it is speculated that by bc cattle were domesticated in north africa in the

prediction:  have existed alongside hunter gatherer cultures it is commonwealth that by bc cattle were domesticated in north africa in the

 epoch: 1030 | train_loss: 0.58, train_acc: 93.2% | test_loss: 0.48, test_acc: 94.2%


  7%|▋         | 1033/15000 [02:01<22:52, 10.17it/s]


 epoch: 1031 | train_loss: 0.62, train_acc: 92.6% | test_loss: 0.54, test_acc: 93.7%

 epoch: 1032 | train_loss: 0.52, train_acc: 93.6% | test_loss: 0.54, test_acc: 93.6%

 epoch: 1033 | train_loss: 0.57, train_acc: 93.3% | test_loss: 0.59, test_acc: 93.3%


  7%|▋         | 1035/15000 [02:01<25:31,  9.12it/s]


 epoch: 1034 | train_loss: 0.59, train_acc: 93.0% | test_loss: 0.60, test_acc: 93.4%

 epoch: 1035 | train_loss: 0.56, train_acc: 93.3% | test_loss: 0.48, test_acc: 94.1%


  7%|▋         | 1037/15000 [02:02<23:15, 10.00it/s]


 epoch: 1036 | train_loss: 0.56, train_acc: 93.5% | test_loss: 0.53, test_acc: 93.7%

 epoch: 1037 | train_loss: 0.53, train_acc: 94.1% | test_loss: 0.59, test_acc: 93.1%

 epoch: 1038 | train_loss: 0.54, train_acc: 93.8% | test_loss: 0.55, test_acc: 93.7%


  7%|▋         | 1041/15000 [02:02<22:23, 10.39it/s]


 epoch: 1039 | train_loss: 0.58, train_acc: 93.2% | test_loss: 0.49, test_acc: 94.3%

input:       el meleq site in egypt although no direct matches to the djehutynakht sequence have been reported haplogroup is found

target:      el meleq site in egypt although no direct matches to the djehutynakht sequence have been reported haplogroup is found in

prediction:  el meleq site in egypt although no direct matches to the ceo sequence have been reported haplogroup is found the

 epoch: 1040 | train_loss: 0.52, train_acc: 93.9% | test_loss: 0.55, test_acc: 93.6%

 epoch: 1041 | train_loss: 0.64, train_acc: 92.5% | test_loss: 0.62, test_acc: 93.0%


  7%|▋         | 1043/15000 [02:02<21:32, 10.80it/s]


 epoch: 1042 | train_loss: 0.48, train_acc: 94.4% | test_loss: 0.58, test_acc: 93.4%

 epoch: 1043 | train_loss: 0.49, train_acc: 93.8% | test_loss: 0.64, test_acc: 92.8%

 epoch: 1044 | train_loss: 0.51, train_acc: 93.8% | test_loss: 0.52, test_acc: 93.6%


  7%|▋         | 1047/15000 [02:02<20:45, 11.20it/s]


 epoch: 1045 | train_loss: 0.57, train_acc: 93.0% | test_loss: 0.51, test_acc: 94.0%

 epoch: 1046 | train_loss: 0.47, train_acc: 94.5% | test_loss: 0.58, test_acc: 93.1%

 epoch: 1047 | train_loss: 0.47, train_acc: 95.0% | test_loss: 0.54, test_acc: 93.9%

 epoch: 1048 | train_loss: 0.53, train_acc: 94.0% | test_loss: 0.60, test_acc: 92.7%


  7%|▋         | 1050/15000 [02:03<34:55,  6.66it/s]


 epoch: 1049 | train_loss: 0.56, train_acc: 93.5% | test_loss: 0.63, test_acc: 92.6%

input:       continent the trend was at scale unprecedented over the last years the most dominant driver of the mode of

target:      continent the trend was at scale unprecedented over the last years the most dominant driver of the mode of variability

prediction:  continent the trend was at scale unprecedented over the last years the most dominant figure of the mode of the

 epoch: 1050 | train_loss: 0.52, train_acc: 93.8% | test_loss: 0.50, test_acc: 94.1%

 epoch: 1051 | train_loss: 0.53, train_acc: 93.4% | test_loss: 0.53, test_acc: 93.5%

  7%|▋         | 1052/15000 [02:03<29:47,  7.80it/s]



 epoch: 1052 | train_loss: 0.49, train_acc: 94.2% | test_loss: 0.58, test_acc: 92.9%

 epoch: 1053 | train_loss: 0.56, train_acc: 93.5% | test_loss: 0.57, test_acc: 93.6%


  7%|▋         | 1056/15000 [02:04<23:46,  9.78it/s]


 epoch: 1054 | train_loss: 0.59, train_acc: 92.9% | test_loss: 0.59, test_acc: 93.2%

 epoch: 1055 | train_loss: 0.48, train_acc: 94.7% | test_loss: 0.65, test_acc: 92.5%

 epoch: 1056 | train_loss: 0.51, train_acc: 93.6% | test_loss: 0.58, test_acc: 93.2%


  7%|▋         | 1058/15000 [02:04<22:29, 10.33it/s]


 epoch: 1057 | train_loss: 0.57, train_acc: 93.1% | test_loss: 0.53, test_acc: 93.6%

 epoch: 1058 | train_loss: 0.45, train_acc: 94.2% | test_loss: 0.49, test_acc: 94.5%

 epoch: 1059 | train_loss: 0.51, train_acc: 94.1% | test_loss: 0.51, test_acc: 94.0%


  7%|▋         | 1060/15000 [02:04<23:15,  9.99it/s]


input:       become one of the country most celebrated songwriters mid th century american pop stars such as bing crosby frank

target:      become one of the country most celebrated songwriters mid th century american pop stars such as bing crosby frank sinatra

prediction:  become one of the country most celebrated earlier mid th century american pop stars such as easter neutral frank the

 epoch: 1060 | train_loss: 0.49, train_acc: 94.4% | test_loss: 0.54, test_acc: 93.8%

 epoch: 1061 | train_loss: 0.52, train_acc: 93.8% | test_loss: 0.53, test_acc: 93.5%


  7%|▋         | 1062/15000 [02:04<22:46, 10.20it/s]


 epoch: 1062 | train_loss: 0.52, train_acc: 93.5% | test_loss: 0.54, test_acc: 93.4%


  7%|▋         | 1064/15000 [02:05<38:35,  6.02it/s]


 epoch: 1063 | train_loss: 0.54, train_acc: 93.5% | test_loss: 0.51, test_acc: 93.9%

 epoch: 1064 | train_loss: 0.49, train_acc: 94.1% | test_loss: 0.56, test_acc: 93.6%

 epoch: 1065 | train_loss: 0.50, train_acc: 94.2% | test_loss: 0.58, test_acc: 93.0%


  7%|▋         | 1068/15000 [02:05<29:13,  7.95it/s]


 epoch: 1066 | train_loss: 0.49, train_acc: 93.8% | test_loss: 0.49, test_acc: 94.3%

 epoch: 1067 | train_loss: 0.50, train_acc: 93.8% | test_loss: 0.49, test_acc: 94.2%

 epoch: 1068 | train_loss: 0.47, train_acc: 94.5% | test_loss: 0.48, test_acc: 94.3%


  7%|▋         | 1070/15000 [02:05<27:09,  8.55it/s]


 epoch: 1069 | train_loss: 0.47, train_acc: 94.3% | test_loss: 0.61, test_acc: 92.7%

input:       however the division was merely nominal actual authority in the west still rested with gratian and with valens as

target:      however the division was merely nominal actual authority in the west still rested with gratian and with valens as the

prediction:  however the division was merely nominal actual authority in the west still rested with gratian and with valens as the

 epoch: 1070 | train_loss: 0.48, train_acc: 94.4% | test_loss: 0.53, test_acc: 93.5%


  7%|▋         | 1072/15000 [02:06<25:22,  9.15it/s]


 epoch: 1071 | train_loss: 0.46, train_acc: 94.3% | test_loss: 0.52, test_acc: 93.8%

 epoch: 1072 | train_loss: 0.47, train_acc: 94.4% | test_loss: 0.55, test_acc: 93.6%

 epoch: 1073 | train_loss: 0.53, train_acc: 93.8% | test_loss: 0.53, test_acc: 94.1%


  7%|▋         | 1076/15000 [02:06<22:26, 10.34it/s]


 epoch: 1074 | train_loss: 0.49, train_acc: 94.2% | test_loss: 0.45, test_acc: 94.5%

 epoch: 1075 | train_loss: 0.52, train_acc: 94.1% | test_loss: 0.52, test_acc: 93.7%

 epoch: 1076 | train_loss: 0.53, train_acc: 93.8% | test_loss: 0.52, test_acc: 94.1%


  7%|▋         | 1078/15000 [02:06<23:12, 10.00it/s]


 epoch: 1077 | train_loss: 0.51, train_acc: 93.8% | test_loss: 0.54, test_acc: 93.2%

 epoch: 1078 | train_loss: 0.54, train_acc: 93.4% | test_loss: 0.49, test_acc: 94.3%


  7%|▋         | 1080/15000 [02:06<23:00, 10.08it/s]


 epoch: 1079 | train_loss: 0.56, train_acc: 93.6% | test_loss: 0.55, test_acc: 93.3%

input:       seized timbuktu in and jenne in building his regime on trade revenues and the cooperation of muslim merchants his

target:      seized timbuktu in and jenne in building his regime on trade revenues and the cooperation of muslim merchants his successor

prediction:  seized consistent in and contract in building his regime on trade revenues and the cooperation of muslim merchants his the

 epoch: 1080 | train_loss: 0.50, train_acc: 94.0% | test_loss: 0.49, test_acc: 94.4%

 epoch: 1081 | train_loss: 0.50, train_acc: 94.2% | test_loss: 0.49, test_acc: 94.4%


  7%|▋         | 1084/15000 [02:07<20:55, 11.08it/s]


 epoch: 1082 | train_loss: 0.50, train_acc: 94.1% | test_loss: 0.58, test_acc: 93.1%

 epoch: 1083 | train_loss: 0.54, train_acc: 93.8% | test_loss: 0.52, test_acc: 93.9%

 epoch: 1084 | train_loss: 0.44, train_acc: 94.8% | test_loss: 0.60, test_acc: 93.0%


  7%|▋         | 1086/15000 [02:07<20:20, 11.40it/s]


 epoch: 1085 | train_loss: 0.49, train_acc: 93.9% | test_loss: 0.50, test_acc: 94.4%

 epoch: 1086 | train_loss: 0.54, train_acc: 93.5% | test_loss: 0.59, test_acc: 92.7%

 epoch: 1087 | train_loss: 0.47, train_acc: 94.5% | test_loss: 0.53, test_acc: 93.8%


  7%|▋         | 1088/15000 [02:07<20:20, 11.40it/s]


 epoch: 1088 | train_loss: 0.53, train_acc: 93.9% | test_loss: 0.56, test_acc: 93.1%

 epoch: 1089 | train_loss: 0.45, train_acc: 94.7% | test_loss: 0.49, test_acc: 94.3%

input:       informally during everyday activities education was achieved through oral communication and imitation it could take the form of storytelling

target:      informally during everyday activities education was achieved through oral communication and imitation it could take the form of storytelling and

prediction:  informally during everyday activities education was achieved through oral communication and imitation it could take the form of storytelling the


  7%|▋         | 1090/15000 [02:07<21:38, 10.71it/s]


 epoch: 1090 | train_loss: 0.53, train_acc: 93.9% | test_loss: 0.48, test_acc: 94.4%

 epoch: 1091 | train_loss: 0.48, train_acc: 94.3% | test_loss: 0.56, test_acc: 93.6%


  7%|▋         | 1094/15000 [02:08<30:51,  7.51it/s]


 epoch: 1092 | train_loss: 0.49, train_acc: 94.1% | test_loss: 0.52, test_acc: 93.7%

 epoch: 1093 | train_loss: 0.55, train_acc: 93.6% | test_loss: 0.58, test_acc: 92.9%

 epoch: 1094 | train_loss: 0.51, train_acc: 94.1% | test_loss: 0.52, test_acc: 93.8%


  7%|▋         | 1096/15000 [02:08<27:15,  8.50it/s]


 epoch: 1095 | train_loss: 0.51, train_acc: 94.2% | test_loss: 0.57, test_acc: 93.4%

 epoch: 1096 | train_loss: 0.52, train_acc: 93.8% | test_loss: 0.54, test_acc: 93.6%

 epoch: 1097 | train_loss: 0.50, train_acc: 94.3% | test_loss: 0.52, test_acc: 94.2%


  7%|▋         | 1100/15000 [02:09<23:50,  9.72it/s]


 epoch: 1098 | train_loss: 0.51, train_acc: 93.8% | test_loss: 0.55, test_acc: 93.7%

 epoch: 1099 | train_loss: 0.44, train_acc: 94.6% | test_loss: 0.50, test_acc: 94.2%

input:       as the american revolutionary war progressed france and spain both then enemies of britain began to ultimately see the

target:      as the american revolutionary war progressed france and spain both then enemies of britain began to ultimately see the promise

prediction:  as the american revolutionary war progressed france and spain both then enemies of britain began to ultimately see the the

 epoch: 1100 | train_loss: 0.52, train_acc: 93.9% | test_loss: 0.52, test_acc: 93.4%


  7%|▋         | 1102/15000 [02:09<22:20, 10.37it/s]


 epoch: 1101 | train_loss: 0.52, train_acc: 94.0% | test_loss: 0.50, test_acc: 94.3%

 epoch: 1102 | train_loss: 0.52, train_acc: 93.8% | test_loss: 0.53, test_acc: 94.0%

 epoch: 1103 | train_loss: 0.50, train_acc: 94.2% | test_loss: 0.54, test_acc: 93.7%


  7%|▋         | 1104/15000 [02:09<21:44, 10.66it/s]


 epoch: 1104 | train_loss: 0.53, train_acc: 93.3% | test_loss: 0.53, test_acc: 93.8%

 epoch: 1105 | train_loss: 0.55, train_acc: 93.5% | test_loss: 0.64, test_acc: 92.7%


  7%|▋         | 1108/15000 [02:10<31:22,  7.38it/s]


 epoch: 1106 | train_loss: 0.49, train_acc: 93.5% | test_loss: 0.50, test_acc: 94.0%

 epoch: 1107 | train_loss: 0.45, train_acc: 94.6% | test_loss: 0.55, test_acc: 93.2%

 epoch: 1108 | train_loss: 0.51, train_acc: 93.9% | test_loss: 0.48, test_acc: 94.5%


  7%|▋         | 1110/15000 [02:10<28:43,  8.06it/s]


 epoch: 1109 | train_loss: 0.47, train_acc: 94.8% | test_loss: 0.47, test_acc: 94.6%

input:       that concerns itself with all aspects of language examining it from all of the theoretical viewpoints described above the

target:      that concerns itself with all aspects of language examining it from all of the theoretical viewpoints described above the academic

prediction:  that concerns itself with all aspects of language examining it from all of the theoretical aspects described above the the

 epoch: 1110 | train_loss: 0.43, train_acc: 94.6% | test_loss: 0.50, test_acc: 94.3%

 epoch: 1111 | train_loss: 0.56, train_acc: 93.3% | test_loss: 0.55, test_acc: 93.7%


  7%|▋         | 1114/15000 [02:10<23:11,  9.98it/s]


 epoch: 1112 | train_loss: 0.48, train_acc: 93.8% | test_loss: 0.57, test_acc: 92.8%

 epoch: 1113 | train_loss: 0.53, train_acc: 93.8% | test_loss: 0.59, test_acc: 92.7%

 epoch: 1114 | train_loss: 0.50, train_acc: 94.2% | test_loss: 0.53, test_acc: 93.7%


  7%|▋         | 1116/15000 [02:10<21:52, 10.58it/s]


 epoch: 1115 | train_loss: 0.53, train_acc: 94.3% | test_loss: 0.49, test_acc: 94.0%

 epoch: 1116 | train_loss: 0.54, train_acc: 93.5% | test_loss: 0.52, test_acc: 93.9%

 epoch: 1117 | train_loss: 0.51, train_acc: 94.1% | test_loss: 0.52, test_acc: 93.8%


  7%|▋         | 1118/15000 [02:11<21:56, 10.55it/s]


 epoch: 1118 | train_loss: 0.59, train_acc: 93.5% | test_loss: 0.50, test_acc: 93.8%

 epoch: 1119 | train_loss: 0.53, train_acc: 94.0% | test_loss: 0.55, test_acc: 93.7%

input:       with distress the escapist nature of humour as coping mechanism suggests that it is most useful in dealing with

target:      with distress the escapist nature of humour as coping mechanism suggests that it is most useful in dealing with momentary


  7%|▋         | 1120/15000 [02:11<24:56,  9.28it/s]


prediction:  with distress the regular nature of humour as coping mechanism suggests that it is most useful in dealing with the

 epoch: 1120 | train_loss: 0.49, train_acc: 94.2% | test_loss: 0.47, test_acc: 94.7%


  7%|▋         | 1122/15000 [02:11<36:26,  6.35it/s]


 epoch: 1121 | train_loss: 0.54, train_acc: 93.8% | test_loss: 0.56, test_acc: 93.4%

 epoch: 1122 | train_loss: 0.51, train_acc: 94.1% | test_loss: 0.47, test_acc: 94.3%


  7%|▋         | 1124/15000 [02:12<32:21,  7.15it/s]


 epoch: 1123 | train_loss: 0.46, train_acc: 94.4% | test_loss: 0.58, test_acc: 92.9%

 epoch: 1124 | train_loss: 0.54, train_acc: 93.1% | test_loss: 0.47, test_acc: 94.3%


  8%|▊         | 1125/15000 [02:12<30:17,  7.63it/s]


 epoch: 1125 | train_loss: 0.49, train_acc: 93.8% | test_loss: 0.48, test_acc: 94.2%

 epoch: 1126 | train_loss: 0.54, train_acc: 93.7% | test_loss: 0.53, test_acc: 93.7%

 epoch: 1127 | train_loss: 0.48, train_acc: 94.6% | test_loss: 0.54, test_acc: 93.8%


  8%|▊         | 1129/15000 [02:12<25:33,  9.05it/s]


 epoch: 1128 | train_loss: 0.56, train_acc: 93.3% | test_loss: 0.51, test_acc: 94.0%

 epoch: 1129 | train_loss: 0.54, train_acc: 93.5% | test_loss: 0.53, test_acc: 94.0%

input:       centuries the grand duchy of moscow grew from small principality under mongol rule to the largest state in europe

target:      centuries the grand duchy of moscow grew from small principality under mongol rule to the largest state in europe overthrowing


  8%|▊         | 1131/15000 [02:12<26:55,  8.59it/s]


prediction:  centuries the grand duchy of moscow grew from small immigration under mongol rule to the largest state in europe the

 epoch: 1130 | train_loss: 0.50, train_acc: 94.1% | test_loss: 0.54, test_acc: 93.4%

 epoch: 1131 | train_loss: 0.49, train_acc: 93.8% | test_loss: 0.53, test_acc: 93.5%


  8%|▊         | 1133/15000 [02:13<24:57,  9.26it/s]


 epoch: 1132 | train_loss: 0.48, train_acc: 94.5% | test_loss: 0.48, test_acc: 94.5%

 epoch: 1133 | train_loss: 0.56, train_acc: 93.5% | test_loss: 0.44, test_acc: 95.0%


  8%|▊         | 1134/15000 [02:13<25:23,  9.10it/s]


 epoch: 1134 | train_loss: 0.51, train_acc: 94.0% | test_loss: 0.51, test_acc: 93.9%


  8%|▊         | 1136/15000 [02:13<44:00,  5.25it/s]


 epoch: 1135 | train_loss: 0.53, train_acc: 93.6% | test_loss: 0.55, test_acc: 93.1%

 epoch: 1136 | train_loss: 0.44, train_acc: 94.8% | test_loss: 0.54, test_acc: 93.8%


  8%|▊         | 1138/15000 [02:13<35:07,  6.58it/s]


 epoch: 1137 | train_loss: 0.46, train_acc: 94.8% | test_loss: 0.59, test_acc: 92.9%

 epoch: 1138 | train_loss: 0.52, train_acc: 93.4% | test_loss: 0.51, test_acc: 94.3%


  8%|▊         | 1140/15000 [02:14<33:51,  6.82it/s]


 epoch: 1139 | train_loss: 0.52, train_acc: 93.3% | test_loss: 0.52, test_acc: 94.0%

input:       considered one of the four major grand slam tournaments in tennis it was held for the first time in

target:      considered one of the four major grand slam tournaments in tennis it was held for the first time in africa

prediction:  considered one of the four major grand demographic tournaments in tennis it was held for the first time in the

 epoch: 1140 | train_loss: 0.47, train_acc: 94.6% | test_loss: 0.56, test_acc: 93.6%


  8%|▊         | 1143/15000 [02:14<26:12,  8.81it/s]


 epoch: 1141 | train_loss: 0.45, train_acc: 94.6% | test_loss: 0.48, test_acc: 94.6%

 epoch: 1142 | train_loss: 0.56, train_acc: 93.3% | test_loss: 0.52, test_acc: 94.0%

 epoch: 1143 | train_loss: 0.57, train_acc: 93.3% | test_loss: 0.47, test_acc: 94.5%


  8%|▊         | 1145/15000 [02:14<24:15,  9.52it/s]


 epoch: 1144 | train_loss: 0.53, train_acc: 94.1% | test_loss: 0.49, test_acc: 94.3%

 epoch: 1145 | train_loss: 0.46, train_acc: 94.6% | test_loss: 0.55, test_acc: 93.1%

 epoch: 1146 | train_loss: 0.50, train_acc: 93.9% | test_loss: 0.56, test_acc: 93.6%


  8%|▊         | 1149/15000 [02:15<22:10, 10.41it/s]


 epoch: 1147 | train_loss: 0.49, train_acc: 93.9% | test_loss: 0.48, test_acc: 94.7%

 epoch: 1148 | train_loss: 0.46, train_acc: 94.7% | test_loss: 0.48, test_acc: 94.4%

 epoch: 1149 | train_loss: 0.48, train_acc: 94.5% | test_loss: 0.56, test_acc: 93.2%

input:       plate which includes easter island neighbors the south american plate and is still considered to be separate tectonic plate

target:      plate which includes easter island neighbors the south american plate and is still considered to be separate tectonic plate despite

prediction:  plate which includes easter island neighbors the south american plate and is still considered to be separate tectonic plate the


  8%|▊         | 1151/15000 [02:15<36:51,  6.26it/s]


 epoch: 1150 | train_loss: 0.50, train_acc: 94.1% | test_loss: 0.54, test_acc: 93.6%

 epoch: 1151 | train_loss: 0.50, train_acc: 94.3% | test_loss: 0.53, test_acc: 93.6%

 epoch: 1152 | train_loss: 0.43, train_acc: 94.8% | test_loss: 0.49, test_acc: 94.2%


  8%|▊         | 1155/15000 [02:15<27:07,  8.51it/s]


 epoch: 1153 | train_loss: 0.51, train_acc: 93.8% | test_loss: 0.50, test_acc: 93.9%

 epoch: 1154 | train_loss: 0.45, train_acc: 94.6% | test_loss: 0.57, test_acc: 93.4%

 epoch: 1155 | train_loss: 0.44, train_acc: 95.1% | test_loss: 0.49, test_acc: 94.2%


  8%|▊         | 1157/15000 [02:16<24:54,  9.26it/s]


 epoch: 1156 | train_loss: 0.49, train_acc: 93.8% | test_loss: 0.47, test_acc: 94.3%

 epoch: 1157 | train_loss: 0.45, train_acc: 94.5% | test_loss: 0.45, test_acc: 94.8%

 epoch: 1158 | train_loss: 0.54, train_acc: 93.7% | test_loss: 0.49, test_acc: 93.9%


  8%|▊         | 1161/15000 [02:16<22:58, 10.04it/s]


 epoch: 1159 | train_loss: 0.48, train_acc: 94.0% | test_loss: 0.50, test_acc: 94.0%

input:       watching demonstration without necessarily imitating every discrete movement modeled by the teacher in the demonstration stroke for stroke susan

target:      watching demonstration without necessarily imitating every discrete movement modeled by the teacher in the demonstration stroke for stroke susan blackmore

prediction:  cooperation demonstration without necessarily imitating every discrete movement modeled by the teacher in the demonstration chinese for religious susan the

 epoch: 1160 | train_loss: 0.50, train_acc: 94.1% | test_loss: 0.53, test_acc: 93.5%

 epoch: 1161 | train_loss: 0.56, train_acc: 93.1% | test_loss: 0.43, test_acc: 94.9%


  8%|▊         | 1163/15000 [02:16<21:45, 10.60it/s]


 epoch: 1162 | train_loss: 0.53, train_acc: 94.2% | test_loss: 0.49, test_acc: 94.3%

 epoch: 1163 | train_loss: 0.49, train_acc: 94.0% | test_loss: 0.49, test_acc: 94.1%


  8%|▊         | 1165/15000 [02:16<25:01,  9.21it/s]


 epoch: 1164 | train_loss: 0.52, train_acc: 93.8% | test_loss: 0.50, test_acc: 94.0%

 epoch: 1165 | train_loss: 0.48, train_acc: 94.0% | test_loss: 0.50, test_acc: 94.1%

 epoch: 1166 | train_loss: 0.44, train_acc: 94.5% | test_loss: 0.53, test_acc: 93.5%


  8%|▊         | 1169/15000 [02:17<21:38, 10.65it/s]


 epoch: 1167 | train_loss: 0.50, train_acc: 94.0% | test_loss: 0.53, test_acc: 93.8%

 epoch: 1168 | train_loss: 0.53, train_acc: 93.7% | test_loss: 0.48, test_acc: 94.3%

 epoch: 1169 | train_loss: 0.55, train_acc: 93.0% | test_loss: 0.50, test_acc: 94.1%


  8%|▊         | 1171/15000 [02:17<21:30, 10.72it/s]


input:       estimated at to years and that researchers on the evolutionary origin of language generally find it plausible to suggest

target:      estimated at to years and that researchers on the evolutionary origin of language generally find it plausible to suggest that

prediction:  estimated at to years and that researchers on the evolutionary origin of language generally find it freedmen to suggest the

 epoch: 1170 | train_loss: 0.52, train_acc: 94.1% | test_loss: 0.51, test_acc: 94.1%

 epoch: 1171 | train_loss: 0.52, train_acc: 93.8% | test_loss: 0.47, test_acc: 94.3%

 epoch: 1172 | train_loss: 0.59, train_acc: 93.0% | test_loss: 0.49, test_acc: 94.3%


  8%|▊         | 1175/15000 [02:17<20:36, 11.18it/s]


 epoch: 1173 | train_loss: 0.50, train_acc: 94.0% | test_loss: 0.48, test_acc: 94.6%

 epoch: 1174 | train_loss: 0.47, train_acc: 94.6% | test_loss: 0.55, test_acc: 93.3%

 epoch: 1175 | train_loss: 0.42, train_acc: 95.4% | test_loss: 0.56, test_acc: 93.1%


  8%|▊         | 1177/15000 [02:17<20:32, 11.21it/s]


 epoch: 1176 | train_loss: 0.49, train_acc: 94.5% | test_loss: 0.57, test_acc: 93.1%

 epoch: 1177 | train_loss: 0.51, train_acc: 93.7% | test_loss: 0.46, test_acc: 94.6%


  8%|▊         | 1180/15000 [02:18<33:58,  6.78it/s]


 epoch: 1178 | train_loss: 0.51, train_acc: 93.8% | test_loss: 0.56, test_acc: 93.5%

 epoch: 1179 | train_loss: 0.47, train_acc: 94.3% | test_loss: 0.51, test_acc: 93.8%

input:       have only about of the critical density since theory suggests that dark energy does not cluster in the usual

target:      have only about of the critical density since theory suggests that dark energy does not cluster in the usual way

prediction:  have only about of the critical density since theory suggests that dark energy does not cluster in the usual the

 epoch: 1180 | train_loss: 0.53, train_acc: 93.8% | test_loss: 0.49, test_acc: 93.9%


  8%|▊         | 1182/15000 [02:18<29:06,  7.91it/s]


 epoch: 1181 | train_loss: 0.46, train_acc: 95.0% | test_loss: 0.51, test_acc: 93.9%

 epoch: 1182 | train_loss: 0.47, train_acc: 94.1% | test_loss: 0.54, test_acc: 93.5%

 epoch: 1183 | train_loss: 0.44, train_acc: 94.6% | test_loss: 0.52, test_acc: 93.8%


  8%|▊         | 1186/15000 [02:19<23:25,  9.83it/s]


 epoch: 1184 | train_loss: 0.54, train_acc: 93.6% | test_loss: 0.51, test_acc: 94.3%

 epoch: 1185 | train_loss: 0.49, train_acc: 94.3% | test_loss: 0.50, test_acc: 94.1%

 epoch: 1186 | train_loss: 0.55, train_acc: 93.7% | test_loss: 0.50, test_acc: 94.2%


  8%|▊         | 1188/15000 [02:19<22:35, 10.19it/s]


 epoch: 1187 | train_loss: 0.43, train_acc: 95.1% | test_loss: 0.48, test_acc: 94.1%

 epoch: 1188 | train_loss: 0.54, train_acc: 93.8% | test_loss: 0.47, test_acc: 94.4%

 epoch: 1189 | train_loss: 0.46, train_acc: 94.8% | test_loss: 0.48, test_acc: 94.1%


  8%|▊         | 1190/15000 [02:19<23:31,  9.78it/s]


input:       social media platforms there is growing concern that some emerging internet subcultures are becoming increasingly radical it is important

target:      social media platforms there is growing concern that some emerging internet subcultures are becoming increasingly radical it is important to

prediction:  social media platforms there is growing concern that some emerging internet subcultures are becoming increasingly radical it is important the

 epoch: 1190 | train_loss: 0.50, train_acc: 93.9% | test_loss: 0.53, test_acc: 93.9%

 epoch: 1191 | train_loss: 0.48, train_acc: 94.2% | test_loss: 0.49, test_acc: 94.0%


  8%|▊         | 1194/15000 [02:20<33:11,  6.93it/s]


 epoch: 1192 | train_loss: 0.49, train_acc: 94.2% | test_loss: 0.47, test_acc: 94.2%

 epoch: 1193 | train_loss: 0.44, train_acc: 94.8% | test_loss: 0.45, test_acc: 94.7%

 epoch: 1194 | train_loss: 0.46, train_acc: 94.4% | test_loss: 0.48, test_acc: 94.5%


  8%|▊         | 1196/15000 [02:20<28:55,  7.95it/s]


 epoch: 1195 | train_loss: 0.51, train_acc: 93.8% | test_loss: 0.43, test_acc: 95.1%

 epoch: 1196 | train_loss: 0.51, train_acc: 94.2% | test_loss: 0.50, test_acc: 93.8%

 epoch: 1197 | train_loss: 0.48, train_acc: 94.1% | test_loss: 0.55, test_acc: 93.7%


  8%|▊         | 1200/15000 [02:20<24:37,  9.34it/s]


 epoch: 1198 | train_loss: 0.49, train_acc: 94.1% | test_loss: 0.49, test_acc: 94.6%

 epoch: 1199 | train_loss: 0.51, train_acc: 93.8% | test_loss: 0.52, test_acc: 94.0%

input:       distinctions between syntagm and paradigm and the langue parole distinction distinguishing language as an abstract system langue from language

target:      distinctions between syntagm and paradigm and the langue parole distinction distinguishing language as an abstract system langue from language as

prediction:  distinctions between hawaiian and paradigm and the langue parole distinction distinguishing language as an abstract system langue from language the

 epoch: 1200 | train_loss: 0.56, train_acc: 93.8% | test_loss: 0.57, test_acc: 93.3%


  8%|▊         | 1202/15000 [02:21<23:09,  9.93it/s]


 epoch: 1201 | train_loss: 0.53, train_acc: 93.5% | test_loss: 0.51, test_acc: 94.0%

 epoch: 1202 | train_loss: 0.46, train_acc: 94.9% | test_loss: 0.48, test_acc: 94.4%

 epoch: 1203 | train_loss: 0.48, train_acc: 94.6% | test_loss: 0.52, test_acc: 93.9%


  8%|▊         | 1206/15000 [02:21<21:14, 10.83it/s]


 epoch: 1204 | train_loss: 0.43, train_acc: 95.1% | test_loss: 0.46, test_acc: 94.6%

 epoch: 1205 | train_loss: 0.44, train_acc: 95.3% | test_loss: 0.49, test_acc: 94.3%

 epoch: 1206 | train_loss: 0.51, train_acc: 93.9% | test_loss: 0.54, test_acc: 93.7%


  8%|▊         | 1208/15000 [02:21<27:53,  8.24it/s]


 epoch: 1207 | train_loss: 0.52, train_acc: 93.8% | test_loss: 0.51, test_acc: 93.9%

 epoch: 1208 | train_loss: 0.47, train_acc: 94.6% | test_loss: 0.48, test_acc: 94.6%

 epoch: 1209 | train_loss: 0.51, train_acc: 94.2% | test_loss: 0.50, test_acc: 94.3%


  8%|▊         | 1210/15000 [02:22<26:25,  8.70it/s]


input:       protestant anglo settlers in north america french speaking canadians were catholic and with the quebec act were guaranteed freedom

target:      protestant anglo settlers in north america french speaking canadians were catholic and with the quebec act were guaranteed freedom to

prediction:  protestant anglo settlers in north america french speaking canadians were catholic and with the quebec act were signs freedom the

 epoch: 1210 | train_loss: 0.48, train_acc: 94.5% | test_loss: 0.53, test_acc: 93.5%

 epoch: 1211 | train_loss: 0.48, train_acc: 93.9% | test_loss: 0.53, test_acc: 93.7%


  8%|▊         | 1214/15000 [02:22<22:07, 10.38it/s]


 epoch: 1212 | train_loss: 0.47, train_acc: 94.1% | test_loss: 0.51, test_acc: 93.6%

 epoch: 1213 | train_loss: 0.45, train_acc: 94.7% | test_loss: 0.47, test_acc: 94.8%

 epoch: 1214 | train_loss: 0.43, train_acc: 95.1% | test_loss: 0.48, test_acc: 94.0%


  8%|▊         | 1216/15000 [02:22<21:38, 10.62it/s]


 epoch: 1215 | train_loss: 0.43, train_acc: 95.1% | test_loss: 0.47, test_acc: 94.7%

 epoch: 1216 | train_loss: 0.45, train_acc: 94.5% | test_loss: 0.52, test_acc: 94.0%

 epoch: 1217 | train_loss: 0.48, train_acc: 94.6% | test_loss: 0.49, test_acc: 94.0%


  8%|▊         | 1218/15000 [02:22<21:14, 10.81it/s]


 epoch: 1218 | train_loss: 0.52, train_acc: 93.5% | test_loss: 0.55, test_acc: 93.2%

 epoch: 1219 | train_loss: 0.55, train_acc: 93.2% | test_loss: 0.52, test_acc: 93.6%

input:       ancient egyptians had developed glassy material known as faience which they treated as type of artificial semi precious stone

target:      ancient egyptians had developed glassy material known as faience which they treated as type of artificial semi precious stone faience

prediction:  ancient egyptians had developed posix material known as faience which they treated as type of artificial semi precious stone the


  8%|▊         | 1220/15000 [02:22<22:03, 10.41it/s]


 epoch: 1220 | train_loss: 0.42, train_acc: 95.0% | test_loss: 0.49, test_acc: 94.0%


  8%|▊         | 1222/15000 [02:23<36:29,  6.29it/s]


 epoch: 1221 | train_loss: 0.52, train_acc: 93.8% | test_loss: 0.48, test_acc: 94.4%

 epoch: 1222 | train_loss: 0.48, train_acc: 94.5% | test_loss: 0.46, test_acc: 94.6%

 epoch: 1223 | train_loss: 0.49, train_acc: 94.3% | test_loss: 0.46, test_acc: 94.6%


  8%|▊         | 1226/15000 [02:23<26:48,  8.56it/s]


 epoch: 1224 | train_loss: 0.46, train_acc: 94.6% | test_loss: 0.48, test_acc: 94.4%

 epoch: 1225 | train_loss: 0.45, train_acc: 95.0% | test_loss: 0.45, test_acc: 94.9%

 epoch: 1226 | train_loss: 0.46, train_acc: 94.2% | test_loss: 0.47, test_acc: 94.3%


  8%|▊         | 1228/15000 [02:24<24:20,  9.43it/s]


 epoch: 1227 | train_loss: 0.56, train_acc: 93.4% | test_loss: 0.45, test_acc: 94.8%

 epoch: 1228 | train_loss: 0.51, train_acc: 94.0% | test_loss: 0.48, test_acc: 94.4%

 epoch: 1229 | train_loss: 0.54, train_acc: 93.3% | test_loss: 0.48, test_acc: 94.6%


  8%|▊         | 1230/15000 [02:24<23:29,  9.77it/s]


input:       lines are projected as points these can be used to find the locations of fold axes relationships between faults

target:      lines are projected as points these can be used to find the locations of fold axes relationships between faults and

prediction:  lines are projected as points these can be used to find the locations of fold axes relationships between faults the

 epoch: 1230 | train_loss: 0.51, train_acc: 93.9% | test_loss: 0.49, test_acc: 94.1%

 epoch: 1231 | train_loss: 0.44, train_acc: 94.8% | test_loss: 0.49, test_acc: 93.9%


  8%|▊         | 1234/15000 [02:24<22:21, 10.26it/s]


 epoch: 1232 | train_loss: 0.50, train_acc: 94.6% | test_loss: 0.52, test_acc: 93.9%

 epoch: 1233 | train_loss: 0.50, train_acc: 94.0% | test_loss: 0.48, test_acc: 94.5%

 epoch: 1234 | train_loss: 0.48, train_acc: 94.2% | test_loss: 0.55, test_acc: 93.7%


  8%|▊         | 1236/15000 [02:25<38:53,  5.90it/s]


 epoch: 1235 | train_loss: 0.48, train_acc: 94.3% | test_loss: 0.46, test_acc: 94.1%

 epoch: 1236 | train_loss: 0.47, train_acc: 94.4% | test_loss: 0.45, test_acc: 94.7%

 epoch: 1237 | train_loss: 0.52, train_acc: 93.6% | test_loss: 0.52, test_acc: 94.4%


  8%|▊         | 1239/15000 [02:25<32:09,  7.13it/s]


 epoch: 1238 | train_loss: 0.50, train_acc: 94.2% | test_loss: 0.50, test_acc: 94.2%

 epoch: 1239 | train_loss: 0.49, train_acc: 94.5% | test_loss: 0.46, test_acc: 94.3%

input:       downward clarification needed in each successive generation because the alleles are subject to sampling error this drift halts when


  8%|▊         | 1241/15000 [02:25<30:51,  7.43it/s]


target:      downward clarification needed in each successive generation because the alleles are subject to sampling error this drift halts when an

prediction:  downward clarification needed in each successive generation because the alleles are subject to sampling error this drift plays when the

 epoch: 1240 | train_loss: 0.47, train_acc: 94.3% | test_loss: 0.47, test_acc: 94.5%

 epoch: 1241 | train_loss: 0.48, train_acc: 94.2% | test_loss: 0.47, test_acc: 94.3%


  8%|▊         | 1243/15000 [02:25<27:53,  8.22it/s]


 epoch: 1242 | train_loss: 0.49, train_acc: 94.3% | test_loss: 0.49, test_acc: 93.7%

 epoch: 1243 | train_loss: 0.44, train_acc: 94.6% | test_loss: 0.49, test_acc: 93.9%


  8%|▊         | 1245/15000 [02:26<26:48,  8.55it/s]


 epoch: 1244 | train_loss: 0.47, train_acc: 94.5% | test_loss: 0.49, test_acc: 94.1%

 epoch: 1245 | train_loss: 0.51, train_acc: 94.3% | test_loss: 0.50, test_acc: 94.0%


  8%|▊         | 1247/15000 [02:26<26:28,  8.66it/s]


 epoch: 1246 | train_loss: 0.47, train_acc: 94.7% | test_loss: 0.44, test_acc: 94.8%

 epoch: 1247 | train_loss: 0.48, train_acc: 94.6% | test_loss: 0.46, test_acc: 94.6%


  8%|▊         | 1249/15000 [02:26<27:16,  8.40it/s]


 epoch: 1248 | train_loss: 0.49, train_acc: 93.9% | test_loss: 0.41, test_acc: 95.2%

 epoch: 1249 | train_loss: 0.47, train_acc: 94.1% | test_loss: 0.47, test_acc: 94.9%

input:       by some models of the big bang theory to have existed before the big bang clarification needed this primordial


  8%|▊         | 1251/15000 [02:26<28:18,  8.09it/s]


target:      by some models of the big bang theory to have existed before the big bang clarification needed this primordial singularity

prediction:  by some models of the big bang theory to have existed before the big bang clarification needed this primordial the

 epoch: 1250 | train_loss: 0.44, train_acc: 94.8% | test_loss: 0.44, test_acc: 94.7%

 epoch: 1251 | train_loss: 0.49, train_acc: 94.5% | test_loss: 0.53, test_acc: 94.3%


  8%|▊         | 1253/15000 [02:27<25:31,  8.98it/s]


 epoch: 1252 | train_loss: 0.47, train_acc: 94.3% | test_loss: 0.54, test_acc: 93.1%

 epoch: 1253 | train_loss: 0.44, train_acc: 94.9% | test_loss: 0.49, test_acc: 94.1%

 epoch: 1254 | train_loss: 0.45, train_acc: 94.4% | test_loss: 0.46, test_acc: 94.5%


  8%|▊         | 1257/15000 [02:27<23:29,  9.75it/s]


 epoch: 1255 | train_loss: 0.47, train_acc: 94.2% | test_loss: 0.43, test_acc: 94.9%

 epoch: 1256 | train_loss: 0.49, train_acc: 94.6% | test_loss: 0.51, test_acc: 94.0%

 epoch: 1257 | train_loss: 0.46, train_acc: 94.5% | test_loss: 0.53, test_acc: 93.7%


  8%|▊         | 1258/15000 [02:27<23:51,  9.60it/s]


 epoch: 1258 | train_loss: 0.47, train_acc: 94.1% | test_loss: 0.53, test_acc: 93.7%

 epoch: 1259 | train_loss: 0.41, train_acc: 95.2% | test_loss: 0.51, test_acc: 93.9%

input:       today large regions of egypt were covered in treed savanna and traversed by herds of grazing ungulates foliage and

target:      today large regions of egypt were covered in treed savanna and traversed by herds of grazing ungulates foliage and fauna


  8%|▊         | 1261/15000 [02:28<25:46,  8.89it/s]


prediction:  today large regions of egypt were covered in belonged savanna and traversed by herds of grazing ungulates localization and the

 epoch: 1260 | train_loss: 0.41, train_acc: 95.1% | test_loss: 0.47, test_acc: 94.4%

 epoch: 1261 | train_loss: 0.46, train_acc: 94.6% | test_loss: 0.50, test_acc: 94.1%

 epoch: 1262 | train_loss: 0.49, train_acc: 93.8% | test_loss: 0.43, test_acc: 94.7%


  8%|▊         | 1265/15000 [02:28<34:28,  6.64it/s]


 epoch: 1263 | train_loss: 0.49, train_acc: 93.9% | test_loss: 0.46, test_acc: 94.3%

 epoch: 1264 | train_loss: 0.47, train_acc: 94.4% | test_loss: 0.53, test_acc: 94.2%

 epoch: 1265 | train_loss: 0.47, train_acc: 94.4% | test_loss: 0.47, test_acc: 94.4%


  8%|▊         | 1267/15000 [02:28<29:15,  7.82it/s]


 epoch: 1266 | train_loss: 0.49, train_acc: 94.2% | test_loss: 0.48, test_acc: 94.6%

 epoch: 1267 | train_loss: 0.41, train_acc: 95.3% | test_loss: 0.47, test_acc: 94.2%

 epoch: 1268 | train_loss: 0.50, train_acc: 93.9% | test_loss: 0.43, test_acc: 94.9%


  8%|▊         | 1271/15000 [02:29<24:37,  9.29it/s]


 epoch: 1269 | train_loss: 0.48, train_acc: 94.7% | test_loss: 0.48, test_acc: 94.7%

input:       the largest gold producers in the world in the production of silver in the country extracted tons in the

target:      the largest gold producers in the world in the production of silver in the country extracted tons in the production

prediction:  the largest gold producers in the world in the production of silver in the country extracted tons in the the

 epoch: 1270 | train_loss: 0.50, train_acc: 93.9% | test_loss: 0.49, test_acc: 93.9%

 epoch: 1271 | train_loss: 0.54, train_acc: 93.6% | test_loss: 0.44, test_acc: 95.0%


  8%|▊         | 1273/15000 [02:29<23:06,  9.90it/s]


 epoch: 1272 | train_loss: 0.46, train_acc: 94.6% | test_loss: 0.55, test_acc: 94.2%

 epoch: 1273 | train_loss: 0.50, train_acc: 94.1% | test_loss: 0.47, test_acc: 94.2%

 epoch: 1274 | train_loss: 0.47, train_acc: 94.4% | test_loss: 0.56, test_acc: 93.3%


  8%|▊         | 1275/15000 [02:29<22:24, 10.21it/s]


 epoch: 1275 | train_loss: 0.44, train_acc: 94.6% | test_loss: 0.51, test_acc: 93.8%

 epoch: 1276 | train_loss: 0.45, train_acc: 94.2% | test_loss: 0.48, test_acc: 94.1%


  9%|▊         | 1279/15000 [02:30<32:12,  7.10it/s]


 epoch: 1277 | train_loss: 0.48, train_acc: 94.4% | test_loss: 0.46, test_acc: 94.5%

 epoch: 1278 | train_loss: 0.47, train_acc: 94.0% | test_loss: 0.52, test_acc: 93.7%

 epoch: 1279 | train_loss: 0.43, train_acc: 95.1% | test_loss: 0.52, test_acc: 93.7%


  9%|▊         | 1280/15000 [02:30<31:19,  7.30it/s]


input:       power in egypt as the hyksos around bc as the power of the middle kingdom kings weakened western asian

target:      power in egypt as the hyksos around bc as the power of the middle kingdom kings weakened western asian people

prediction:  power in egypt as the hyksos around bc as the power of the middle kingdom kings weakened western asian the

 epoch: 1280 | train_loss: 0.43, train_acc: 95.3% | test_loss: 0.45, test_acc: 94.5%

 epoch: 1281 | train_loss: 0.52, train_acc: 93.5% | test_loss: 0.51, test_acc: 93.9%


  9%|▊         | 1284/15000 [02:30<24:27,  9.35it/s]


 epoch: 1282 | train_loss: 0.51, train_acc: 93.9% | test_loss: 0.48, test_acc: 94.0%

 epoch: 1283 | train_loss: 0.47, train_acc: 94.4% | test_loss: 0.47, test_acc: 94.6%

 epoch: 1284 | train_loss: 0.51, train_acc: 93.5% | test_loss: 0.49, test_acc: 94.3%


  9%|▊         | 1286/15000 [02:31<22:46, 10.04it/s]


 epoch: 1285 | train_loss: 0.45, train_acc: 94.0% | test_loss: 0.47, test_acc: 94.5%

 epoch: 1286 | train_loss: 0.44, train_acc: 94.6% | test_loss: 0.44, test_acc: 94.6%

 epoch: 1287 | train_loss: 0.44, train_acc: 94.9% | test_loss: 0.45, test_acc: 94.5%


  9%|▊         | 1288/15000 [02:31<22:23, 10.21it/s]


 epoch: 1288 | train_loss: 0.51, train_acc: 93.9% | test_loss: 0.43, test_acc: 94.8%

 epoch: 1289 | train_loss: 0.44, train_acc: 94.7% | test_loss: 0.48, test_acc: 94.5%

input:       that makes up these systems or the set of utterances that can be produced from those rules all languages

target:      that makes up these systems or the set of utterances that can be produced from those rules all languages rely

prediction:  that makes up these systems or the set of utterances that can be produced from those rules all languages the


  9%|▊         | 1290/15000 [02:31<22:54,  9.97it/s]


 epoch: 1290 | train_loss: 0.53, train_acc: 93.8% | test_loss: 0.51, test_acc: 93.8%

 epoch: 1291 | train_loss: 0.52, train_acc: 93.7% | test_loss: 0.47, test_acc: 94.3%


  9%|▊         | 1294/15000 [02:31<23:02,  9.91it/s]


 epoch: 1292 | train_loss: 0.47, train_acc: 94.6% | test_loss: 0.44, test_acc: 94.5%

 epoch: 1293 | train_loss: 0.47, train_acc: 94.2% | test_loss: 0.43, test_acc: 94.6%

 epoch: 1294 | train_loss: 0.47, train_acc: 94.0% | test_loss: 0.52, test_acc: 93.9%


  9%|▊         | 1296/15000 [02:32<21:46, 10.49it/s]


 epoch: 1295 | train_loss: 0.43, train_acc: 94.5% | test_loss: 0.49, test_acc: 94.1%

 epoch: 1296 | train_loss: 0.45, train_acc: 94.7% | test_loss: 0.49, test_acc: 94.3%

 epoch: 1297 | train_loss: 0.51, train_acc: 93.8% | test_loss: 0.50, test_acc: 94.1%


  9%|▊         | 1300/15000 [02:32<21:36, 10.57it/s]


 epoch: 1298 | train_loss: 0.45, train_acc: 94.9% | test_loss: 0.50, test_acc: 94.0%

 epoch: 1299 | train_loss: 0.49, train_acc: 94.4% | test_loss: 0.47, test_acc: 94.7%

input:       revitalisation programme as they deem welsh to be the world leading example for the survival of languages in hawaiian

target:      revitalisation programme as they deem welsh to be the world leading example for the survival of languages in hawaiian tv

prediction:  tonga programme as they decades welsh to be the world leading example for the survival of languages in hawaiian the

 epoch: 1300 | train_loss: 0.49, train_acc: 94.0% | test_loss: 0.44, test_acc: 94.7%


  9%|▊         | 1302/15000 [02:32<21:10, 10.78it/s]


 epoch: 1301 | train_loss: 0.50, train_acc: 94.0% | test_loss: 0.55, test_acc: 93.7%

 epoch: 1302 | train_loss: 0.46, train_acc: 94.5% | test_loss: 0.50, test_acc: 93.9%

 epoch: 1303 | train_loss: 0.45, train_acc: 94.6% | test_loss: 0.48, test_acc: 94.5%


  9%|▊         | 1304/15000 [02:32<21:09, 10.79it/s]


 epoch: 1304 | train_loss: 0.50, train_acc: 94.0% | test_loss: 0.51, test_acc: 93.9%

 epoch: 1305 | train_loss: 0.47, train_acc: 94.4% | test_loss: 0.46, test_acc: 94.7%


  9%|▊         | 1308/15000 [02:33<31:22,  7.27it/s]


 epoch: 1306 | train_loss: 0.42, train_acc: 95.2% | test_loss: 0.50, test_acc: 94.1%

 epoch: 1307 | train_loss: 0.46, train_acc: 93.9% | test_loss: 0.46, test_acc: 94.5%

 epoch: 1308 | train_loss: 0.46, train_acc: 94.4% | test_loss: 0.48, test_acc: 93.7%


  9%|▊         | 1310/15000 [02:33<28:55,  7.89it/s]


 epoch: 1309 | train_loss: 0.44, train_acc: 94.9% | test_loss: 0.47, test_acc: 94.3%

input:       in with its first major visual update in decade development for the new site took more than year it

target:      in with its first major visual update in decade development for the new site took more than year it was

prediction:  in with its first major visual update in decade development for the new site took more than year it the

 epoch: 1310 | train_loss: 0.47, train_acc: 94.2% | test_loss: 0.41, test_acc: 95.1%

 epoch: 1311 | train_loss: 0.41, train_acc: 94.8% | test_loss: 0.47, test_acc: 94.5%


  9%|▉         | 1314/15000 [02:34<24:01,  9.50it/s]


 epoch: 1312 | train_loss: 0.49, train_acc: 94.3% | test_loss: 0.47, test_acc: 94.8%

 epoch: 1313 | train_loss: 0.47, train_acc: 94.0% | test_loss: 0.49, test_acc: 94.2%

 epoch: 1314 | train_loss: 0.49, train_acc: 93.8% | test_loss: 0.43, test_acc: 94.8%


  9%|▉         | 1316/15000 [02:34<22:45, 10.02it/s]


 epoch: 1315 | train_loss: 0.43, train_acc: 94.8% | test_loss: 0.51, test_acc: 94.0%

 epoch: 1316 | train_loss: 0.45, train_acc: 94.3% | test_loss: 0.43, test_acc: 95.1%

 epoch: 1317 | train_loss: 0.46, train_acc: 94.5% | test_loss: 0.51, test_acc: 94.0%


  9%|▉         | 1318/15000 [02:34<22:06, 10.32it/s]


 epoch: 1318 | train_loss: 0.45, train_acc: 94.6% | test_loss: 0.47, test_acc: 94.1%

 epoch: 1319 | train_loss: 0.47, train_acc: 94.4% | test_loss: 0.39, test_acc: 95.3%

input:       lexington and concord on april gathered in philadelphia following the war outbreak delegates from the thirteen colonies established the

target:      lexington and concord on april gathered in philadelphia following the war outbreak delegates from the thirteen colonies established the continental

prediction:  lexington and fire on april gathered in philadelphia following the war outbreak delegates from the thirteen colonies established the the


  9%|▉         | 1320/15000 [02:34<23:05,  9.87it/s]


 epoch: 1320 | train_loss: 0.44, train_acc: 95.0% | test_loss: 0.43, test_acc: 95.0%


  9%|▉         | 1322/15000 [02:35<36:15,  6.29it/s]


 epoch: 1321 | train_loss: 0.50, train_acc: 94.1% | test_loss: 0.49, test_acc: 93.6%

 epoch: 1322 | train_loss: 0.48, train_acc: 94.3% | test_loss: 0.42, test_acc: 94.9%

 epoch: 1323 | train_loss: 0.42, train_acc: 95.3% | test_loss: 0.42, test_acc: 95.2%


  9%|▉         | 1326/15000 [02:35<27:21,  8.33it/s]


 epoch: 1324 | train_loss: 0.43, train_acc: 95.0% | test_loss: 0.50, test_acc: 94.1%

 epoch: 1325 | train_loss: 0.49, train_acc: 94.1% | test_loss: 0.50, test_acc: 94.1%

 epoch: 1326 | train_loss: 0.46, train_acc: 94.3% | test_loss: 0.47, test_acc: 94.2%


  9%|▉         | 1328/15000 [02:35<25:30,  8.93it/s]


 epoch: 1327 | train_loss: 0.43, train_acc: 94.8% | test_loss: 0.44, test_acc: 95.0%

 epoch: 1328 | train_loss: 0.52, train_acc: 93.8% | test_loss: 0.40, test_acc: 95.5%

 epoch: 1329 | train_loss: 0.48, train_acc: 94.3% | test_loss: 0.45, test_acc: 94.7%


  9%|▉         | 1330/15000 [02:36<25:37,  8.89it/s]


input:       the northwestern hawaiian islands were formed to million years ago as shield volcanoes over the same volcanic hotspot that

target:      the northwestern hawaiian islands were formed to million years ago as shield volcanoes over the same volcanic hotspot that formed

prediction:  the northwestern hawaiian islands were formed to million years ago as shield volcanoes over the same volcanic remarked that the

 epoch: 1330 | train_loss: 0.48, train_acc: 94.1% | test_loss: 0.44, test_acc: 94.5%

 epoch: 1331 | train_loss: 0.46, train_acc: 94.3% | test_loss: 0.46, test_acc: 94.9%


  9%|▉         | 1334/15000 [02:36<22:47,  9.99it/s]


 epoch: 1332 | train_loss: 0.41, train_acc: 95.4% | test_loss: 0.51, test_acc: 93.9%

 epoch: 1333 | train_loss: 0.49, train_acc: 94.0% | test_loss: 0.49, test_acc: 94.3%

 epoch: 1334 | train_loss: 0.47, train_acc: 94.2% | test_loss: 0.44, test_acc: 94.8%


  9%|▉         | 1336/15000 [02:36<31:17,  7.28it/s]


 epoch: 1335 | train_loss: 0.48, train_acc: 94.2% | test_loss: 0.49, test_acc: 94.2%

 epoch: 1336 | train_loss: 0.50, train_acc: 94.2% | test_loss: 0.50, test_acc: 94.1%

 epoch: 1337 | train_loss: 0.48, train_acc: 93.6% | test_loss: 0.48, test_acc: 93.8%


  9%|▉         | 1338/15000 [02:37<27:26,  8.30it/s]


 epoch: 1338 | train_loss: 0.47, train_acc: 94.2% | test_loss: 0.47, test_acc: 94.5%

 epoch: 1339 | train_loss: 0.47, train_acc: 94.2% | test_loss: 0.46, test_acc: 94.6%

input:       antartike modern le antarctique attested in and from there the middle english pol antartik found first in treatise written

target:      antartike modern le antarctique attested in and from there the middle english pol antartik found first in treatise written by

prediction:  sentence modern le cape attested in and from there the middle english pol antartik found first in treatise written the


  9%|▉         | 1342/15000 [02:37<24:15,  9.38it/s]


 epoch: 1340 | train_loss: 0.43, train_acc: 95.2% | test_loss: 0.48, test_acc: 94.0%

 epoch: 1341 | train_loss: 0.48, train_acc: 94.3% | test_loss: 0.49, test_acc: 94.3%

 epoch: 1342 | train_loss: 0.47, train_acc: 94.3% | test_loss: 0.50, test_acc: 94.2%


  9%|▉         | 1344/15000 [02:37<22:45, 10.00it/s]


 epoch: 1343 | train_loss: 0.47, train_acc: 94.2% | test_loss: 0.44, test_acc: 94.6%

 epoch: 1344 | train_loss: 0.48, train_acc: 94.5% | test_loss: 0.50, test_acc: 94.1%

 epoch: 1345 | train_loss: 0.46, train_acc: 94.6% | test_loss: 0.46, test_acc: 94.2%


  9%|▉         | 1348/15000 [02:37<21:29, 10.59it/s]


 epoch: 1346 | train_loss: 0.49, train_acc: 94.2% | test_loss: 0.45, test_acc: 94.5%

 epoch: 1347 | train_loss: 0.48, train_acc: 93.8% | test_loss: 0.44, test_acc: 94.6%

 epoch: 1348 | train_loss: 0.43, train_acc: 94.9% | test_loss: 0.51, test_acc: 93.9%


  9%|▉         | 1350/15000 [02:38<39:07,  5.81it/s]


 epoch: 1349 | train_loss: 0.53, train_acc: 93.4% | test_loss: 0.57, test_acc: 93.4%

input:       per month while foreman might earn sacks kg or lb prices were fixed across the country and recorded in

target:      per month while foreman might earn sacks kg or lb prices were fixed across the country and recorded in lists

prediction:  per month while potential might earn sacks kg or lb prices were fixed across the country and recorded in the

 epoch: 1350 | train_loss: 0.43, train_acc: 95.0% | test_loss: 0.47, test_acc: 94.5%


  9%|▉         | 1352/15000 [02:38<34:18,  6.63it/s]


 epoch: 1351 | train_loss: 0.49, train_acc: 94.2% | test_loss: 0.48, test_acc: 94.5%

 epoch: 1352 | train_loss: 0.49, train_acc: 94.2% | test_loss: 0.46, test_acc: 94.5%


  9%|▉         | 1354/15000 [02:39<29:50,  7.62it/s]


 epoch: 1353 | train_loss: 0.42, train_acc: 95.2% | test_loss: 0.44, test_acc: 95.2%

 epoch: 1354 | train_loss: 0.45, train_acc: 94.5% | test_loss: 0.42, test_acc: 95.1%


  9%|▉         | 1357/15000 [02:39<25:39,  8.86it/s]


 epoch: 1355 | train_loss: 0.48, train_acc: 94.3% | test_loss: 0.39, test_acc: 95.3%

 epoch: 1356 | train_loss: 0.45, train_acc: 94.4% | test_loss: 0.42, test_acc: 95.3%

 epoch: 1357 | train_loss: 0.43, train_acc: 95.0% | test_loss: 0.51, test_acc: 93.9%


  9%|▉         | 1359/15000 [02:39<24:58,  9.10it/s]


 epoch: 1358 | train_loss: 0.47, train_acc: 94.7% | test_loss: 0.45, test_acc: 94.9%

 epoch: 1359 | train_loss: 0.46, train_acc: 94.6% | test_loss: 0.51, test_acc: 94.0%

input:       of the java edition allow further customization including the ability to add new advancements dimensions functions loot tables predicates


  9%|▉         | 1361/15000 [02:39<29:39,  7.67it/s]


target:      of the java edition allow further customization including the ability to add new advancements dimensions functions loot tables predicates recipes

prediction:  of the java edition allow further hierarchy including the ability to add new advancements dimensions functions loot tables predicates the

 epoch: 1360 | train_loss: 0.48, train_acc: 94.0% | test_loss: 0.42, test_acc: 94.9%

 epoch: 1361 | train_loss: 0.45, train_acc: 94.4% | test_loss: 0.43, test_acc: 95.3%


  9%|▉         | 1362/15000 [02:40<28:11,  8.06it/s]


 epoch: 1362 | train_loss: 0.51, train_acc: 93.8% | test_loss: 0.47, test_acc: 93.9%


  9%|▉         | 1364/15000 [02:40<47:16,  4.81it/s]


 epoch: 1363 | train_loss: 0.53, train_acc: 93.9% | test_loss: 0.52, test_acc: 93.3%

 epoch: 1364 | train_loss: 0.40, train_acc: 95.1% | test_loss: 0.47, test_acc: 94.6%

 epoch: 1365 | train_loss: 0.37, train_acc: 95.5% | test_loss: 0.46, test_acc: 94.3%


  9%|▉         | 1367/15000 [02:41<33:29,  6.78it/s]


 epoch: 1366 | train_loss: 0.50, train_acc: 93.8% | test_loss: 0.50, test_acc: 94.4%

 epoch: 1367 | train_loss: 0.55, train_acc: 93.7% | test_loss: 0.43, test_acc: 94.7%


  9%|▉         | 1369/15000 [02:41<30:01,  7.57it/s]


 epoch: 1368 | train_loss: 0.45, train_acc: 94.5% | test_loss: 0.49, test_acc: 94.5%

 epoch: 1369 | train_loss: 0.42, train_acc: 94.6% | test_loss: 0.42, test_acc: 95.3%

input:       not to do so players then re spawn at their spawn point which by default is where players first


  9%|▉         | 1370/15000 [02:41<31:23,  7.24it/s]


target:      not to do so players then re spawn at their spawn point which by default is where players first spawn

prediction:  not to do so players then re spawn at their spawn point which by default is where players first the

 epoch: 1370 | train_loss: 0.45, train_acc: 94.7% | test_loss: 0.42, test_acc: 95.3%

 epoch: 1371 | train_loss: 0.48, train_acc: 94.6% | test_loss: 0.44, test_acc: 94.5%


  9%|▉         | 1374/15000 [02:41<23:18,  9.74it/s]


 epoch: 1372 | train_loss: 0.46, train_acc: 94.8% | test_loss: 0.49, test_acc: 94.4%

 epoch: 1373 | train_loss: 0.43, train_acc: 94.4% | test_loss: 0.45, test_acc: 94.8%

 epoch: 1374 | train_loss: 0.43, train_acc: 94.8% | test_loss: 0.41, test_acc: 95.1%


  9%|▉         | 1376/15000 [02:41<21:33, 10.53it/s]


 epoch: 1375 | train_loss: 0.47, train_acc: 94.4% | test_loss: 0.49, test_acc: 94.3%

 epoch: 1376 | train_loss: 0.42, train_acc: 95.0% | test_loss: 0.44, test_acc: 94.7%

 epoch: 1377 | train_loss: 0.46, train_acc: 95.0% | test_loss: 0.41, test_acc: 94.7%


  9%|▉         | 1380/15000 [02:42<24:05,  9.43it/s]


 epoch: 1378 | train_loss: 0.45, train_acc: 95.0% | test_loss: 0.44, test_acc: 95.0%

 epoch: 1379 | train_loss: 0.48, train_acc: 94.5% | test_loss: 0.49, test_acc: 94.1%

input:       he was popular with the commoners as with his soldiers whose salary he raised starting in the influence of

target:      he was popular with the commoners as with his soldiers whose salary he raised starting in the influence of his

prediction:  he was popular with the default as with his soldiers whose salary he raised starting in the influence of the

 epoch: 1380 | train_loss: 0.50, train_acc: 94.2% | test_loss: 0.43, test_acc: 95.0%


  9%|▉         | 1382/15000 [02:42<22:24, 10.13it/s]


 epoch: 1381 | train_loss: 0.45, train_acc: 94.7% | test_loss: 0.44, test_acc: 94.6%

 epoch: 1382 | train_loss: 0.44, train_acc: 94.7% | test_loss: 0.44, test_acc: 94.6%

 epoch: 1383 | train_loss: 0.47, train_acc: 94.3% | test_loss: 0.53, test_acc: 93.4%


  9%|▉         | 1386/15000 [02:42<20:56, 10.84it/s]


 epoch: 1384 | train_loss: 0.48, train_acc: 93.9% | test_loss: 0.41, test_acc: 95.2%

 epoch: 1385 | train_loss: 0.50, train_acc: 93.8% | test_loss: 0.45, test_acc: 94.8%

 epoch: 1386 | train_loss: 0.45, train_acc: 94.5% | test_loss: 0.44, test_acc: 94.8%


  9%|▉         | 1388/15000 [02:43<20:52, 10.87it/s]


 epoch: 1387 | train_loss: 0.43, train_acc: 94.2% | test_loss: 0.48, test_acc: 94.5%

 epoch: 1388 | train_loss: 0.50, train_acc: 94.0% | test_loss: 0.46, test_acc: 94.6%

 epoch: 1389 | train_loss: 0.48, train_acc: 94.6% | test_loss: 0.49, test_acc: 94.3%


  9%|▉         | 1390/15000 [02:43<21:41, 10.46it/s]


input:       c programming language which has open implementations of most parts of the system also has common language runtime clr

target:      c programming language which has open implementations of most parts of the system also has common language runtime clr as

prediction:  hand programming language which has open implementations of most parts of the system also has common language runtime wrote the

 epoch: 1390 | train_loss: 0.45, train_acc: 95.2% | test_loss: 0.48, test_acc: 94.1%

 epoch: 1391 | train_loss: 0.45, train_acc: 94.4% | test_loss: 0.53, test_acc: 93.8%


  9%|▉         | 1394/15000 [02:44<31:15,  7.25it/s]


 epoch: 1392 | train_loss: 0.42, train_acc: 95.1% | test_loss: 0.44, test_acc: 94.7%

 epoch: 1393 | train_loss: 0.47, train_acc: 94.4% | test_loss: 0.46, test_acc: 94.1%

 epoch: 1394 | train_loss: 0.40, train_acc: 95.5% | test_loss: 0.40, test_acc: 95.1%


  9%|▉         | 1396/15000 [02:44<27:16,  8.31it/s]


 epoch: 1395 | train_loss: 0.42, train_acc: 95.0% | test_loss: 0.42, test_acc: 95.2%

 epoch: 1396 | train_loss: 0.53, train_acc: 93.7% | test_loss: 0.46, test_acc: 94.6%

 epoch: 1397 | train_loss: 0.42, train_acc: 94.9% | test_loss: 0.46, test_acc: 94.2%


  9%|▉         | 1400/15000 [02:44<24:15,  9.34it/s]


 epoch: 1398 | train_loss: 0.47, train_acc: 94.3% | test_loss: 0.42, test_acc: 94.6%

 epoch: 1399 | train_loss: 0.47, train_acc: 94.3% | test_loss: 0.47, test_acc: 94.3%

input:       almost as large as that of the rest of asia combined in japan economy nearly equaled that of the

target:      almost as large as that of the rest of asia combined in japan economy nearly equaled that of the us

prediction:  almost as large as that of the rest of asia combined in japan economy nearly equaled that of the the

 epoch: 1400 | train_loss: 0.45, train_acc: 94.6% | test_loss: 0.41, test_acc: 95.2%


  9%|▉         | 1402/15000 [02:44<22:48,  9.94it/s]


 epoch: 1401 | train_loss: 0.44, train_acc: 94.7% | test_loss: 0.44, test_acc: 94.7%

 epoch: 1402 | train_loss: 0.46, train_acc: 94.4% | test_loss: 0.42, test_acc: 95.1%

 epoch: 1403 | train_loss: 0.48, train_acc: 94.1% | test_loss: 0.47, test_acc: 94.3%


  9%|▉         | 1404/15000 [02:44<22:04, 10.27it/s]


 epoch: 1404 | train_loss: 0.42, train_acc: 95.2% | test_loss: 0.45, test_acc: 94.9%

 epoch: 1405 | train_loss: 0.43, train_acc: 95.1% | test_loss: 0.47, test_acc: 94.2%


  9%|▉         | 1408/15000 [02:45<31:55,  7.09it/s]


 epoch: 1406 | train_loss: 0.44, train_acc: 94.9% | test_loss: 0.39, test_acc: 95.4%

 epoch: 1407 | train_loss: 0.43, train_acc: 94.9% | test_loss: 0.46, test_acc: 94.7%

 epoch: 1408 | train_loss: 0.42, train_acc: 94.5% | test_loss: 0.37, test_acc: 95.4%


  9%|▉         | 1410/15000 [02:45<28:57,  7.82it/s]


 epoch: 1409 | train_loss: 0.45, train_acc: 95.1% | test_loss: 0.43, test_acc: 94.8%

input:       and federation the eu originated in western europe but has been expanding eastward since the fall of the soviet

target:      and federation the eu originated in western europe but has been expanding eastward since the fall of the soviet union

prediction:  and federation the eu originated in western europe but has been expanding eastward since the fall of the soviet of

 epoch: 1410 | train_loss: 0.43, train_acc: 95.2% | test_loss: 0.40, test_acc: 95.1%

 epoch: 1411 | train_loss: 0.42, train_acc: 94.9% | test_loss: 0.41, test_acc: 94.6%


  9%|▉         | 1414/15000 [02:46<24:18,  9.31it/s]


 epoch: 1412 | train_loss: 0.46, train_acc: 94.3% | test_loss: 0.46, test_acc: 94.5%

 epoch: 1413 | train_loss: 0.51, train_acc: 93.6% | test_loss: 0.47, test_acc: 94.5%

 epoch: 1414 | train_loss: 0.44, train_acc: 94.8% | test_loss: 0.45, test_acc: 94.4%


  9%|▉         | 1416/15000 [02:46<22:47,  9.93it/s]


 epoch: 1415 | train_loss: 0.43, train_acc: 95.1% | test_loss: 0.45, test_acc: 94.4%

 epoch: 1416 | train_loss: 0.46, train_acc: 94.6% | test_loss: 0.46, test_acc: 94.8%

 epoch: 1417 | train_loss: 0.42, train_acc: 94.8% | test_loss: 0.46, test_acc: 94.5%


  9%|▉         | 1420/15000 [02:46<22:06, 10.23it/s]


 epoch: 1418 | train_loss: 0.42, train_acc: 95.1% | test_loss: 0.38, test_acc: 95.4%

 epoch: 1419 | train_loss: 0.41, train_acc: 95.1% | test_loss: 0.46, test_acc: 94.6%

input:       nearby islands varies from about in winter to about in the summer some of the research stations are staffed

target:      nearby islands varies from about in winter to about in the summer some of the research stations are staffed year

prediction:  nearby islands varies from about in winter to about in the summer some of the research stations are wmf the

 epoch: 1420 | train_loss: 0.43, train_acc: 95.1% | test_loss: 0.44, test_acc: 95.0%


  9%|▉         | 1422/15000 [02:47<25:11,  8.99it/s]


 epoch: 1421 | train_loss: 0.49, train_acc: 94.2% | test_loss: 0.40, test_acc: 95.5%

 epoch: 1422 | train_loss: 0.42, train_acc: 95.0% | test_loss: 0.43, test_acc: 95.0%

 epoch: 1423 | train_loss: 0.41, train_acc: 94.9% | test_loss: 0.43, test_acc: 94.8%


 10%|▉         | 1426/15000 [02:47<21:57, 10.31it/s]


 epoch: 1424 | train_loss: 0.43, train_acc: 95.1% | test_loss: 0.45, test_acc: 94.9%

 epoch: 1425 | train_loss: 0.41, train_acc: 95.0% | test_loss: 0.43, test_acc: 94.6%

 epoch: 1426 | train_loss: 0.43, train_acc: 95.0% | test_loss: 0.46, test_acc: 94.6%


 10%|▉         | 1428/15000 [02:47<20:47, 10.88it/s]


 epoch: 1427 | train_loss: 0.43, train_acc: 95.0% | test_loss: 0.41, test_acc: 95.1%

 epoch: 1428 | train_loss: 0.48, train_acc: 94.2% | test_loss: 0.38, test_acc: 95.4%

 epoch: 1429 | train_loss: 0.43, train_acc: 94.7% | test_loss: 0.44, test_acc: 94.8%


 10%|▉         | 1430/15000 [02:47<21:18, 10.61it/s]


input:       south american plate and is still considered to be separate tectonic plate despite only containing handful of islands the

target:      south american plate and is still considered to be separate tectonic plate despite only containing handful of islands the new

prediction:  south american plate and is still considered to be separate tectonic plate despite only containing handful of islands the the

 epoch: 1430 | train_loss: 0.40, train_acc: 95.2% | test_loss: 0.43, test_acc: 95.2%

 epoch: 1431 | train_loss: 0.49, train_acc: 94.0% | test_loss: 0.45, test_acc: 94.9%


 10%|▉         | 1434/15000 [02:48<21:00, 10.77it/s]


 epoch: 1432 | train_loss: 0.43, train_acc: 94.7% | test_loss: 0.41, test_acc: 95.0%

 epoch: 1433 | train_loss: 0.44, train_acc: 95.0% | test_loss: 0.46, test_acc: 94.5%

 epoch: 1434 | train_loss: 0.41, train_acc: 94.8% | test_loss: 0.46, test_acc: 94.6%


 10%|▉         | 1436/15000 [02:48<35:59,  6.28it/s]


 epoch: 1435 | train_loss: 0.41, train_acc: 94.9% | test_loss: 0.49, test_acc: 93.9%

 epoch: 1436 | train_loss: 0.49, train_acc: 94.0% | test_loss: 0.47, test_acc: 94.4%

 epoch: 1437 | train_loss: 0.45, train_acc: 94.5% | test_loss: 0.44, test_acc: 95.0%


 10%|▉         | 1440/15000 [02:49<28:07,  8.04it/s]


 epoch: 1438 | train_loss: 0.39, train_acc: 95.2% | test_loss: 0.47, test_acc: 94.5%

 epoch: 1439 | train_loss: 0.44, train_acc: 94.7% | test_loss: 0.46, test_acc: 94.3%

input:       core and to understand the depositional environment in which the rock units formed geochronologists precisely date rocks within the

target:      core and to understand the depositional environment in which the rock units formed geochronologists precisely date rocks within the stratigraphic

prediction:  core and to understand the depositional environment in which the rock units formed geochronologists precisely date rocks within the the

 epoch: 1440 | train_loss: 0.46, train_acc: 94.9% | test_loss: 0.42, test_acc: 95.1%


 10%|▉         | 1442/15000 [02:49<25:23,  8.90it/s]


 epoch: 1441 | train_loss: 0.41, train_acc: 95.4% | test_loss: 0.44, test_acc: 94.6%

 epoch: 1442 | train_loss: 0.43, train_acc: 94.6% | test_loss: 0.42, test_acc: 94.9%

 epoch: 1443 | train_loss: 0.48, train_acc: 94.6% | test_loss: 0.48, test_acc: 94.3%


 10%|▉         | 1446/15000 [02:49<22:28, 10.05it/s]


 epoch: 1444 | train_loss: 0.40, train_acc: 95.4% | test_loss: 0.47, test_acc: 94.8%

 epoch: 1445 | train_loss: 0.40, train_acc: 95.4% | test_loss: 0.40, test_acc: 95.3%

 epoch: 1446 | train_loss: 0.47, train_acc: 94.3% | test_loss: 0.42, test_acc: 95.1%


 10%|▉         | 1448/15000 [02:49<21:34, 10.47it/s]


 epoch: 1447 | train_loss: 0.48, train_acc: 93.7% | test_loss: 0.51, test_acc: 94.3%

 epoch: 1448 | train_loss: 0.43, train_acc: 94.9% | test_loss: 0.43, test_acc: 94.9%

 epoch: 1449 | train_loss: 0.42, train_acc: 94.6% | test_loss: 0.49, test_acc: 94.7%

input:       continental groupings it is the ocean that links the parts of the region together john eperjesi book the imperialist

target:      continental groupings it is the ocean that links the parts of the region together john eperjesi book the imperialist imaginary

prediction:  continental groupings it is the ocean that links the parts of the region together john eperjesi book the imperialist the


 10%|▉         | 1452/15000 [02:50<30:24,  7.43it/s]


 epoch: 1450 | train_loss: 0.47, train_acc: 94.4% | test_loss: 0.44, test_acc: 94.8%

 epoch: 1451 | train_loss: 0.44, train_acc: 94.4% | test_loss: 0.47, test_acc: 94.5%

 epoch: 1452 | train_loss: 0.46, train_acc: 94.2% | test_loss: 0.44, test_acc: 94.7%


 10%|▉         | 1454/15000 [02:50<26:50,  8.41it/s]


 epoch: 1453 | train_loss: 0.41, train_acc: 95.1% | test_loss: 0.44, test_acc: 94.5%

 epoch: 1454 | train_loss: 0.41, train_acc: 95.1% | test_loss: 0.47, test_acc: 94.0%

 epoch: 1455 | train_loss: 0.40, train_acc: 95.4% | test_loss: 0.41, test_acc: 95.0%


 10%|▉         | 1458/15000 [02:51<22:20, 10.10it/s]


 epoch: 1456 | train_loss: 0.37, train_acc: 95.3% | test_loss: 0.41, test_acc: 95.1%

 epoch: 1457 | train_loss: 0.42, train_acc: 95.1% | test_loss: 0.47, test_acc: 94.6%

 epoch: 1458 | train_loss: 0.46, train_acc: 94.6% | test_loss: 0.48, test_acc: 94.5%


 10%|▉         | 1460/15000 [02:51<22:15, 10.14it/s]


 epoch: 1459 | train_loss: 0.50, train_acc: 94.1% | test_loss: 0.46, test_acc: 94.6%

input:       and marked the end of the crisis of the third century diocletian appointed co emperor in and delegated further

target:      and marked the end of the crisis of the third century diocletian appointed co emperor in and delegated further with

prediction:  and marked the end of the crisis of the third century diocletian appointed co emperor in and maritime further the

 epoch: 1460 | train_loss: 0.45, train_acc: 94.6% | test_loss: 0.44, test_acc: 95.0%


 10%|▉         | 1462/15000 [02:51<22:39,  9.96it/s]


 epoch: 1461 | train_loss: 0.49, train_acc: 94.2% | test_loss: 0.41, test_acc: 95.3%

 epoch: 1462 | train_loss: 0.43, train_acc: 94.9% | test_loss: 0.44, test_acc: 94.8%

 epoch: 1463 | train_loss: 0.39, train_acc: 95.4% | test_loss: 0.47, test_acc: 94.4%


 10%|▉         | 1465/15000 [02:52<26:32,  8.50it/s]


 epoch: 1464 | train_loss: 0.44, train_acc: 95.0% | test_loss: 0.43, test_acc: 94.8%

 epoch: 1465 | train_loss: 0.44, train_acc: 94.3% | test_loss: 0.44, test_acc: 94.6%


 10%|▉         | 1469/15000 [02:52<24:36,  9.16it/s]


 epoch: 1466 | train_loss: 0.39, train_acc: 95.6% | test_loss: 0.44, test_acc: 94.5%

 epoch: 1467 | train_loss: 0.42, train_acc: 94.8% | test_loss: 0.44, test_acc: 94.6%

 epoch: 1468 | train_loss: 0.43, train_acc: 95.0% | test_loss: 0.43, test_acc: 94.6%

 epoch: 1469 | train_loss: 0.42, train_acc: 95.0% | test_loss: 0.45, test_acc: 94.9%


 10%|▉         | 1470/15000 [02:52<27:13,  8.28it/s]


input:       the cook islands society islands and austral islands in the center and the marquesas islands the tuamotus mangareva islands

target:      the cook islands society islands and austral islands in the center and the marquesas islands the tuamotus mangareva islands and

prediction:  the cook islands society islands and certainly islands in the center and the professions islands the charlie design islands the

 epoch: 1470 | train_loss: 0.44, train_acc: 94.9% | test_loss: 0.43, test_acc: 94.6%

 epoch: 1471 | train_loss: 0.48, train_acc: 94.3% | test_loss: 0.43, test_acc: 95.1%


 10%|▉         | 1474/15000 [02:52<24:05,  9.36it/s]


 epoch: 1472 | train_loss: 0.45, train_acc: 94.3% | test_loss: 0.41, test_acc: 95.2%

 epoch: 1473 | train_loss: 0.45, train_acc: 94.3% | test_loss: 0.44, test_acc: 94.9%

 epoch: 1474 | train_loss: 0.40, train_acc: 95.1% | test_loss: 0.46, test_acc: 94.7%


 10%|▉         | 1476/15000 [02:53<23:30,  9.59it/s]


 epoch: 1475 | train_loss: 0.39, train_acc: 95.4% | test_loss: 0.48, test_acc: 94.2%

 epoch: 1476 | train_loss: 0.46, train_acc: 94.4% | test_loss: 0.43, test_acc: 95.0%


 10%|▉         | 1477/15000 [02:53<23:50,  9.45it/s]


 epoch: 1477 | train_loss: 0.40, train_acc: 95.1% | test_loss: 0.39, test_acc: 95.0%


 10%|▉         | 1479/15000 [02:53<44:12,  5.10it/s]


 epoch: 1478 | train_loss: 0.40, train_acc: 95.0% | test_loss: 0.45, test_acc: 94.6%

 epoch: 1479 | train_loss: 0.40, train_acc: 95.1% | test_loss: 0.45, test_acc: 94.7%

input:       region although the economy of australia is by far the largest and most dominant economy in the region and


 10%|▉         | 1480/15000 [02:54<41:11,  5.47it/s]


target:      region although the economy of australia is by far the largest and most dominant economy in the region and one

prediction:  region although the economy of australia is by far the largest and most dominant economy in the region and the

 epoch: 1480 | train_loss: 0.44, train_acc: 94.8% | test_loss: 0.40, test_acc: 95.0%

 epoch: 1481 | train_loss: 0.46, train_acc: 94.5% | test_loss: 0.48, test_acc: 94.2%


 10%|▉         | 1483/15000 [02:54<30:53,  7.29it/s]


 epoch: 1482 | train_loss: 0.44, train_acc: 94.7% | test_loss: 0.42, test_acc: 95.0%

 epoch: 1483 | train_loss: 0.50, train_acc: 94.4% | test_loss: 0.43, test_acc: 94.5%


 10%|▉         | 1485/15000 [02:54<27:43,  8.12it/s]


 epoch: 1484 | train_loss: 0.40, train_acc: 95.3% | test_loss: 0.40, test_acc: 95.1%

 epoch: 1485 | train_loss: 0.41, train_acc: 95.2% | test_loss: 0.45, test_acc: 94.7%


 10%|▉         | 1487/15000 [02:54<25:35,  8.80it/s]


 epoch: 1486 | train_loss: 0.48, train_acc: 94.2% | test_loss: 0.39, test_acc: 95.5%

 epoch: 1487 | train_loss: 0.41, train_acc: 94.9% | test_loss: 0.42, test_acc: 94.8%


 10%|▉         | 1489/15000 [02:55<24:48,  9.08it/s]


 epoch: 1488 | train_loss: 0.40, train_acc: 95.3% | test_loss: 0.42, test_acc: 94.6%

 epoch: 1489 | train_loss: 0.43, train_acc: 95.0% | test_loss: 0.45, test_acc: 93.9%

input:       by borrowers the central government itself did not borrow money and without public debt had to fund deficits from

target:      by borrowers the central government itself did not borrow money and without public debt had to fund deficits from cash

prediction:  by borrowers the central government itself did not borrow money and without public debt had to fund deficits from the


 10%|▉         | 1490/15000 [02:55<26:36,  8.46it/s]


 epoch: 1490 | train_loss: 0.44, train_acc: 94.6% | test_loss: 0.45, test_acc: 94.9%

 epoch: 1491 | train_loss: 0.45, train_acc: 94.6% | test_loss: 0.48, test_acc: 94.3%


 10%|▉         | 1494/15000 [02:55<24:14,  9.28it/s]


 epoch: 1492 | train_loss: 0.41, train_acc: 95.1% | test_loss: 0.42, test_acc: 95.0%

 epoch: 1493 | train_loss: 0.45, train_acc: 94.6% | test_loss: 0.40, test_acc: 95.2%

 epoch: 1494 | train_loss: 0.37, train_acc: 95.5% | test_loss: 0.46, test_acc: 94.8%


 10%|▉         | 1496/15000 [02:55<22:29, 10.01it/s]


 epoch: 1495 | train_loss: 0.41, train_acc: 95.0% | test_loss: 0.45, test_acc: 94.7%

 epoch: 1496 | train_loss: 0.45, train_acc: 94.7% | test_loss: 0.45, test_acc: 94.7%

 epoch: 1497 | train_loss: 0.44, train_acc: 94.4% | test_loss: 0.42, test_acc: 94.9%


 10%|▉         | 1498/15000 [02:55<21:17, 10.57it/s]


 epoch: 1498 | train_loss: 0.47, train_acc: 93.2% | test_loss: 0.43, test_acc: 95.0%

 epoch: 1499 | train_loss: 0.43, train_acc: 95.2% | test_loss: 0.43, test_acc: 94.8%

input:       and social factors in accounting for behavior some concepts that sociologists have applied to the study of psychiatric disorders

target:      and social factors in accounting for behavior some concepts that sociologists have applied to the study of psychiatric disorders concepts

prediction:  and social factors in accounting for behavior some concepts that sociologists have applied to the study of smaller disorders the


 10%|█         | 1502/15000 [02:56<21:46, 10.33it/s]


 epoch: 1500 | train_loss: 0.45, train_acc: 94.4% | test_loss: 0.38, test_acc: 95.6%

 epoch: 1501 | train_loss: 0.44, train_acc: 94.6% | test_loss: 0.44, test_acc: 94.8%

 epoch: 1502 | train_loss: 0.44, train_acc: 94.2% | test_loss: 0.40, test_acc: 95.3%


 10%|█         | 1504/15000 [02:56<20:47, 10.82it/s]


 epoch: 1503 | train_loss: 0.46, train_acc: 94.3% | test_loss: 0.41, test_acc: 94.9%

 epoch: 1504 | train_loss: 0.39, train_acc: 95.5% | test_loss: 0.47, test_acc: 94.5%

 epoch: 1505 | train_loss: 0.45, train_acc: 95.0% | test_loss: 0.41, test_acc: 95.5%


 10%|█         | 1506/15000 [02:56<20:13, 11.12it/s]


 epoch: 1506 | train_loss: 0.40, train_acc: 95.3% | test_loss: 0.45, test_acc: 94.8%


 10%|█         | 1508/15000 [02:57<25:13,  8.91it/s]


 epoch: 1507 | train_loss: 0.38, train_acc: 95.7% | test_loss: 0.43, test_acc: 95.0%

 epoch: 1508 | train_loss: 0.43, train_acc: 95.0% | test_loss: 0.44, test_acc: 94.5%

 epoch: 1509 | train_loss: 0.43, train_acc: 95.1% | test_loss: 0.50, test_acc: 94.0%


 10%|█         | 1510/15000 [02:57<24:21,  9.23it/s]


input:       interviewees notable participants include former united states president barack obama while campaigning for the election bill gates multiple times

target:      interviewees notable participants include former united states president barack obama while campaigning for the election bill gates multiple times and

prediction:  theology notable participants include former united states president barack obama while campaigning for the election bill gates multiple times the

 epoch: 1510 | train_loss: 0.40, train_acc: 94.9% | test_loss: 0.46, test_acc: 94.6%

 epoch: 1511 | train_loss: 0.44, train_acc: 95.2% | test_loss: 0.38, test_acc: 95.3%


 10%|█         | 1514/15000 [02:57<21:38, 10.39it/s]


 epoch: 1512 | train_loss: 0.42, train_acc: 94.9% | test_loss: 0.42, test_acc: 95.3%

 epoch: 1513 | train_loss: 0.39, train_acc: 95.5% | test_loss: 0.39, test_acc: 95.1%

 epoch: 1514 | train_loss: 0.43, train_acc: 95.1% | test_loss: 0.42, test_acc: 94.9%


 10%|█         | 1516/15000 [02:57<20:52, 10.76it/s]


 epoch: 1515 | train_loss: 0.41, train_acc: 95.5% | test_loss: 0.46, test_acc: 94.6%

 epoch: 1516 | train_loss: 0.50, train_acc: 93.6% | test_loss: 0.39, test_acc: 95.6%

 epoch: 1517 | train_loss: 0.47, train_acc: 94.3% | test_loss: 0.42, test_acc: 95.0%


 10%|█         | 1518/15000 [02:57<20:35, 10.91it/s]


 epoch: 1518 | train_loss: 0.43, train_acc: 94.9% | test_loss: 0.41, test_acc: 95.2%

 epoch: 1519 | train_loss: 0.40, train_acc: 95.3% | test_loss: 0.46, test_acc: 94.7%

input:       amazon apple at coca cola disney general motors mcdonald meta microsoft nike pepsi and walmart were founded and are

target:      amazon apple at coca cola disney general motors mcdonald meta microsoft nike pepsi and walmart were founded and are headquartered

prediction:  amazon apple at fried lived disney general residents mcdonald meta microsoft nike grasslands and libya were founded and are the


 10%|█         | 1520/15000 [02:58<21:53, 10.26it/s]


 epoch: 1520 | train_loss: 0.43, train_acc: 94.8% | test_loss: 0.45, test_acc: 95.0%


 10%|█         | 1522/15000 [02:58<36:10,  6.21it/s]


 epoch: 1521 | train_loss: 0.38, train_acc: 95.4% | test_loss: 0.45, test_acc: 95.3%

 epoch: 1522 | train_loss: 0.48, train_acc: 94.1% | test_loss: 0.40, test_acc: 95.4%

 epoch: 1523 | train_loss: 0.48, train_acc: 94.3% | test_loss: 0.41, test_acc: 94.9%


 10%|█         | 1526/15000 [02:59<26:54,  8.35it/s]


 epoch: 1524 | train_loss: 0.42, train_acc: 94.9% | test_loss: 0.41, test_acc: 94.8%

 epoch: 1525 | train_loss: 0.45, train_acc: 94.7% | test_loss: 0.45, test_acc: 94.5%

 epoch: 1526 | train_loss: 0.39, train_acc: 95.2% | test_loss: 0.38, test_acc: 95.3%


 10%|█         | 1528/15000 [02:59<24:17,  9.24it/s]


 epoch: 1527 | train_loss: 0.44, train_acc: 94.6% | test_loss: 0.46, test_acc: 94.5%

 epoch: 1528 | train_loss: 0.43, train_acc: 94.7% | test_loss: 0.35, test_acc: 95.6%

 epoch: 1529 | train_loss: 0.38, train_acc: 95.3% | test_loss: 0.45, test_acc: 94.6%


 10%|█         | 1530/15000 [02:59<24:29,  9.17it/s]


input:       since the the big five openness to experience conscientiousness extraversion agreeableness and neuroticism emerged as an important trait theory

target:      since the the big five openness to experience conscientiousness extraversion agreeableness and neuroticism emerged as an important trait theory of

prediction:  since the the big five openness to experience conscientiousness began censuses and mechanical emerged as an important trait theory the

 epoch: 1530 | train_loss: 0.44, train_acc: 94.5% | test_loss: 0.39, test_acc: 95.5%

 epoch: 1531 | train_loss: 0.41, train_acc: 95.0% | test_loss: 0.40, test_acc: 95.3%


 10%|█         | 1534/15000 [02:59<22:11, 10.12it/s]


 epoch: 1532 | train_loss: 0.48, train_acc: 94.6% | test_loss: 0.43, test_acc: 94.6%

 epoch: 1533 | train_loss: 0.40, train_acc: 95.5% | test_loss: 0.45, test_acc: 94.5%

 epoch: 1534 | train_loss: 0.48, train_acc: 94.2% | test_loss: 0.45, test_acc: 94.4%

 epoch: 1535 | train_loss: 0.42, train_acc: 95.2% | test_loss: 0.45, test_acc: 94.8%


 10%|█         | 1538/15000 [03:00<30:48,  7.28it/s]


 epoch: 1536 | train_loss: 0.42, train_acc: 94.8% | test_loss: 0.47, test_acc: 94.7%

 epoch: 1537 | train_loss: 0.47, train_acc: 94.1% | test_loss: 0.40, test_acc: 95.2%

 epoch: 1538 | train_loss: 0.43, train_acc: 94.8% | test_loss: 0.48, test_acc: 94.6%


 10%|█         | 1540/15000 [03:00<28:11,  7.96it/s]


 epoch: 1539 | train_loss: 0.48, train_acc: 93.9% | test_loss: 0.44, test_acc: 94.6%

input:       commonly filtered out by users even if they are safe for work the subreddit all originally did not filter

target:      commonly filtered out by users even if they are safe for work the subreddit all originally did not filter topics

prediction:  commonly stages out by users even if they are safe for work the subreddit all originally did not filter the

 epoch: 1540 | train_loss: 0.40, train_acc: 95.2% | test_loss: 0.46, test_acc: 94.4%

 epoch: 1541 | train_loss: 0.40, train_acc: 95.5% | test_loss: 0.40, test_acc: 95.6%


 10%|█         | 1544/15000 [03:01<23:39,  9.48it/s]


 epoch: 1542 | train_loss: 0.42, train_acc: 95.1% | test_loss: 0.42, test_acc: 95.2%

 epoch: 1543 | train_loss: 0.41, train_acc: 95.0% | test_loss: 0.39, test_acc: 95.2%

 epoch: 1544 | train_loss: 0.41, train_acc: 94.7% | test_loss: 0.47, test_acc: 94.6%


 10%|█         | 1546/15000 [03:01<22:40,  9.89it/s]


 epoch: 1545 | train_loss: 0.37, train_acc: 95.7% | test_loss: 0.38, test_acc: 95.1%

 epoch: 1546 | train_loss: 0.42, train_acc: 95.0% | test_loss: 0.43, test_acc: 94.8%

 epoch: 1547 | train_loss: 0.45, train_acc: 94.8% | test_loss: 0.37, test_acc: 95.5%


 10%|█         | 1548/15000 [03:01<22:32,  9.94it/s]


 epoch: 1548 | train_loss: 0.36, train_acc: 95.5% | test_loss: 0.47, test_acc: 94.9%

 epoch: 1549 | train_loss: 0.39, train_acc: 95.3% | test_loss: 0.37, test_acc: 95.2%

input:       technique of autogenic training prominently advocated sterilization and euthanasia of men considered genetically undesirable and devised techniques for facilitating

target:      technique of autogenic training prominently advocated sterilization and euthanasia of men considered genetically undesirable and devised techniques for facilitating this

prediction:  technique of farmers training prominently advocated sterilization and brother of men considered genetically invoked and devised techniques for easter the


 10%|█         | 1552/15000 [03:02<27:16,  8.22it/s]


 epoch: 1550 | train_loss: 0.43, train_acc: 94.8% | test_loss: 0.37, test_acc: 95.9%

 epoch: 1551 | train_loss: 0.44, train_acc: 94.7% | test_loss: 0.44, test_acc: 95.0%

 epoch: 1552 | train_loss: 0.40, train_acc: 95.1% | test_loss: 0.40, test_acc: 95.3%


 10%|█         | 1554/15000 [03:02<25:03,  8.95it/s]


 epoch: 1553 | train_loss: 0.36, train_acc: 95.6% | test_loss: 0.39, test_acc: 95.5%

 epoch: 1554 | train_loss: 0.36, train_acc: 95.7% | test_loss: 0.39, test_acc: 95.1%

 epoch: 1555 | train_loss: 0.44, train_acc: 94.8% | test_loss: 0.45, test_acc: 94.6%


 10%|█         | 1558/15000 [03:02<22:05, 10.14it/s]


 epoch: 1556 | train_loss: 0.40, train_acc: 95.1% | test_loss: 0.42, test_acc: 94.8%

 epoch: 1557 | train_loss: 0.42, train_acc: 94.8% | test_loss: 0.46, test_acc: 94.7%

 epoch: 1558 | train_loss: 0.39, train_acc: 95.1% | test_loss: 0.38, test_acc: 95.7%


 10%|█         | 1560/15000 [03:02<22:38,  9.89it/s]


 epoch: 1559 | train_loss: 0.38, train_acc: 95.8% | test_loss: 0.41, test_acc: 95.4%

input:       nile river valley for agriculture the predictable flooding and controlled irrigation of the fertile valley produced surplus crops which

target:      nile river valley for agriculture the predictable flooding and controlled irrigation of the fertile valley produced surplus crops which supported

prediction:  nile river valley for agriculture the predictable flooding and controlled irrigation of the fertile valley produced surplus crops which the

 epoch: 1560 | train_loss: 0.45, train_acc: 95.1% | test_loss: 0.38, test_acc: 95.5%


 10%|█         | 1562/15000 [03:03<22:07, 10.12it/s]


 epoch: 1561 | train_loss: 0.39, train_acc: 95.5% | test_loss: 0.35, test_acc: 95.9%

 epoch: 1562 | train_loss: 0.42, train_acc: 95.1% | test_loss: 0.44, test_acc: 94.5%

 epoch: 1563 | train_loss: 0.40, train_acc: 95.4% | test_loss: 0.43, test_acc: 94.5%


 10%|█         | 1566/15000 [03:03<31:21,  7.14it/s]


 epoch: 1564 | train_loss: 0.41, train_acc: 95.5% | test_loss: 0.42, test_acc: 94.8%

 epoch: 1565 | train_loss: 0.44, train_acc: 94.8% | test_loss: 0.41, test_acc: 95.5%

 epoch: 1566 | train_loss: 0.45, train_acc: 94.5% | test_loss: 0.43, test_acc: 94.5%


 10%|█         | 1568/15000 [03:04<27:30,  8.14it/s]


 epoch: 1567 | train_loss: 0.40, train_acc: 95.2% | test_loss: 0.38, test_acc: 95.8%

 epoch: 1568 | train_loss: 0.44, train_acc: 94.7% | test_loss: 0.46, test_acc: 94.8%

 epoch: 1569 | train_loss: 0.40, train_acc: 94.9% | test_loss: 0.43, test_acc: 94.7%


 10%|█         | 1570/15000 [03:04<26:03,  8.59it/s]


input:       the highest state priesthoods but could play priestly role he could not marry woman from senatorial family nor achieve

target:      the highest state priesthoods but could play priestly role he could not marry woman from senatorial family nor achieve legitimate

prediction:  the highest state matchmaking but could play priestly role he could not marry woman from senatorial family nor achieve the

 epoch: 1570 | train_loss: 0.40, train_acc: 95.5% | test_loss: 0.44, test_acc: 95.2%

 epoch: 1571 | train_loss: 0.44, train_acc: 94.9% | test_loss: 0.43, test_acc: 95.1%


 10%|█         | 1574/15000 [03:04<22:46,  9.83it/s]


 epoch: 1572 | train_loss: 0.38, train_acc: 95.5% | test_loss: 0.40, test_acc: 95.3%

 epoch: 1573 | train_loss: 0.41, train_acc: 95.1% | test_loss: 0.46, test_acc: 94.7%

 epoch: 1574 | train_loss: 0.38, train_acc: 95.5% | test_loss: 0.41, test_acc: 95.2%


 11%|█         | 1576/15000 [03:04<22:03, 10.14it/s]


 epoch: 1575 | train_loss: 0.41, train_acc: 95.1% | test_loss: 0.42, test_acc: 95.1%

 epoch: 1576 | train_loss: 0.42, train_acc: 94.8% | test_loss: 0.40, test_acc: 95.2%

 epoch: 1577 | train_loss: 0.42, train_acc: 95.1% | test_loss: 0.42, test_acc: 94.9%


 11%|█         | 1578/15000 [03:04<21:38, 10.34it/s]


 epoch: 1578 | train_loss: 0.48, train_acc: 94.6% | test_loss: 0.38, test_acc: 95.6%


 11%|█         | 1580/15000 [03:05<39:13,  5.70it/s]


 epoch: 1579 | train_loss: 0.43, train_acc: 95.3% | test_loss: 0.42, test_acc: 95.1%

input:       theatre of the second world war the soviet invasion of poland started on september and poland fell soon thereafter

target:      theatre of the second world war the soviet invasion of poland started on september and poland fell soon thereafter on

prediction:  theatre of the second world war the soviet invasion of poland started on september and poland fell soon thereafter the

 epoch: 1580 | train_loss: 0.43, train_acc: 95.0% | test_loss: 0.40, test_acc: 95.2%


 11%|█         | 1582/15000 [03:05<34:13,  6.53it/s]


 epoch: 1581 | train_loss: 0.44, train_acc: 94.4% | test_loss: 0.43, test_acc: 95.3%

 epoch: 1582 | train_loss: 0.42, train_acc: 94.9% | test_loss: 0.41, test_acc: 95.2%


 11%|█         | 1584/15000 [03:06<30:44,  7.27it/s]


 epoch: 1583 | train_loss: 0.35, train_acc: 95.7% | test_loss: 0.43, test_acc: 94.5%

 epoch: 1584 | train_loss: 0.35, train_acc: 95.6% | test_loss: 0.40, test_acc: 95.5%


 11%|█         | 1586/15000 [03:06<27:57,  8.00it/s]


 epoch: 1585 | train_loss: 0.43, train_acc: 94.8% | test_loss: 0.38, test_acc: 95.8%

 epoch: 1586 | train_loss: 0.37, train_acc: 95.2% | test_loss: 0.44, test_acc: 94.7%


 11%|█         | 1588/15000 [03:06<27:01,  8.27it/s]


 epoch: 1587 | train_loss: 0.36, train_acc: 95.7% | test_loss: 0.40, test_acc: 95.0%

 epoch: 1588 | train_loss: 0.36, train_acc: 95.9% | test_loss: 0.44, test_acc: 94.7%


 11%|█         | 1590/15000 [03:06<30:34,  7.31it/s]


 epoch: 1589 | train_loss: 0.42, train_acc: 95.0% | test_loss: 0.43, test_acc: 94.7%

input:       europe will decline its population between and by per cent without changing immigration movements according to population projection of

target:      europe will decline its population between and by per cent without changing immigration movements according to population projection of the

prediction:  europe will decline its population between and by per cent without changing immigration movements according to population projection of the

 epoch: 1590 | train_loss: 0.42, train_acc: 95.1% | test_loss: 0.44, test_acc: 94.4%


 11%|█         | 1592/15000 [03:07<27:34,  8.10it/s]


 epoch: 1591 | train_loss: 0.41, train_acc: 94.8% | test_loss: 0.43, test_acc: 94.8%

 epoch: 1592 | train_loss: 0.41, train_acc: 94.9% | test_loss: 0.41, test_acc: 95.1%


 11%|█         | 1594/15000 [03:07<28:34,  7.82it/s]


 epoch: 1593 | train_loss: 0.46, train_acc: 94.4% | test_loss: 0.39, test_acc: 95.3%

 epoch: 1594 | train_loss: 0.36, train_acc: 95.8% | test_loss: 0.36, test_acc: 95.3%


 11%|█         | 1596/15000 [03:07<26:26,  8.45it/s]


 epoch: 1595 | train_loss: 0.40, train_acc: 94.9% | test_loss: 0.39, test_acc: 95.5%

 epoch: 1596 | train_loss: 0.35, train_acc: 96.0% | test_loss: 0.46, test_acc: 94.6%


 11%|█         | 1598/15000 [03:07<25:48,  8.65it/s]


 epoch: 1597 | train_loss: 0.38, train_acc: 95.5% | test_loss: 0.37, test_acc: 95.7%

 epoch: 1598 | train_loss: 0.40, train_acc: 95.0% | test_loss: 0.46, test_acc: 94.5%


 11%|█         | 1600/15000 [03:08<27:23,  8.15it/s]


 epoch: 1599 | train_loss: 0.41, train_acc: 95.0% | test_loss: 0.40, test_acc: 95.1%

input:       were leaked on the internet this created fears that malicious users would take advantage of the code to develop

target:      were leaked on the internet this created fears that malicious users would take advantage of the code to develop potential

prediction:  were leaked on the internet this created fears that malicious users would take advantage of the code to develop the

 epoch: 1600 | train_loss: 0.43, train_acc: 95.0% | test_loss: 0.44, test_acc: 94.6%


 11%|█         | 1602/15000 [03:08<26:21,  8.47it/s]


 epoch: 1601 | train_loss: 0.43, train_acc: 94.7% | test_loss: 0.39, test_acc: 95.5%

 epoch: 1602 | train_loss: 0.44, train_acc: 94.4% | test_loss: 0.44, test_acc: 94.8%


 11%|█         | 1604/15000 [03:08<25:30,  8.75it/s]


 epoch: 1603 | train_loss: 0.40, train_acc: 95.4% | test_loss: 0.41, test_acc: 94.8%

 epoch: 1604 | train_loss: 0.39, train_acc: 95.5% | test_loss: 0.46, test_acc: 94.6%


 11%|█         | 1605/15000 [03:08<25:37,  8.71it/s]


 epoch: 1605 | train_loss: 0.41, train_acc: 95.0% | test_loss: 0.38, test_acc: 95.5%


 11%|█         | 1608/15000 [03:09<39:24,  5.66it/s]


 epoch: 1606 | train_loss: 0.40, train_acc: 95.0% | test_loss: 0.38, test_acc: 95.3%

 epoch: 1607 | train_loss: 0.38, train_acc: 95.4% | test_loss: 0.39, test_acc: 95.3%

 epoch: 1608 | train_loss: 0.41, train_acc: 95.2% | test_loss: 0.41, test_acc: 95.5%


 11%|█         | 1610/15000 [03:09<32:32,  6.86it/s]


 epoch: 1609 | train_loss: 0.44, train_acc: 94.6% | test_loss: 0.37, test_acc: 95.5%

input:       to record and take screenshots in game via the windows built in gamedvr as of june the java and

target:      to record and take screenshots in game via the windows built in gamedvr as of june the java and bedrock

prediction:  to record and take screenshots in game via the windows built in expansion as of june the java and the

 epoch: 1610 | train_loss: 0.43, train_acc: 95.1% | test_loss: 0.39, test_acc: 95.0%

 epoch: 1611 | train_loss: 0.41, train_acc: 95.2% | test_loss: 0.40, test_acc: 95.5%


 11%|█         | 1614/15000 [03:09<24:35,  9.07it/s]


 epoch: 1612 | train_loss: 0.38, train_acc: 95.4% | test_loss: 0.38, test_acc: 95.2%

 epoch: 1613 | train_loss: 0.37, train_acc: 95.5% | test_loss: 0.38, test_acc: 95.4%

 epoch: 1614 | train_loss: 0.35, train_acc: 95.8% | test_loss: 0.41, test_acc: 94.9%


 11%|█         | 1616/15000 [03:10<22:33,  9.89it/s]


 epoch: 1615 | train_loss: 0.38, train_acc: 94.9% | test_loss: 0.45, test_acc: 94.6%

 epoch: 1616 | train_loss: 0.40, train_acc: 95.3% | test_loss: 0.43, test_acc: 95.1%

 epoch: 1617 | train_loss: 0.41, train_acc: 95.1% | test_loss: 0.37, test_acc: 95.4%


 11%|█         | 1618/15000 [03:10<21:50, 10.21it/s]


 epoch: 1618 | train_loss: 0.42, train_acc: 95.1% | test_loss: 0.40, test_acc: 95.2%

 epoch: 1619 | train_loss: 0.38, train_acc: 95.3% | test_loss: 0.43, test_acc: 94.9%

input:       south america one of the earliest known south american civilizations was at norte chico on the central peruvian coast

target:      south america one of the earliest known south american civilizations was at norte chico on the central peruvian coast though

prediction:  south america one of the earliest known south american civilizations was at norte chico on the central peruvian coast the


 11%|█         | 1620/15000 [03:10<22:54,  9.73it/s]


 epoch: 1620 | train_loss: 0.39, train_acc: 95.5% | test_loss: 0.47, test_acc: 94.5%


 11%|█         | 1622/15000 [03:11<37:51,  5.89it/s]


 epoch: 1621 | train_loss: 0.44, train_acc: 94.6% | test_loss: 0.39, test_acc: 95.1%

 epoch: 1622 | train_loss: 0.43, train_acc: 94.5% | test_loss: 0.38, test_acc: 95.3%

 epoch: 1623 | train_loss: 0.35, train_acc: 95.7% | test_loss: 0.42, test_acc: 95.0%


 11%|█         | 1626/15000 [03:11<27:29,  8.11it/s]


 epoch: 1624 | train_loss: 0.48, train_acc: 94.0% | test_loss: 0.44, test_acc: 94.4%

 epoch: 1625 | train_loss: 0.44, train_acc: 94.5% | test_loss: 0.41, test_acc: 95.3%

 epoch: 1626 | train_loss: 0.41, train_acc: 95.1% | test_loss: 0.37, test_acc: 95.5%


 11%|█         | 1628/15000 [03:11<25:02,  8.90it/s]


 epoch: 1627 | train_loss: 0.39, train_acc: 95.2% | test_loss: 0.40, test_acc: 94.9%

 epoch: 1628 | train_loss: 0.37, train_acc: 95.8% | test_loss: 0.46, test_acc: 94.2%

 epoch: 1629 | train_loss: 0.42, train_acc: 94.8% | test_loss: 0.38, test_acc: 95.5%


 11%|█         | 1630/15000 [03:11<24:27,  9.11it/s]


input:       manoeuvres and weapons testing military personnel or equipment are permitted only for scientific research or other peaceful purposes operation

target:      manoeuvres and weapons testing military personnel or equipment are permitted only for scientific research or other peaceful purposes operation by

prediction:  device and weapons testing military personnel or equipment are permitted only for scientific research or other peaceful purposes operation the

 epoch: 1630 | train_loss: 0.38, train_acc: 95.0% | test_loss: 0.41, test_acc: 95.2%

 epoch: 1631 | train_loss: 0.39, train_acc: 95.2% | test_loss: 0.40, test_acc: 94.6%


 11%|█         | 1634/15000 [03:12<21:09, 10.53it/s]


 epoch: 1632 | train_loss: 0.40, train_acc: 94.8% | test_loss: 0.42, test_acc: 94.8%

 epoch: 1633 | train_loss: 0.42, train_acc: 94.8% | test_loss: 0.36, test_acc: 95.8%

 epoch: 1634 | train_loss: 0.40, train_acc: 95.2% | test_loss: 0.41, test_acc: 95.2%


 11%|█         | 1636/15000 [03:12<25:42,  8.66it/s]


 epoch: 1635 | train_loss: 0.36, train_acc: 95.4% | test_loss: 0.43, test_acc: 94.6%

 epoch: 1636 | train_loss: 0.40, train_acc: 95.2% | test_loss: 0.38, test_acc: 95.5%

 epoch: 1637 | train_loss: 0.35, train_acc: 95.7% | test_loss: 0.49, test_acc: 94.2%


 11%|█         | 1638/15000 [03:12<23:40,  9.40it/s]


 epoch: 1638 | train_loss: 0.43, train_acc: 94.9% | test_loss: 0.43, test_acc: 94.7%

 epoch: 1639 | train_loss: 0.40, train_acc: 94.8% | test_loss: 0.41, test_acc: 95.6%

input:       of cognitive behavior therapy among clinical psychologists increased key practice in behavioral and cognitive behavioral therapy is exposing patients

target:      of cognitive behavior therapy among clinical psychologists increased key practice in behavioral and cognitive behavioral therapy is exposing patients to

prediction:  of cognitive behavior therapy among clinical psychologists increased key practice in behavioral and cognitive behavioral therapy is exposing patients the


 11%|█         | 1642/15000 [03:13<21:48, 10.21it/s]


 epoch: 1640 | train_loss: 0.37, train_acc: 95.8% | test_loss: 0.39, test_acc: 95.4%

 epoch: 1641 | train_loss: 0.43, train_acc: 95.1% | test_loss: 0.44, test_acc: 94.7%

 epoch: 1642 | train_loss: 0.42, train_acc: 95.0% | test_loss: 0.41, test_acc: 95.2%


 11%|█         | 1644/15000 [03:13<20:56, 10.63it/s]


 epoch: 1643 | train_loss: 0.44, train_acc: 94.6% | test_loss: 0.37, test_acc: 95.4%

 epoch: 1644 | train_loss: 0.34, train_acc: 95.9% | test_loss: 0.36, test_acc: 95.7%

 epoch: 1645 | train_loss: 0.43, train_acc: 94.7% | test_loss: 0.39, test_acc: 95.4%


 11%|█         | 1648/15000 [03:13<20:36, 10.80it/s]


 epoch: 1646 | train_loss: 0.39, train_acc: 95.1% | test_loss: 0.44, test_acc: 94.9%

 epoch: 1647 | train_loss: 0.38, train_acc: 95.0% | test_loss: 0.40, test_acc: 95.2%

 epoch: 1648 | train_loss: 0.44, train_acc: 94.4% | test_loss: 0.43, test_acc: 95.0%

 epoch: 1649 | train_loss: 0.40, train_acc: 95.1% | test_loss: 0.36, test_acc: 95.4%

input:       of state is the president he is elected by parliament of fiji after nomination by the prime minister or

target:      of state is the president he is elected by parliament of fiji after nomination by the prime minister or the

prediction:  of state is the president he is elected by parliament of fiji after nomination by the prime minister or the


 11%|█         | 1652/15000 [03:14<29:42,  7.49it/s]


 epoch: 1650 | train_loss: 0.42, train_acc: 95.5% | test_loss: 0.37, test_acc: 95.6%

 epoch: 1651 | train_loss: 0.44, train_acc: 94.9% | test_loss: 0.38, test_acc: 95.5%

 epoch: 1652 | train_loss: 0.36, train_acc: 96.0% | test_loss: 0.41, test_acc: 95.0%


 11%|█         | 1654/15000 [03:14<26:44,  8.32it/s]


 epoch: 1653 | train_loss: 0.43, train_acc: 94.9% | test_loss: 0.43, test_acc: 94.9%

 epoch: 1654 | train_loss: 0.36, train_acc: 95.5% | test_loss: 0.39, test_acc: 95.5%

 epoch: 1655 | train_loss: 0.42, train_acc: 95.0% | test_loss: 0.47, test_acc: 94.6%


 11%|█         | 1658/15000 [03:14<22:42,  9.79it/s]


 epoch: 1656 | train_loss: 0.45, train_acc: 94.2% | test_loss: 0.39, test_acc: 95.3%

 epoch: 1657 | train_loss: 0.37, train_acc: 95.5% | test_loss: 0.37, test_acc: 95.3%

 epoch: 1658 | train_loss: 0.37, train_acc: 95.6% | test_loss: 0.42, test_acc: 95.0%


 11%|█         | 1660/15000 [03:15<22:43,  9.79it/s]


 epoch: 1659 | train_loss: 0.39, train_acc: 95.2% | test_loss: 0.37, test_acc: 95.5%

input:       reopened access to github pages days later for public repositories regardless of location it was also revealed that using

target:      reopened access to github pages days later for public repositories regardless of location it was also revealed that using github

prediction:  yields access to github pages days later for public repositories regardless of location it was also revealed that using the

 epoch: 1660 | train_loss: 0.33, train_acc: 96.1% | test_loss: 0.41, test_acc: 95.1%


 11%|█         | 1662/15000 [03:15<22:01, 10.10it/s]


 epoch: 1661 | train_loss: 0.38, train_acc: 95.2% | test_loss: 0.44, test_acc: 94.8%

 epoch: 1662 | train_loss: 0.42, train_acc: 94.8% | test_loss: 0.38, test_acc: 95.5%

 epoch: 1663 | train_loss: 0.43, train_acc: 94.9% | test_loss: 0.41, test_acc: 94.8%


 11%|█         | 1666/15000 [03:16<31:05,  7.15it/s]


 epoch: 1664 | train_loss: 0.38, train_acc: 95.5% | test_loss: 0.40, test_acc: 95.3%

 epoch: 1665 | train_loss: 0.43, train_acc: 95.0% | test_loss: 0.38, test_acc: 95.7%

 epoch: 1666 | train_loss: 0.38, train_acc: 95.4% | test_loss: 0.39, test_acc: 95.4%


 11%|█         | 1668/15000 [03:16<27:22,  8.12it/s]


 epoch: 1667 | train_loss: 0.40, train_acc: 95.4% | test_loss: 0.44, test_acc: 94.7%

 epoch: 1668 | train_loss: 0.41, train_acc: 95.3% | test_loss: 0.40, test_acc: 95.2%

 epoch: 1669 | train_loss: 0.40, train_acc: 95.1% | test_loss: 0.41, test_acc: 95.1%


 11%|█         | 1670/15000 [03:16<25:56,  8.57it/s]


input:       instead the concept of western civilization emerged as way of grouping together europe and these colonies the question of

target:      instead the concept of western civilization emerged as way of grouping together europe and these colonies the question of defining

prediction:  instead the concept of western civilization emerged as way of grouping together europe and these colonies the question of the

 epoch: 1670 | train_loss: 0.45, train_acc: 94.7% | test_loss: 0.42, test_acc: 95.0%

 epoch: 1671 | train_loss: 0.42, train_acc: 95.2% | test_loss: 0.44, test_acc: 94.6%


 11%|█         | 1674/15000 [03:16<22:40,  9.80it/s]


 epoch: 1672 | train_loss: 0.40, train_acc: 95.0% | test_loss: 0.44, test_acc: 94.8%

 epoch: 1673 | train_loss: 0.38, train_acc: 95.3% | test_loss: 0.39, test_acc: 95.4%

 epoch: 1674 | train_loss: 0.34, train_acc: 95.8% | test_loss: 0.41, test_acc: 94.9%


 11%|█         | 1676/15000 [03:16<21:43, 10.22it/s]


 epoch: 1675 | train_loss: 0.38, train_acc: 95.3% | test_loss: 0.41, test_acc: 95.0%

 epoch: 1676 | train_loss: 0.38, train_acc: 95.4% | test_loss: 0.38, test_acc: 95.5%

 epoch: 1677 | train_loss: 0.43, train_acc: 94.7% | test_loss: 0.36, test_acc: 95.8%


 11%|█         | 1680/15000 [03:17<24:00,  9.25it/s]


 epoch: 1678 | train_loss: 0.41, train_acc: 95.1% | test_loss: 0.42, test_acc: 95.3%

 epoch: 1679 | train_loss: 0.39, train_acc: 95.4% | test_loss: 0.43, test_acc: 94.7%

input:       the term computer language is sometimes used interchangeably with programming language however the usage of both terms varies among

target:      the term computer language is sometimes used interchangeably with programming language however the usage of both terms varies among authors

prediction:  the term computer language is sometimes used package with programming language however the usage of both terms varies among the

 epoch: 1680 | train_loss: 0.37, train_acc: 95.5% | test_loss: 0.39, test_acc: 95.1%


 11%|█         | 1682/15000 [03:17<22:26,  9.89it/s]


 epoch: 1681 | train_loss: 0.42, train_acc: 95.0% | test_loss: 0.49, test_acc: 94.0%

 epoch: 1682 | train_loss: 0.45, train_acc: 95.2% | test_loss: 0.41, test_acc: 94.9%

 epoch: 1683 | train_loss: 0.40, train_acc: 95.5% | test_loss: 0.40, test_acc: 95.0%


 11%|█         | 1686/15000 [03:17<20:54, 10.61it/s]


 epoch: 1684 | train_loss: 0.44, train_acc: 94.7% | test_loss: 0.39, test_acc: 94.9%

 epoch: 1685 | train_loss: 0.42, train_acc: 94.8% | test_loss: 0.43, test_acc: 94.6%

 epoch: 1686 | train_loss: 0.37, train_acc: 95.5% | test_loss: 0.40, test_acc: 95.1%


 11%|█▏        | 1688/15000 [03:18<21:01, 10.56it/s]


 epoch: 1687 | train_loss: 0.40, train_acc: 95.5% | test_loss: 0.40, test_acc: 95.0%

 epoch: 1688 | train_loss: 0.44, train_acc: 94.7% | test_loss: 0.38, test_acc: 95.2%

 epoch: 1689 | train_loss: 0.39, train_acc: 95.2% | test_loss: 0.42, test_acc: 94.9%


 11%|█▏        | 1690/15000 [03:18<21:59, 10.09it/s]


input:       hooker telescope at mount wilson observatory this allowed him to estimate distances to galaxies whose redshifts had already been

target:      hooker telescope at mount wilson observatory this allowed him to estimate distances to galaxies whose redshifts had already been measured

prediction:  uplands telescope at mount wilson observatory this allowed him to estimate distances to galaxies whose redshifts had already been the

 epoch: 1690 | train_loss: 0.41, train_acc: 95.0% | test_loss: 0.36, test_acc: 95.7%

 epoch: 1691 | train_loss: 0.35, train_acc: 96.1% | test_loss: 0.42, test_acc: 95.2%


 11%|█▏        | 1693/15000 [03:19<34:23,  6.45it/s]


 epoch: 1692 | train_loss: 0.39, train_acc: 95.2% | test_loss: 0.38, test_acc: 95.5%

 epoch: 1693 | train_loss: 0.39, train_acc: 95.4% | test_loss: 0.36, test_acc: 95.7%

 epoch: 1694 | train_loss: 0.45, train_acc: 94.7% | test_loss: 0.40, test_acc: 95.2%


 11%|█▏        | 1697/15000 [03:19<27:10,  8.16it/s]


 epoch: 1695 | train_loss: 0.40, train_acc: 95.4% | test_loss: 0.43, test_acc: 94.7%

 epoch: 1696 | train_loss: 0.42, train_acc: 95.2% | test_loss: 0.38, test_acc: 95.7%

 epoch: 1697 | train_loss: 0.38, train_acc: 95.4% | test_loss: 0.40, test_acc: 95.4%


 11%|█▏        | 1698/15000 [03:19<26:25,  8.39it/s]


 epoch: 1698 | train_loss: 0.41, train_acc: 95.3% | test_loss: 0.40, test_acc: 95.6%

 epoch: 1699 | train_loss: 0.40, train_acc: 95.3% | test_loss: 0.39, test_acc: 95.3%

input:       victories ultimately the assyrians pushed the kushites back into nubia occupied memphis and sacked the temples of thebes the

target:      victories ultimately the assyrians pushed the kushites back into nubia occupied memphis and sacked the temples of thebes the assyrians

prediction: 

 11%|█▏        | 1700/15000 [03:19<26:42,  8.30it/s]

 victories ultimately the assyrians pushed the kushites back into nubia occupied memphis and sacked the temples of thebes the the

 epoch: 1700 | train_loss: 0.34, train_acc: 95.7% | test_loss: 0.39, test_acc: 95.1%

 epoch: 1701 | train_loss: 0.37, train_acc: 95.6% | test_loss: 0.37, test_acc: 95.5%


 11%|█▏        | 1703/15000 [03:20<24:49,  8.93it/s]


 epoch: 1702 | train_loss: 0.37, train_acc: 95.4% | test_loss: 0.40, test_acc: 95.1%

 epoch: 1703 | train_loss: 0.35, train_acc: 96.0% | test_loss: 0.38, test_acc: 95.4%


 11%|█▏        | 1705/15000 [03:20<24:42,  8.97it/s]


 epoch: 1704 | train_loss: 0.38, train_acc: 95.2% | test_loss: 0.40, test_acc: 95.3%

 epoch: 1705 | train_loss: 0.43, train_acc: 94.9% | test_loss: 0.38, test_acc: 95.7%


 11%|█▏        | 1708/15000 [03:21<36:29,  6.07it/s]


 epoch: 1706 | train_loss: 0.37, train_acc: 95.9% | test_loss: 0.46, test_acc: 94.2%

 epoch: 1707 | train_loss: 0.42, train_acc: 95.3% | test_loss: 0.38, test_acc: 95.0%

 epoch: 1708 | train_loss: 0.39, train_acc: 95.5% | test_loss: 0.38, test_acc: 95.7%


 11%|█▏        | 1710/15000 [03:21<33:53,  6.53it/s]


 epoch: 1709 | train_loss: 0.42, train_acc: 95.0% | test_loss: 0.39, test_acc: 95.4%

input:       to combine different functions from existing drafts and set them into scenes that illustrated their utility by reconstituting technical

target:      to combine different functions from existing drafts and set them into scenes that illustrated their utility by reconstituting technical inventions

prediction:  to combine different functions from existing drafts and set them into scenes that illustrated their utility by reconstituting technical the

 epoch: 1710 | train_loss: 0.44, train_acc: 94.9% | test_loss: 0.39, test_acc: 95.5%


 11%|█▏        | 1712/15000 [03:21<29:27,  7.52it/s]


 epoch: 1711 | train_loss: 0.36, train_acc: 95.9% | test_loss: 0.33, test_acc: 96.0%

 epoch: 1712 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.35, test_acc: 95.7%


 11%|█▏        | 1714/15000 [03:21<26:51,  8.24it/s]


 epoch: 1713 | train_loss: 0.41, train_acc: 94.9% | test_loss: 0.47, test_acc: 94.6%

 epoch: 1714 | train_loss: 0.35, train_acc: 95.6% | test_loss: 0.35, test_acc: 96.0%


 11%|█▏        | 1717/15000 [03:22<24:09,  9.16it/s]


 epoch: 1715 | train_loss: 0.36, train_acc: 95.7% | test_loss: 0.38, test_acc: 95.5%

 epoch: 1716 | train_loss: 0.42, train_acc: 95.0% | test_loss: 0.42, test_acc: 94.6%

 epoch: 1717 | train_loss: 0.43, train_acc: 95.2% | test_loss: 0.39, test_acc: 95.2%


 11%|█▏        | 1718/15000 [03:22<24:17,  9.11it/s]


 epoch: 1718 | train_loss: 0.41, train_acc: 95.5% | test_loss: 0.35, test_acc: 95.5%

 epoch: 1719 | train_loss: 0.34, train_acc: 95.7% | test_loss: 0.36, test_acc: 95.9%

input:       unskilled rural labourer in roman egypt though greek farm incomes too were on average lower than those available to

target:      unskilled rural labourer in roman egypt though greek farm incomes too were on average lower than those available to urban


 11%|█▏        | 1720/15000 [03:22<27:54,  7.93it/s]


prediction:  unskilled rural labourer in roman egypt though greek farm incomes too were on average lower than those available to the

 epoch: 1720 | train_loss: 0.37, train_acc: 95.5% | test_loss: 0.38, test_acc: 95.6%

 epoch: 1721 | train_loss: 0.41, train_acc: 94.9% | test_loss: 0.42, test_acc: 94.6%


 11%|█▏        | 1724/15000 [03:22<22:22,  9.89it/s]


 epoch: 1722 | train_loss: 0.35, train_acc: 95.8% | test_loss: 0.40, test_acc: 95.3%

 epoch: 1723 | train_loss: 0.40, train_acc: 95.2% | test_loss: 0.37, test_acc: 95.6%

 epoch: 1724 | train_loss: 0.37, train_acc: 95.5% | test_loss: 0.37, test_acc: 95.6%


 12%|█▏        | 1726/15000 [03:22<21:07, 10.48it/s]


 epoch: 1725 | train_loss: 0.40, train_acc: 95.4% | test_loss: 0.37, test_acc: 95.3%

 epoch: 1726 | train_loss: 0.37, train_acc: 95.3% | test_loss: 0.36, test_acc: 95.4%

 epoch: 1727 | train_loss: 0.42, train_acc: 94.7% | test_loss: 0.43, test_acc: 95.0%


 12%|█▏        | 1728/15000 [03:23<20:20, 10.87it/s]


 epoch: 1728 | train_loss: 0.43, train_acc: 94.8% | test_loss: 0.43, test_acc: 95.0%

 epoch: 1729 | train_loss: 0.40, train_acc: 95.2% | test_loss: 0.42, test_acc: 95.2%

input:       the highly remote baker island and wake island now administered by the military this is due to their

target:      the highly remote baker island and wake island now administered by the military this is due to their location

prediction:  the highly remote baker island and wake island now administered by the military this is due to their and


 12%|█▏        | 1732/15000 [03:23<20:32, 10.77it/s]


 epoch: 1730 | train_loss: 0.44, train_acc: 94.5% | test_loss: 0.46, test_acc: 94.5%

 epoch: 1731 | train_loss: 0.36, train_acc: 95.5% | test_loss: 0.39, test_acc: 95.0%

 epoch: 1732 | train_loss: 0.37, train_acc: 95.5% | test_loss: 0.36, test_acc: 95.7%

 epoch: 1733 | train_loss: 0.36, train_acc: 96.0% | test_loss: 0.39, test_acc: 95.3%


 12%|█▏        | 1736/15000 [03:24<30:20,  7.29it/s]


 epoch: 1734 | train_loss: 0.39, train_acc: 95.4% | test_loss: 0.35, test_acc: 95.6%

 epoch: 1735 | train_loss: 0.38, train_acc: 95.3% | test_loss: 0.37, test_acc: 95.6%

 epoch: 1736 | train_loss: 0.39, train_acc: 95.5% | test_loss: 0.39, test_acc: 95.7%


 12%|█▏        | 1738/15000 [03:24<26:35,  8.31it/s]


 epoch: 1737 | train_loss: 0.44, train_acc: 94.1% | test_loss: 0.41, test_acc: 94.9%

 epoch: 1738 | train_loss: 0.38, train_acc: 95.4% | test_loss: 0.37, test_acc: 95.5%

 epoch: 1739 | train_loss: 0.40, train_acc: 95.4% | test_loss: 0.41, test_acc: 94.9%


 12%|█▏        | 1740/15000 [03:24<25:25,  8.69it/s]


input:       central pacific islands from cultural influences that spread through large continental landmasses and adjacent islands the islands of the

target:      central pacific islands from cultural influences that spread through large continental landmasses and adjacent islands the islands of the malay

prediction:  central pacific islands from cultural influences that spread through large continental landmasses and adjacent islands the islands of the the

 epoch: 1740 | train_loss: 0.37, train_acc: 96.0% | test_loss: 0.32, test_acc: 96.3%

 epoch: 1741 | train_loss: 0.36, train_acc: 96.0% | test_loss: 0.41, test_acc: 95.1%


 12%|█▏        | 1744/15000 [03:25<22:23,  9.87it/s]


 epoch: 1742 | train_loss: 0.35, train_acc: 95.4% | test_loss: 0.39, test_acc: 95.5%

 epoch: 1743 | train_loss: 0.34, train_acc: 96.0% | test_loss: 0.40, test_acc: 95.4%

 epoch: 1744 | train_loss: 0.38, train_acc: 95.4% | test_loss: 0.37, test_acc: 95.3%


 12%|█▏        | 1746/15000 [03:25<21:16, 10.38it/s]


 epoch: 1745 | train_loss: 0.37, train_acc: 95.8% | test_loss: 0.38, test_acc: 95.5%

 epoch: 1746 | train_loss: 0.38, train_acc: 95.1% | test_loss: 0.39, test_acc: 95.5%

 epoch: 1747 | train_loss: 0.38, train_acc: 95.5% | test_loss: 0.40, test_acc: 94.9%


 12%|█▏        | 1750/15000 [03:25<22:44,  9.71it/s]


 epoch: 1748 | train_loss: 0.38, train_acc: 95.2% | test_loss: 0.40, test_acc: 95.1%

 epoch: 1749 | train_loss: 0.39, train_acc: 95.0% | test_loss: 0.36, test_acc: 95.4%

input:       language processing company in september reddit acquired spiketrap for an undisclosed sum in june fidelity the lead investor in

target:      language processing company in september reddit acquired spiketrap for an undisclosed sum in june fidelity the lead investor in reddit

prediction:  language processing company in september reddit acquired detail for an undisclosed sum in june fidelity the lead twelve in the

 epoch: 1750 | train_loss: 0.40, train_acc: 95.6% | test_loss: 0.42, test_acc: 95.1%


 12%|█▏        | 1752/15000 [03:25<21:20, 10.35it/s]


 epoch: 1751 | train_loss: 0.38, train_acc: 95.5% | test_loss: 0.43, test_acc: 94.7%

 epoch: 1752 | train_loss: 0.36, train_acc: 95.7% | test_loss: 0.39, test_acc: 95.3%

 epoch: 1753 | train_loss: 0.41, train_acc: 94.6% | test_loss: 0.34, test_acc: 96.0%


 12%|█▏        | 1756/15000 [03:26<19:48, 11.14it/s]


 epoch: 1754 | train_loss: 0.40, train_acc: 94.9% | test_loss: 0.36, test_acc: 95.8%

 epoch: 1755 | train_loss: 0.39, train_acc: 95.5% | test_loss: 0.38, test_acc: 95.5%

 epoch: 1756 | train_loss: 0.46, train_acc: 94.6% | test_loss: 0.41, test_acc: 94.6%


 12%|█▏        | 1758/15000 [03:26<19:40, 11.22it/s]


 epoch: 1757 | train_loss: 0.40, train_acc: 95.1% | test_loss: 0.38, test_acc: 95.1%

 epoch: 1758 | train_loss: 0.35, train_acc: 95.5% | test_loss: 0.37, test_acc: 95.7%

 epoch: 1759 | train_loss: 0.41, train_acc: 95.0% | test_loss: 0.39, test_acc: 95.3%


 12%|█▏        | 1760/15000 [03:26<20:45, 10.63it/s]


input:       ambitious naval expedition to dominate sicily the expedition ended in disaster at the harbor of syracuse with almost the

target:      ambitious naval expedition to dominate sicily the expedition ended in disaster at the harbor of syracuse with almost the entire

prediction:  ambitious naval expedition to dominate sicily the expedition ended in disaster at the harbor of syracuse with almost the the

 epoch: 1760 | train_loss: 0.36, train_acc: 95.6% | test_loss: 0.37, test_acc: 95.7%

 epoch: 1761 | train_loss: 0.38, train_acc: 95.8% | test_loss: 0.42, test_acc: 94.7%


 12%|█▏        | 1764/15000 [03:27<26:13,  8.41it/s]


 epoch: 1762 | train_loss: 0.40, train_acc: 95.3% | test_loss: 0.44, test_acc: 94.7%

 epoch: 1763 | train_loss: 0.33, train_acc: 95.9% | test_loss: 0.38, test_acc: 95.5%

 epoch: 1764 | train_loss: 0.40, train_acc: 94.9% | test_loss: 0.39, test_acc: 95.3%


 12%|█▏        | 1766/15000 [03:27<23:57,  9.21it/s]


 epoch: 1765 | train_loss: 0.39, train_acc: 95.3% | test_loss: 0.40, test_acc: 95.0%

 epoch: 1766 | train_loss: 0.36, train_acc: 95.6% | test_loss: 0.37, test_acc: 95.9%

 epoch: 1767 | train_loss: 0.37, train_acc: 96.0% | test_loss: 0.37, test_acc: 95.6%


 12%|█▏        | 1770/15000 [03:27<21:45, 10.14it/s]


 epoch: 1768 | train_loss: 0.38, train_acc: 95.3% | test_loss: 0.40, test_acc: 95.1%

 epoch: 1769 | train_loss: 0.40, train_acc: 94.9% | test_loss: 0.34, test_acc: 95.6%

input:       chinese audiences between and had significant influence on psychology in china chancellor sai yuan ei introduced him at peking

target:      chinese audiences between and had significant influence on psychology in china chancellor sai yuan ei introduced him at peking university

prediction:  chinese audiences between and had significant influence on psychology in china regulation perspectives federated santiago introduced him at pain the

 epoch: 1770 | train_loss: 0.37, train_acc: 95.6% | test_loss: 0.42, test_acc: 94.9%


 12%|█▏        | 1772/15000 [03:27<20:50, 10.58it/s]


 epoch: 1771 | train_loss: 0.34, train_acc: 95.5% | test_loss: 0.42, test_acc: 94.7%

 epoch: 1772 | train_loss: 0.40, train_acc: 95.1% | test_loss: 0.34, test_acc: 95.9%

 epoch: 1773 | train_loss: 0.37, train_acc: 95.8% | test_loss: 0.37, test_acc: 95.7%


 12%|█▏        | 1776/15000 [03:28<19:55, 11.06it/s]


 epoch: 1774 | train_loss: 0.40, train_acc: 95.5% | test_loss: 0.36, test_acc: 95.7%

 epoch: 1775 | train_loss: 0.36, train_acc: 95.5% | test_loss: 0.43, test_acc: 94.6%

 epoch: 1776 | train_loss: 0.32, train_acc: 96.0% | test_loss: 0.34, test_acc: 95.3%


 12%|█▏        | 1778/15000 [03:28<32:53,  6.70it/s]


 epoch: 1777 | train_loss: 0.40, train_acc: 95.3% | test_loss: 0.33, test_acc: 96.3%

 epoch: 1778 | train_loss: 0.38, train_acc: 95.2% | test_loss: 0.36, test_acc: 95.9%

 epoch: 1779 | train_loss: 0.37, train_acc: 95.6% | test_loss: 0.41, test_acc: 94.4%


 12%|█▏        | 1780/15000 [03:28<29:23,  7.50it/s]


input:       been first inhabited by humans when people were crossing the bering land bridge now the bering strait at least

target:      been first inhabited by humans when people were crossing the bering land bridge now the bering strait at least years

prediction:  been first inhabited by humans when people were crossing the bering land bridge now the bering strait at least the

 epoch: 1780 | train_loss: 0.41, train_acc: 95.2% | test_loss: 0.41, test_acc: 94.7%

 epoch: 1781 | train_loss: 0.35, train_acc: 95.5% | test_loss: 0.33, test_acc: 96.2%


 12%|█▏        | 1784/15000 [03:29<23:39,  9.31it/s]


 epoch: 1782 | train_loss: 0.37, train_acc: 95.7% | test_loss: 0.37, test_acc: 95.9%

 epoch: 1783 | train_loss: 0.40, train_acc: 95.3% | test_loss: 0.39, test_acc: 95.4%

 epoch: 1784 | train_loss: 0.36, train_acc: 95.9% | test_loss: 0.38, test_acc: 95.4%


 12%|█▏        | 1786/15000 [03:29<22:04,  9.98it/s]


 epoch: 1785 | train_loss: 0.39, train_acc: 95.6% | test_loss: 0.37, test_acc: 95.5%

 epoch: 1786 | train_loss: 0.35, train_acc: 95.5% | test_loss: 0.39, test_acc: 95.5%

 epoch: 1787 | train_loss: 0.40, train_acc: 95.0% | test_loss: 0.39, test_acc: 95.3%


 12%|█▏        | 1788/15000 [03:29<21:15, 10.36it/s]


 epoch: 1788 | train_loss: 0.34, train_acc: 95.9% | test_loss: 0.34, test_acc: 95.9%

 epoch: 1789 | train_loss: 0.35, train_acc: 95.9% | test_loss: 0.39, test_acc: 95.2%

input:       julian as co augustus prevented the roman civil war of from reaching constantinople julian would serve as the sole

target:      julian as co augustus prevented the roman civil war of from reaching constantinople julian would serve as the sole emperor

prediction:  julian as co augustus prevented the roman civil war of from reaching constantinople julian would serve as the sole the


 12%|█▏        | 1790/15000 [03:29<21:34, 10.21it/s]


 epoch: 1790 | train_loss: 0.38, train_acc: 95.5% | test_loss: 0.34, test_acc: 96.0%


 12%|█▏        | 1792/15000 [03:30<35:25,  6.22it/s]


 epoch: 1791 | train_loss: 0.37, train_acc: 95.6% | test_loss: 0.35, test_acc: 95.9%

 epoch: 1792 | train_loss: 0.37, train_acc: 95.6% | test_loss: 0.41, test_acc: 94.7%

 epoch: 1793 | train_loss: 0.39, train_acc: 95.1% | test_loss: 0.34, test_acc: 95.9%


 12%|█▏        | 1796/15000 [03:30<26:12,  8.39it/s]


 epoch: 1794 | train_loss: 0.34, train_acc: 96.1% | test_loss: 0.40, test_acc: 95.0%

 epoch: 1795 | train_loss: 0.38, train_acc: 95.5% | test_loss: 0.35, test_acc: 95.5%

 epoch: 1796 | train_loss: 0.31, train_acc: 96.6% | test_loss: 0.38, test_acc: 95.2%


 12%|█▏        | 1798/15000 [03:30<23:40,  9.30it/s]


 epoch: 1797 | train_loss: 0.42, train_acc: 94.7% | test_loss: 0.35, test_acc: 95.6%

 epoch: 1798 | train_loss: 0.38, train_acc: 95.5% | test_loss: 0.39, test_acc: 95.6%

 epoch: 1799 | train_loss: 0.34, train_acc: 96.1% | test_loss: 0.37, test_acc: 95.7%


 12%|█▏        | 1800/15000 [03:31<23:24,  9.40it/s]


input:       showcased the power and prestige of hellenistic rule and became centre of learning and culture that included the famous

target:      showcased the power and prestige of hellenistic rule and became centre of learning and culture that included the famous library

prediction:  showcased the power and prestige of hellenistic rule and became centre of learning and culture that included the famous the

 epoch: 1800 | train_loss: 0.37, train_acc: 95.6% | test_loss: 0.38, test_acc: 95.6%

 epoch: 1801 | train_loss: 0.38, train_acc: 95.1% | test_loss: 0.39, test_acc: 95.3%


 12%|█▏        | 1804/15000 [03:31<22:01,  9.99it/s]


 epoch: 1802 | train_loss: 0.35, train_acc: 96.0% | test_loss: 0.41, test_acc: 95.2%

 epoch: 1803 | train_loss: 0.37, train_acc: 94.9% | test_loss: 0.40, test_acc: 94.8%

 epoch: 1804 | train_loss: 0.37, train_acc: 95.6% | test_loss: 0.38, test_acc: 95.5%


 12%|█▏        | 1806/15000 [03:32<34:33,  6.36it/s]


 epoch: 1805 | train_loss: 0.35, train_acc: 95.6% | test_loss: 0.43, test_acc: 94.8%

 epoch: 1806 | train_loss: 0.38, train_acc: 95.4% | test_loss: 0.42, test_acc: 95.0%

 epoch: 1807 | train_loss: 0.37, train_acc: 95.8% | test_loss: 0.37, test_acc: 95.6%


 12%|█▏        | 1808/15000 [03:32<29:46,  7.39it/s]


 epoch: 1808 | train_loss: 0.34, train_acc: 95.9% | test_loss: 0.43, test_acc: 95.0%

 epoch: 1809 | train_loss: 0.43, train_acc: 94.6% | test_loss: 0.38, test_acc: 95.4%

input:       in bc to the establishment of byzantium by constantine as the capital of the roman empire in ad finally

target:      in bc to the establishment of byzantium by constantine as the capital of the roman empire in ad finally late

prediction:  in bc to the establishment of byzantium by constantine as the capital of the roman empire in ad finally the


 12%|█▏        | 1811/15000 [03:32<26:51,  8.19it/s]


 epoch: 1810 | train_loss: 0.35, train_acc: 96.0% | test_loss: 0.40, test_acc: 95.4%

 epoch: 1811 | train_loss: 0.38, train_acc: 95.2% | test_loss: 0.35, test_acc: 95.5%

 epoch: 1812 | train_loss: 0.35, train_acc: 95.5% | test_loss: 0.34, test_acc: 95.6%

 12%|█▏        | 1813/15000 [03:32<25:06,  8.75it/s]



 epoch: 1813 | train_loss: 0.34, train_acc: 95.9% | test_loss: 0.38, test_acc: 95.5%


 12%|█▏        | 1815/15000 [03:33<25:25,  8.64it/s]


 epoch: 1814 | train_loss: 0.35, train_acc: 95.8% | test_loss: 0.35, test_acc: 95.8%

 epoch: 1815 | train_loss: 0.38, train_acc: 95.4% | test_loss: 0.37, test_acc: 95.5%


 12%|█▏        | 1817/15000 [03:33<25:45,  8.53it/s]


 epoch: 1816 | train_loss: 0.40, train_acc: 95.4% | test_loss: 0.37, test_acc: 95.4%

 epoch: 1817 | train_loss: 0.38, train_acc: 95.5% | test_loss: 0.42, test_acc: 94.9%


 12%|█▏        | 1819/15000 [03:33<25:51,  8.50it/s]


 epoch: 1818 | train_loss: 0.37, train_acc: 95.6% | test_loss: 0.46, test_acc: 94.6%

 epoch: 1819 | train_loss: 0.35, train_acc: 95.9% | test_loss: 0.37, test_acc: 95.5%

input:       baekje goguryeo and silla emerged and expelled the chinese goguryeo and baekje were eventually destroyed by tang dynasty and

target:      baekje goguryeo and silla emerged and expelled the chinese goguryeo and baekje were eventually destroyed by tang dynasty and silla

prediction:  baekje goguryeo and silla emerged and expelled the chinese goguryeo and baekje were eventually destroyed by tang dynasty and the


 12%|█▏        | 1822/15000 [03:34<37:32,  5.85it/s]


 epoch: 1820 | train_loss: 0.39, train_acc: 95.4% | test_loss: 0.39, test_acc: 95.5%

 epoch: 1821 | train_loss: 0.39, train_acc: 95.2% | test_loss: 0.38, test_acc: 95.2%

 epoch: 1822 | train_loss: 0.41, train_acc: 95.0% | test_loss: 0.38, test_acc: 95.5%


 12%|█▏        | 1824/15000 [03:34<32:22,  6.78it/s]


 epoch: 1823 | train_loss: 0.37, train_acc: 95.9% | test_loss: 0.38, test_acc: 95.8%

 epoch: 1824 | train_loss: 0.38, train_acc: 95.3% | test_loss: 0.40, test_acc: 95.1%


 12%|█▏        | 1826/15000 [03:34<28:01,  7.83it/s]


 epoch: 1825 | train_loss: 0.35, train_acc: 95.7% | test_loss: 0.39, test_acc: 95.3%

 epoch: 1826 | train_loss: 0.36, train_acc: 95.7% | test_loss: 0.35, test_acc: 95.9%


 12%|█▏        | 1829/15000 [03:34<24:29,  8.96it/s]


 epoch: 1827 | train_loss: 0.40, train_acc: 95.0% | test_loss: 0.40, test_acc: 95.1%

 epoch: 1828 | train_loss: 0.37, train_acc: 95.5% | test_loss: 0.39, test_acc: 95.3%

 epoch: 1829 | train_loss: 0.38, train_acc: 95.4% | test_loss: 0.34, test_acc: 96.2%


 12%|█▏        | 1831/15000 [03:35<26:18,  8.34it/s]


input:       current location of his remains is unclear much of ch teau amboise was damaged during the french revolution leading

target:      current location of his remains is unclear much of ch teau amboise was damaged during the french revolution leading to

prediction:  current location of his remains is unclear much of ch teau amboise was damaged during the french revolution leading the

 epoch: 1830 | train_loss: 0.37, train_acc: 95.3% | test_loss: 0.36, test_acc: 95.8%

 epoch: 1831 | train_loss: 0.40, train_acc: 95.4% | test_loss: 0.38, test_acc: 95.2%


 12%|█▏        | 1833/15000 [03:35<25:54,  8.47it/s]


 epoch: 1832 | train_loss: 0.39, train_acc: 95.4% | test_loss: 0.37, test_acc: 95.5%

 epoch: 1833 | train_loss: 0.38, train_acc: 95.5% | test_loss: 0.42, test_acc: 95.0%


 12%|█▏        | 1836/15000 [03:36<36:19,  6.04it/s]


 epoch: 1834 | train_loss: 0.37, train_acc: 95.9% | test_loss: 0.38, test_acc: 95.3%

 epoch: 1835 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.40, test_acc: 95.3%

 epoch: 1836 | train_loss: 0.32, train_acc: 96.2% | test_loss: 0.38, test_acc: 95.4%


 12%|█▏        | 1838/15000 [03:36<28:59,  7.57it/s]


 epoch: 1837 | train_loss: 0.37, train_acc: 95.3% | test_loss: 0.39, test_acc: 95.1%

 epoch: 1838 | train_loss: 0.39, train_acc: 95.1% | test_loss: 0.41, test_acc: 95.1%

 epoch: 1839 | train_loss: 0.37, train_acc: 95.5% | test_loss: 0.36, test_acc: 95.8%


 12%|█▏        | 1840/15000 [03:36<26:53,  8.16it/s]


input:       homestead act of was enacted specifically to break cycle of debt during reconstruction prior to this act blacks and

target:      homestead act of was enacted specifically to break cycle of debt during reconstruction prior to this act blacks and impoverished

prediction:  homestead act of was enacted specifically to break cycle of debt during reconstruction prior to this act blacks and the

 epoch: 1840 | train_loss: 0.36, train_acc: 95.8% | test_loss: 0.41, test_acc: 95.0%

 epoch: 1841 | train_loss: 0.36, train_acc: 95.7% | test_loss: 0.39, test_acc: 95.3%


 12%|█▏        | 1844/15000 [03:36<23:12,  9.45it/s]


 epoch: 1842 | train_loss: 0.37, train_acc: 95.4% | test_loss: 0.37, test_acc: 95.4%

 epoch: 1843 | train_loss: 0.41, train_acc: 95.0% | test_loss: 0.38, test_acc: 95.5%

 epoch: 1844 | train_loss: 0.35, train_acc: 95.6% | test_loss: 0.38, test_acc: 95.3%


 12%|█▏        | 1846/15000 [03:37<22:03,  9.94it/s]


 epoch: 1845 | train_loss: 0.35, train_acc: 95.8% | test_loss: 0.44, test_acc: 94.6%

 epoch: 1846 | train_loss: 0.38, train_acc: 95.4% | test_loss: 0.37, test_acc: 95.5%

 epoch: 1847 | train_loss: 0.43, train_acc: 94.7% | test_loss: 0.38, test_acc: 95.0%


 12%|█▏        | 1848/15000 [03:37<25:25,  8.62it/s]


 epoch: 1848 | train_loss: 0.34, train_acc: 96.0% | test_loss: 0.37, test_acc: 95.3%

 epoch: 1849 | train_loss: 0.34, train_acc: 96.0% | test_loss: 0.37, test_acc: 95.5%

input:       telephone while several individual countries have maintained high growth rates since overall growth has considerably slowed primarily as result

target:      telephone while several individual countries have maintained high growth rates since overall growth has considerably slowed primarily as result of

prediction:  shaped while several individual countries have maintained high growth rates since overall growth has considerably slowed primarily as result the

 epoch: 1850 | train_loss: 0.39, train_acc: 95.4% | test_loss: 0.38, test_acc: 95.1%

 12%|█▏        | 1852/15000 [03:37<22:25,  9.77it/s]



 epoch: 1851 | train_loss: 0.36, train_acc: 96.2% | test_loss: 0.43, test_acc: 94.9%

 epoch: 1852 | train_loss: 0.39, train_acc: 95.4% | test_loss: 0.32, test_acc: 96.1%


 12%|█▏        | 1854/15000 [03:37<21:01, 10.42it/s]


 epoch: 1853 | train_loss: 0.33, train_acc: 96.3% | test_loss: 0.40, test_acc: 94.6%

 epoch: 1854 | train_loss: 0.34, train_acc: 95.7% | test_loss: 0.36, test_acc: 95.8%

 epoch: 1855 | train_loss: 0.36, train_acc: 95.6% | test_loss: 0.38, test_acc: 95.5%


 12%|█▏        | 1858/15000 [03:38<19:48, 11.06it/s]


 epoch: 1856 | train_loss: 0.42, train_acc: 95.1% | test_loss: 0.38, test_acc: 95.3%

 epoch: 1857 | train_loss: 0.37, train_acc: 95.4% | test_loss: 0.38, test_acc: 95.5%

 epoch: 1858 | train_loss: 0.37, train_acc: 95.4% | test_loss: 0.39, test_acc: 95.5%


 12%|█▏        | 1860/15000 [03:38<20:59, 10.44it/s]


 epoch: 1859 | train_loss: 0.39, train_acc: 95.5% | test_loss: 0.43, test_acc: 94.8%

input:       communities or subreddits submissions with more upvotes appear towards the top of their subreddit and if they receive enough

target:      communities or subreddits submissions with more upvotes appear towards the top of their subreddit and if they receive enough upvotes

prediction:  communities or subreddits describing with more upvotes appear towards the top of their subreddit and if they receive enough the

 epoch: 1860 | train_loss: 0.40, train_acc: 95.1% | test_loss: 0.38, test_acc: 95.2%

 epoch: 1861 | train_loss: 0.34, train_acc: 95.8% | test_loss: 0.41, test_acc: 94.8%


 12%|█▏        | 1864/15000 [03:39<30:23,  7.20it/s]


 epoch: 1862 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.36, test_acc: 95.6%

 epoch: 1863 | train_loss: 0.40, train_acc: 95.2% | test_loss: 0.39, test_acc: 95.2%

 epoch: 1864 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.42, test_acc: 94.8%


 12%|█▏        | 1867/15000 [03:39<25:15,  8.67it/s]


 epoch: 1865 | train_loss: 0.35, train_acc: 95.9% | test_loss: 0.35, test_acc: 95.9%

 epoch: 1866 | train_loss: 0.39, train_acc: 95.5% | test_loss: 0.40, test_acc: 94.6%

 epoch: 1867 | train_loss: 0.36, train_acc: 95.5% | test_loss: 0.37, test_acc: 95.5%


 12%|█▏        | 1869/15000 [03:39<22:59,  9.52it/s]


 epoch: 1868 | train_loss: 0.38, train_acc: 95.7% | test_loss: 0.36, test_acc: 95.5%

 epoch: 1869 | train_loss: 0.42, train_acc: 95.0% | test_loss: 0.37, test_acc: 95.5%

input:       the early as programming language it used compiler to automatically convert the language into machine code the first code

target:      the early as programming language it used compiler to automatically convert the language into machine code the first code and

prediction:  the early as programming language it used compiler to automatically convert the language into machine code the first code the


 12%|█▏        | 1871/15000 [03:39<22:54,  9.55it/s]


 epoch: 1870 | train_loss: 0.34, train_acc: 96.1% | test_loss: 0.44, test_acc: 94.9%

 epoch: 1871 | train_loss: 0.36, train_acc: 95.7% | test_loss: 0.36, test_acc: 95.8%

 epoch: 1872 | train_loss: 0.36, train_acc: 95.5% | test_loss: 0.35, test_acc: 95.5%


 12%|█▎        | 1875/15000 [03:40<20:56, 10.45it/s]


 epoch: 1873 | train_loss: 0.38, train_acc: 95.5% | test_loss: 0.34, test_acc: 96.0%

 epoch: 1874 | train_loss: 0.37, train_acc: 95.8% | test_loss: 0.34, test_acc: 95.9%

 epoch: 1875 | train_loss: 0.34, train_acc: 96.1% | test_loss: 0.39, test_acc: 95.6%

 epoch: 1876 | train_loss: 0.34, train_acc: 96.1% | test_loss: 0.39, test_acc: 95.5%


 13%|█▎        | 1879/15000 [03:41<30:25,  7.19it/s]


 epoch: 1877 | train_loss: 0.35, train_acc: 95.8% | test_loss: 0.35, test_acc: 95.6%

 epoch: 1878 | train_loss: 0.36, train_acc: 95.8% | test_loss: 0.37, test_acc: 95.4%

 epoch: 1879 | train_loss: 0.34, train_acc: 95.7% | test_loss: 0.38, test_acc: 95.5%


 13%|█▎        | 1880/15000 [03:41<29:50,  7.33it/s]


input:       systems when early hominins acquired the ability to form theory of mind and shared intentionality this development is sometimes

target:      systems when early hominins acquired the ability to form theory of mind and shared intentionality this development is sometimes thought

prediction:  systems when early consonants acquired the ability to form theory of mind and shared contract this development is sometimes the

 epoch: 1880 | train_loss: 0.34, train_acc: 96.0% | test_loss: 0.35, test_acc: 95.5%

 epoch: 1881 | train_loss: 0.38, train_acc: 95.4% | test_loss: 0.36, test_acc: 95.6%


 13%|█▎        | 1884/15000 [03:41<23:59,  9.11it/s]


 epoch: 1882 | train_loss: 0.37, train_acc: 95.6% | test_loss: 0.43, test_acc: 95.1%

 epoch: 1883 | train_loss: 0.38, train_acc: 95.5% | test_loss: 0.35, test_acc: 96.2%

 epoch: 1884 | train_loss: 0.36, train_acc: 95.5% | test_loss: 0.37, test_acc: 95.6%


 13%|█▎        | 1886/15000 [03:41<22:34,  9.68it/s]


 epoch: 1885 | train_loss: 0.39, train_acc: 95.0% | test_loss: 0.38, test_acc: 95.2%

 epoch: 1886 | train_loss: 0.34, train_acc: 95.8% | test_loss: 0.34, test_acc: 96.1%

 epoch: 1887 | train_loss: 0.36, train_acc: 95.8% | test_loss: 0.36, test_acc: 95.8%


 13%|█▎        | 1888/15000 [03:41<21:30, 10.16it/s]


 epoch: 1888 | train_loss: 0.34, train_acc: 95.8% | test_loss: 0.36, test_acc: 95.7%

 epoch: 1889 | train_loss: 0.36, train_acc: 95.8% | test_loss: 0.32, test_acc: 95.9%

input:       clark wissler published discouraging results suggesting that mental testing of columbia and barnard students failed to predict academic performance

target:      clark wissler published discouraging results suggesting that mental testing of columbia and barnard students failed to predict academic performance in

prediction:  clark stylistic published discouraging results suggesting that mental testing of columbia and barnard students failed to predict academic performance the


 13%|█▎        | 1890/15000 [03:42<21:41, 10.08it/s]


 epoch: 1890 | train_loss: 0.33, train_acc: 95.8% | test_loss: 0.38, test_acc: 95.5%

 epoch: 1891 | train_loss: 0.39, train_acc: 95.3% | test_loss: 0.36, test_acc: 95.9%


 13%|█▎        | 1894/15000 [03:42<22:28,  9.72it/s]


 epoch: 1892 | train_loss: 0.35, train_acc: 95.8% | test_loss: 0.39, test_acc: 95.1%

 epoch: 1893 | train_loss: 0.35, train_acc: 95.9% | test_loss: 0.38, test_acc: 95.5%

 epoch: 1894 | train_loss: 0.34, train_acc: 95.9% | test_loss: 0.36, test_acc: 95.8%


 13%|█▎        | 1896/15000 [03:42<20:59, 10.40it/s]


 epoch: 1895 | train_loss: 0.34, train_acc: 96.0% | test_loss: 0.35, test_acc: 95.8%

 epoch: 1896 | train_loss: 0.32, train_acc: 96.0% | test_loss: 0.35, test_acc: 95.7%

 epoch: 1897 | train_loss: 0.34, train_acc: 96.0% | test_loss: 0.34, test_acc: 95.8%


 13%|█▎        | 1898/15000 [03:42<20:25, 10.70it/s]


 epoch: 1898 | train_loss: 0.36, train_acc: 95.7% | test_loss: 0.33, test_acc: 96.3%

 epoch: 1899 | train_loss: 0.40, train_acc: 95.0% | test_loss: 0.40, test_acc: 95.3%

input:       carlos filipe ximenes belo of timor leste kim dae jung and japanese scientists most of the said awardees are

target:      carlos filipe ximenes belo of timor leste kim dae jung and japanese scientists most of the said awardees are from

prediction:  carlos baltica laurel belo of timor leste kim dae jung and japanese scientists most of the said awardees are the


 13%|█▎        | 1902/15000 [03:43<20:20, 10.73it/s]


 epoch: 1900 | train_loss: 0.37, train_acc: 95.6% | test_loss: 0.36, test_acc: 95.8%

 epoch: 1901 | train_loss: 0.36, train_acc: 95.7% | test_loss: 0.36, test_acc: 95.5%

 epoch: 1902 | train_loss: 0.38, train_acc: 95.3% | test_loss: 0.36, test_acc: 95.5%


 13%|█▎        | 1904/15000 [03:43<20:13, 10.79it/s]


 epoch: 1903 | train_loss: 0.33, train_acc: 95.9% | test_loss: 0.39, test_acc: 95.3%

 epoch: 1904 | train_loss: 0.36, train_acc: 96.0% | test_loss: 0.37, test_acc: 95.3%

 epoch: 1905 | train_loss: 0.39, train_acc: 95.6% | test_loss: 0.39, test_acc: 95.2%


 13%|█▎        | 1908/15000 [03:44<29:07,  7.49it/s]


 epoch: 1906 | train_loss: 0.36, train_acc: 95.5% | test_loss: 0.36, test_acc: 95.4%

 epoch: 1907 | train_loss: 0.32, train_acc: 96.2% | test_loss: 0.34, test_acc: 95.4%

 epoch: 1908 | train_loss: 0.35, train_acc: 95.9% | test_loss: 0.33, test_acc: 95.8%


 13%|█▎        | 1910/15000 [03:44<26:41,  8.17it/s]


 epoch: 1909 | train_loss: 0.35, train_acc: 95.9% | test_loss: 0.37, test_acc: 95.4%

input:       also has an active community theater culture american folk music encompasses numerous music genres variously known as traditional music

target:      also has an active community theater culture american folk music encompasses numerous music genres variously known as traditional music traditional

prediction:  also has an active community theater culture american folk music encompasses numerous music genres variously known as traditional music the

 epoch: 1910 | train_loss: 0.41, train_acc: 94.8% | test_loss: 0.40, test_acc: 95.3%

 epoch: 1911 | train_loss: 0.32, train_acc: 96.1% | test_loss: 0.36, test_acc: 95.5%


 13%|█▎        | 1914/15000 [03:44<22:22,  9.74it/s]


 epoch: 1912 | train_loss: 0.37, train_acc: 95.9% | test_loss: 0.40, test_acc: 95.2%

 epoch: 1913 | train_loss: 0.38, train_acc: 95.9% | test_loss: 0.36, test_acc: 95.5%

 epoch: 1914 | train_loss: 0.37, train_acc: 95.5% | test_loss: 0.39, test_acc: 95.3%


 13%|█▎        | 1916/15000 [03:44<21:19, 10.22it/s]


 epoch: 1915 | train_loss: 0.34, train_acc: 95.8% | test_loss: 0.40, test_acc: 95.4%

 epoch: 1916 | train_loss: 0.37, train_acc: 95.6% | test_loss: 0.36, test_acc: 95.7%

 epoch: 1917 | train_loss: 0.38, train_acc: 95.7% | test_loss: 0.36, test_acc: 95.6%


 13%|█▎        | 1918/15000 [03:45<20:44, 10.51it/s]


 epoch: 1918 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.38, test_acc: 95.2%

 epoch: 1919 | train_loss: 0.32, train_acc: 95.9% | test_loss: 0.37, test_acc: 95.9%

input:       its capital at babylon akkad was city and its surrounding region near babylon akkad also became the capital of

target:      its capital at babylon akkad was city and its surrounding region near babylon akkad also became the capital of the

prediction:  its capital at babylon akkad was city and its surrounding region near babylon akkad also became the capital of the


 13%|█▎        | 1922/15000 [03:45<30:30,  7.14it/s]


 epoch: 1920 | train_loss: 0.37, train_acc: 96.0% | test_loss: 0.35, test_acc: 95.5%

 epoch: 1921 | train_loss: 0.36, train_acc: 95.8% | test_loss: 0.34, test_acc: 95.9%

 epoch: 1922 | train_loss: 0.43, train_acc: 94.7% | test_loss: 0.36, test_acc: 95.4%


 13%|█▎        | 1924/15000 [03:46<26:46,  8.14it/s]


 epoch: 1923 | train_loss: 0.38, train_acc: 95.3% | test_loss: 0.34, test_acc: 95.9%

 epoch: 1924 | train_loss: 0.36, train_acc: 95.5% | test_loss: 0.36, test_acc: 95.8%

 epoch: 1925 | train_loss: 0.40, train_acc: 95.0% | test_loss: 0.35, test_acc: 95.8%


 13%|█▎        | 1928/15000 [03:46<24:02,  9.06it/s]


 epoch: 1926 | train_loss: 0.32, train_acc: 96.1% | test_loss: 0.35, test_acc: 95.4%

 epoch: 1927 | train_loss: 0.37, train_acc: 95.3% | test_loss: 0.30, test_acc: 96.1%

 epoch: 1928 | train_loss: 0.35, train_acc: 95.9% | test_loss: 0.39, test_acc: 95.7%


 13%|█▎        | 1930/15000 [03:46<25:13,  8.63it/s]


 epoch: 1929 | train_loss: 0.34, train_acc: 95.8% | test_loss: 0.34, test_acc: 95.9%

input:       care system far outspends that of any other nation measured both in per capita spending and as percentage of

target:      care system far outspends that of any other nation measured both in per capita spending and as percentage of gdp

prediction:  care system far outspends that of any other nation measured both in per capita spending and as percentage of the

 epoch: 1930 | train_loss: 0.39, train_acc: 95.4% | test_loss: 0.40, test_acc: 95.4%


 13%|█▎        | 1931/15000 [03:46<24:43,  8.81it/s]


 epoch: 1931 | train_loss: 0.36, train_acc: 95.4% | test_loss: 0.37, test_acc: 95.7%

 epoch: 1932 | train_loss: 0.36, train_acc: 95.2% | test_loss: 0.39, test_acc: 95.5%


 13%|█▎        | 1934/15000 [03:47<25:56,  8.39it/s]


 epoch: 1933 | train_loss: 0.36, train_acc: 95.2% | test_loss: 0.37, test_acc: 95.6%

 epoch: 1934 | train_loss: 0.35, train_acc: 95.6% | test_loss: 0.39, test_acc: 95.2%


 13%|█▎        | 1936/15000 [03:47<25:28,  8.55it/s]


 epoch: 1935 | train_loss: 0.33, train_acc: 96.1% | test_loss: 0.37, test_acc: 95.6%

 epoch: 1936 | train_loss: 0.35, train_acc: 95.9% | test_loss: 0.41, test_acc: 95.2%


 13%|█▎        | 1938/15000 [03:47<24:58,  8.71it/s]


 epoch: 1937 | train_loss: 0.37, train_acc: 95.6% | test_loss: 0.37, test_acc: 95.5%

 epoch: 1938 | train_loss: 0.34, train_acc: 96.1% | test_loss: 0.38, test_acc: 95.2%


 13%|█▎        | 1940/15000 [03:47<27:03,  8.04it/s]


 epoch: 1939 | train_loss: 0.36, train_acc: 95.5% | test_loss: 0.32, test_acc: 96.1%

input:       and the egyptian hieroglyphs are generally considered to be the earliest writing systems both emerging out of their ancestral

target:      and the egyptian hieroglyphs are generally considered to be the earliest writing systems both emerging out of their ancestral proto

prediction:  and the egyptian hieroglyphs are generally considered to be the earliest writing systems both emerging out of their ancestral the

 epoch: 1940 | train_loss: 0.37, train_acc: 95.8% | test_loss: 0.34, test_acc: 96.4%


 13%|█▎        | 1943/15000 [03:48<23:55,  9.10it/s]


 epoch: 1941 | train_loss: 0.36, train_acc: 95.7% | test_loss: 0.42, test_acc: 94.8%

 epoch: 1942 | train_loss: 0.33, train_acc: 96.1% | test_loss: 0.40, test_acc: 94.9%

 epoch: 1943 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.39, test_acc: 95.5%


 13%|█▎        | 1946/15000 [03:48<22:35,  9.63it/s]


 epoch: 1944 | train_loss: 0.34, train_acc: 96.0% | test_loss: 0.32, test_acc: 96.0%

 epoch: 1945 | train_loss: 0.32, train_acc: 96.1% | test_loss: 0.33, test_acc: 96.2%

 epoch: 1946 | train_loss: 0.37, train_acc: 95.4% | test_loss: 0.39, test_acc: 95.0%


 13%|█▎        | 1947/15000 [03:48<23:44,  9.16it/s]


 epoch: 1947 | train_loss: 0.34, train_acc: 96.0% | test_loss: 0.35, test_acc: 95.5%


 13%|█▎        | 1949/15000 [03:49<38:45,  5.61it/s]


 epoch: 1948 | train_loss: 0.37, train_acc: 95.6% | test_loss: 0.38, test_acc: 95.4%

 epoch: 1949 | train_loss: 0.35, train_acc: 95.7% | test_loss: 0.35, test_acc: 95.7%

input:       the works created by leonardo in the th century is the small portrait known as the mona lisa or

target:      the works created by leonardo in the th century is the small portrait known as the mona lisa or la


 13%|█▎        | 1951/15000 [03:49<33:52,  6.42it/s]


prediction:  the works created by leonardo in the th century is the small portrait known as the mona lisa or the

 epoch: 1950 | train_loss: 0.39, train_acc: 95.4% | test_loss: 0.35, test_acc: 95.9%

 epoch: 1951 | train_loss: 0.37, train_acc: 95.8% | test_loss: 0.36, test_acc: 95.8%


 13%|█▎        | 1953/15000 [03:49<28:32,  7.62it/s]


 epoch: 1952 | train_loss: 0.34, train_acc: 96.0% | test_loss: 0.40, test_acc: 95.1%

 epoch: 1953 | train_loss: 0.32, train_acc: 96.2% | test_loss: 0.33, test_acc: 96.3%


 13%|█▎        | 1957/15000 [03:50<22:55,  9.48it/s]


 epoch: 1954 | train_loss: 0.35, train_acc: 95.7% | test_loss: 0.36, test_acc: 96.0%

 epoch: 1955 | train_loss: 0.31, train_acc: 96.2% | test_loss: 0.39, test_acc: 95.2%

 epoch: 1956 | train_loss: 0.36, train_acc: 95.7% | test_loss: 0.39, test_acc: 95.4%

 epoch: 1957 | train_loss: 0.33, train_acc: 96.1% | test_loss: 0.44, test_acc: 94.6%


 13%|█▎        | 1959/15000 [03:50<21:36, 10.06it/s]


 epoch: 1958 | train_loss: 0.36, train_acc: 95.6% | test_loss: 0.41, test_acc: 94.9%

 epoch: 1959 | train_loss: 0.36, train_acc: 96.1% | test_loss: 0.33, test_acc: 96.1%

input:       the actual rulers were military strongmen who took the title of magister militum patrician or both stilicho from to

target:      the actual rulers were military strongmen who took the title of magister militum patrician or both stilicho from to constantius

prediction:  the actual rulers were military laurasia who took the title of magister militum patrician or both version from to the


 13%|█▎        | 1961/15000 [03:50<22:33,  9.64it/s]


 epoch: 1960 | train_loss: 0.37, train_acc: 95.3% | test_loss: 0.37, test_acc: 95.5%

 epoch: 1961 | train_loss: 0.36, train_acc: 96.0% | test_loss: 0.38, test_acc: 95.4%


 13%|█▎        | 1964/15000 [03:51<33:07,  6.56it/s]


 epoch: 1962 | train_loss: 0.37, train_acc: 95.7% | test_loss: 0.35, test_acc: 96.0%

 epoch: 1963 | train_loss: 0.35, train_acc: 96.0% | test_loss: 0.36, test_acc: 95.9%

 epoch: 1964 | train_loss: 0.37, train_acc: 95.9% | test_loss: 0.37, test_acc: 95.9%


 13%|█▎        | 1966/15000 [03:51<28:03,  7.74it/s]


 epoch: 1965 | train_loss: 0.40, train_acc: 95.6% | test_loss: 0.35, test_acc: 95.5%

 epoch: 1966 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.38, test_acc: 95.6%

 epoch: 1967 | train_loss: 0.34, train_acc: 95.6% | test_loss: 0.34, test_acc: 95.6%


 13%|█▎        | 1968/15000 [03:51<24:58,  8.70it/s]


 epoch: 1968 | train_loss: 0.35, train_acc: 95.9% | test_loss: 0.38, test_acc: 95.5%

 epoch: 1969 | train_loss: 0.35, train_acc: 95.6% | test_loss: 0.37, test_acc: 95.6%

input:       adopted typescript for its ui redesign reddit search function has had many iterations and currently uses lucidworks fusion to

target:      adopted typescript for its ui redesign reddit search function has had many iterations and currently uses lucidworks fusion to implementation

prediction:  adopted armed for its newspapers redesign reddit search function has had many iterations and currently uses gender fusion to the


 13%|█▎        | 1972/15000 [03:51<22:21,  9.71it/s]


 epoch: 1970 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.39, test_acc: 95.5%

 epoch: 1971 | train_loss: 0.34, train_acc: 95.8% | test_loss: 0.36, test_acc: 95.9%

 epoch: 1972 | train_loss: 0.39, train_acc: 95.4% | test_loss: 0.33, test_acc: 96.2%


 13%|█▎        | 1974/15000 [03:52<21:06, 10.28it/s]


 epoch: 1973 | train_loss: 0.34, train_acc: 95.9% | test_loss: 0.38, test_acc: 95.5%

 epoch: 1974 | train_loss: 0.40, train_acc: 95.3% | test_loss: 0.38, test_acc: 95.5%

 epoch: 1975 | train_loss: 0.32, train_acc: 96.1% | test_loss: 0.35, test_acc: 95.9%


 13%|█▎        | 1978/15000 [03:52<21:38, 10.03it/s]


 epoch: 1976 | train_loss: 0.35, train_acc: 95.6% | test_loss: 0.38, test_acc: 95.9%

 epoch: 1977 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.36, test_acc: 95.6%

 epoch: 1978 | train_loss: 0.36, train_acc: 95.8% | test_loss: 0.35, test_acc: 95.9%


 13%|█▎        | 1980/15000 [03:52<21:23, 10.14it/s]


 epoch: 1979 | train_loss: 0.34, train_acc: 96.1% | test_loss: 0.38, test_acc: 95.2%

input:       sometimes universal and sometimes specific to particular language which underlie its forms cognitive linguistics is primarily concerned with how

target:      sometimes universal and sometimes specific to particular language which underlie its forms cognitive linguistics is primarily concerned with how the

prediction:  sometimes universal and sometimes specific to particular language which underlie its forms cognitive linguistics is primarily concerned with how the

 epoch: 1980 | train_loss: 0.34, train_acc: 95.9% | test_loss: 0.30, test_acc: 96.3%

 epoch: 1981 | train_loss: 0.38, train_acc: 95.5% | test_loss: 0.37, test_acc: 95.8%


 13%|█▎        | 1984/15000 [03:53<19:48, 10.95it/s]


 epoch: 1982 | train_loss: 0.38, train_acc: 95.5% | test_loss: 0.32, test_acc: 96.3%

 epoch: 1983 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.34, test_acc: 95.9%

 epoch: 1984 | train_loss: 0.34, train_acc: 95.9% | test_loss: 0.33, test_acc: 96.0%


 13%|█▎        | 1986/15000 [03:53<19:22, 11.19it/s]


 epoch: 1985 | train_loss: 0.34, train_acc: 95.8% | test_loss: 0.35, test_acc: 95.4%

 epoch: 1986 | train_loss: 0.37, train_acc: 95.4% | test_loss: 0.35, test_acc: 95.9%

 epoch: 1987 | train_loss: 0.38, train_acc: 95.5% | test_loss: 0.40, test_acc: 95.1%


 13%|█▎        | 1988/15000 [03:53<19:23, 11.18it/s]


 epoch: 1988 | train_loss: 0.37, train_acc: 95.5% | test_loss: 0.34, test_acc: 96.2%

 epoch: 1989 | train_loss: 0.34, train_acc: 95.8% | test_loss: 0.38, test_acc: 95.5%

input:       these are the native commands in the powershell stack cmdlets follow verb noun naming pattern such as get childitem

target:      these are the native commands in the powershell stack cmdlets follow verb noun naming pattern such as get childitem which

prediction:  these are the native commands in the powershell stack cmdlets follow verb noun naming pattern such as get fragments the


 13%|█▎        | 1990/15000 [03:53<20:17, 10.69it/s]


 epoch: 1990 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.37, test_acc: 95.8%


 13%|█▎        | 1992/15000 [03:54<33:42,  6.43it/s]


 epoch: 1991 | train_loss: 0.35, train_acc: 96.2% | test_loss: 0.37, test_acc: 95.5%

 epoch: 1992 | train_loss: 0.37, train_acc: 95.6% | test_loss: 0.37, test_acc: 95.5%

 epoch: 1993 | train_loss: 0.36, train_acc: 95.6% | test_loss: 0.36, test_acc: 95.7%


 13%|█▎        | 1996/15000 [03:54<25:54,  8.36it/s]


 epoch: 1994 | train_loss: 0.32, train_acc: 96.2% | test_loss: 0.33, test_acc: 96.3%

 epoch: 1995 | train_loss: 0.37, train_acc: 95.7% | test_loss: 0.32, test_acc: 96.2%

 epoch: 1996 | train_loss: 0.35, train_acc: 95.7% | test_loss: 0.36, test_acc: 95.7%


 13%|█▎        | 1998/15000 [03:54<23:42,  9.14it/s]


 epoch: 1997 | train_loss: 0.35, train_acc: 96.1% | test_loss: 0.35, test_acc: 95.8%

 epoch: 1998 | train_loss: 0.35, train_acc: 95.8% | test_loss: 0.39, test_acc: 95.3%

 epoch: 1999 | train_loss: 0.39, train_acc: 95.2% | test_loss: 0.36, test_acc: 95.8%


 13%|█▎        | 2000/15000 [03:54<23:51,  9.08it/s]


input:       five octopus like arms the character was created by graphic designer simon oxley as clip art to sell on

target:      five octopus like arms the character was created by graphic designer simon oxley as clip art to sell on istock

prediction:  five octopus like arms the character was created by graphic designer simon oxley as clip art to sell on the

 epoch: 2000 | train_loss: 0.36, train_acc: 95.4% | test_loss: 0.37, test_acc: 95.6%

 epoch: 2001 | train_loss: 0.35, train_acc: 96.0% | test_loss: 0.37, test_acc: 95.5%


 13%|█▎        | 2004/15000 [03:55<21:02, 10.29it/s]


 epoch: 2002 | train_loss: 0.33, train_acc: 95.7% | test_loss: 0.38, test_acc: 95.5%

 epoch: 2003 | train_loss: 0.41, train_acc: 94.9% | test_loss: 0.39, test_acc: 95.6%

 epoch: 2004 | train_loss: 0.35, train_acc: 95.8% | test_loss: 0.37, test_acc: 95.5%


 13%|█▎        | 2006/15000 [03:55<22:41,  9.54it/s]


 epoch: 2005 | train_loss: 0.34, train_acc: 95.7% | test_loss: 0.34, test_acc: 96.0%

 epoch: 2006 | train_loss: 0.34, train_acc: 95.7% | test_loss: 0.36, test_acc: 95.7%

 epoch: 2007 | train_loss: 0.34, train_acc: 95.9% | test_loss: 0.33, test_acc: 96.2%


 13%|█▎        | 2010/15000 [03:55<21:14, 10.20it/s]


 epoch: 2008 | train_loss: 0.35, train_acc: 95.8% | test_loss: 0.36, test_acc: 95.6%

 epoch: 2009 | train_loss: 0.35, train_acc: 95.9% | test_loss: 0.36, test_acc: 95.8%

input:       dense at this time far denser than is usually required to form black hole the universe did not re

target:      dense at this time far denser than is usually required to form black hole the universe did not re collapse

prediction:  dense at this time far denser than is usually required to form black hole the universe did not re the

 epoch: 2010 | train_loss: 0.34, train_acc: 96.5% | test_loss: 0.34, test_acc: 95.9%


 13%|█▎        | 2012/15000 [03:56<20:18, 10.66it/s]


 epoch: 2011 | train_loss: 0.34, train_acc: 95.7% | test_loss: 0.37, test_acc: 95.5%

 epoch: 2012 | train_loss: 0.36, train_acc: 96.0% | test_loss: 0.35, test_acc: 95.4%

 epoch: 2013 | train_loss: 0.36, train_acc: 95.9% | test_loss: 0.34, test_acc: 96.0%


 13%|█▎        | 2016/15000 [03:56<19:40, 11.00it/s]


 epoch: 2014 | train_loss: 0.35, train_acc: 95.8% | test_loss: 0.31, test_acc: 96.3%

 epoch: 2015 | train_loss: 0.36, train_acc: 95.7% | test_loss: 0.30, test_acc: 96.6%

 epoch: 2016 | train_loss: 0.37, train_acc: 95.8% | test_loss: 0.35, test_acc: 95.7%


 13%|█▎        | 2018/15000 [03:56<19:34, 11.05it/s]


 epoch: 2017 | train_loss: 0.39, train_acc: 95.5% | test_loss: 0.33, test_acc: 96.2%

 epoch: 2018 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.34, test_acc: 95.8%


 13%|█▎        | 2020/15000 [03:57<36:03,  6.00it/s]


 epoch: 2019 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.31, test_acc: 96.2%

input:       large centralized ancient civilisations developed in the western hemisphere both in mesoamerica and western south america beyond these areas

target:      large centralized ancient civilisations developed in the western hemisphere both in mesoamerica and western south america beyond these areas the

prediction:  large centralized ancient civilisations developed in the western hemisphere both in mesoamerica and western south america beyond these areas the

 epoch: 2020 | train_loss: 0.36, train_acc: 95.8% | test_loss: 0.37, test_acc: 95.6%

 epoch: 2021 | train_loss: 0.36, train_acc: 95.9% | test_loss: 0.35, test_acc: 95.9%


 13%|█▎        | 2024/15000 [03:57<26:36,  8.13it/s]


 epoch: 2022 | train_loss: 0.36, train_acc: 95.8% | test_loss: 0.40, test_acc: 95.5%

 epoch: 2023 | train_loss: 0.35, train_acc: 95.6% | test_loss: 0.33, test_acc: 96.3%

 epoch: 2024 | train_loss: 0.32, train_acc: 96.2% | test_loss: 0.37, test_acc: 95.6%


 14%|█▎        | 2026/15000 [03:57<23:54,  9.04it/s]


 epoch: 2025 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.35, test_acc: 95.9%

 epoch: 2026 | train_loss: 0.36, train_acc: 95.8% | test_loss: 0.32, test_acc: 96.1%

 epoch: 2027 | train_loss: 0.35, train_acc: 95.5% | test_loss: 0.39, test_acc: 95.4%


 14%|█▎        | 2028/15000 [03:57<22:10,  9.75it/s]


 epoch: 2028 | train_loss: 0.36, train_acc: 96.0% | test_loss: 0.32, test_acc: 96.4%

 epoch: 2029 | train_loss: 0.33, train_acc: 96.3% | test_loss: 0.38, test_acc: 95.3%

input:       that he had proposed the idea to von strahlenberg the latter had suggested the emba river as the lower

target:      that he had proposed the idea to von strahlenberg the latter had suggested the emba river as the lower boundary

prediction:  that he had proposed the idea to von strahlenberg the latter had suggested the emba river as the lower the


 14%|█▎        | 2032/15000 [03:58<21:37, 10.00it/s]


 epoch: 2030 | train_loss: 0.31, train_acc: 96.2% | test_loss: 0.33, test_acc: 95.6%

 epoch: 2031 | train_loss: 0.33, train_acc: 95.7% | test_loss: 0.39, test_acc: 95.2%

 epoch: 2032 | train_loss: 0.34, train_acc: 96.0% | test_loss: 0.33, test_acc: 96.1%

 epoch: 2033 | train_loss: 0.32, train_acc: 96.0% | test_loss: 0.40, test_acc: 94.9%


 14%|█▎        | 2036/15000 [03:59<30:24,  7.10it/s]


 epoch: 2034 | train_loss: 0.34, train_acc: 95.8% | test_loss: 0.35, test_acc: 95.7%

 epoch: 2035 | train_loss: 0.36, train_acc: 95.7% | test_loss: 0.33, test_acc: 96.0%

 epoch: 2036 | train_loss: 0.36, train_acc: 95.8% | test_loss: 0.38, test_acc: 95.5%


 14%|█▎        | 2038/15000 [03:59<26:41,  8.09it/s]


 epoch: 2037 | train_loss: 0.36, train_acc: 96.0% | test_loss: 0.34, test_acc: 96.0%

 epoch: 2038 | train_loss: 0.35, train_acc: 95.6% | test_loss: 0.40, test_acc: 95.0%

 epoch: 2039 | train_loss: 0.31, train_acc: 96.7% | test_loss: 0.36, test_acc: 95.7%


 14%|█▎        | 2042/15000 [03:59<22:49,  9.46it/s]


input:       for export the pacific coastal waters of south america are the most important for commercial fishing the anchovy catch

target:      for export the pacific coastal waters of south america are the most important for commercial fishing the anchovy catch reaches

prediction:  for export the pacific coastal waters of south america are the most important for commercial fishing the narrative catch the

 epoch: 2040 | train_loss: 0.34, train_acc: 96.1% | test_loss: 0.34, test_acc: 95.5%

 epoch: 2041 | train_loss: 0.33, train_acc: 95.9% | test_loss: 0.29, test_acc: 96.3%

 epoch: 2042 | train_loss: 0.36, train_acc: 95.8% | test_loss: 0.33, test_acc: 96.0%


 14%|█▎        | 2044/15000 [03:59<21:27, 10.06it/s]


 epoch: 2043 | train_loss: 0.33, train_acc: 96.1% | test_loss: 0.30, test_acc: 96.3%

 epoch: 2044 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.33, test_acc: 96.1%

 epoch: 2045 | train_loss: 0.37, train_acc: 95.7% | test_loss: 0.35, test_acc: 96.1%


 14%|█▎        | 2046/15000 [04:00<21:43,  9.94it/s]


 epoch: 2046 | train_loss: 0.31, train_acc: 96.2% | test_loss: 0.36, test_acc: 95.8%

 epoch: 2047 | train_loss: 0.34, train_acc: 95.9% | test_loss: 0.34, test_acc: 95.8%


 14%|█▎        | 2049/15000 [04:00<33:53,  6.37it/s]


 epoch: 2048 | train_loss: 0.32, train_acc: 96.3% | test_loss: 0.34, test_acc: 96.1%

 epoch: 2049 | train_loss: 0.34, train_acc: 95.6% | test_loss: 0.33, test_acc: 95.9%

input:       of the americas generally south of the where the romance languages derived from latin of spanish and portuguese

target:      of the americas generally south of the where the romance languages derived from latin of spanish and portuguese but

prediction:  of the americas generally south of the where the romance languages derived from latin of spanish and portuguese the


 14%|█▎        | 2050/15000 [04:00<33:23,  6.46it/s]


 epoch: 2050 | train_loss: 0.35, train_acc: 95.8% | test_loss: 0.35, test_acc: 95.6%

 epoch: 2051 | train_loss: 0.38, train_acc: 95.1% | test_loss: 0.36, test_acc: 95.6%


 14%|█▎        | 2053/15000 [04:01<27:53,  7.74it/s]


 epoch: 2052 | train_loss: 0.33, train_acc: 96.3% | test_loss: 0.40, test_acc: 95.7%

 epoch: 2053 | train_loss: 0.34, train_acc: 95.7% | test_loss: 0.36, test_acc: 95.6%


 14%|█▎        | 2055/15000 [04:01<26:08,  8.25it/s]


 epoch: 2054 | train_loss: 0.33, train_acc: 96.1% | test_loss: 0.32, test_acc: 96.0%

 epoch: 2055 | train_loss: 0.36, train_acc: 95.9% | test_loss: 0.32, test_acc: 96.2%

 epoch: 2056 | train_loss: 0.39, train_acc: 95.4% | test_loss: 0.36, test_acc: 95.5%


 14%|█▎        | 2058/15000 [04:01<25:12,  8.55it/s]


 epoch: 2057 | train_loss: 0.39, train_acc: 95.5% | test_loss: 0.31, test_acc: 96.2%

 epoch: 2058 | train_loss: 0.36, train_acc: 96.0% | test_loss: 0.37, test_acc: 95.5%


 14%|█▎        | 2060/15000 [04:02<28:01,  7.69it/s]


 epoch: 2059 | train_loss: 0.33, train_acc: 95.9% | test_loss: 0.35, test_acc: 95.5%

input:       the cultivated leisure otium associated with the villa lifestyle significant collections might attract in house scholars and an individual

target:      the cultivated leisure otium associated with the villa lifestyle significant collections might attract in house scholars and an individual benefactor

prediction:  the cultivated leisure otium associated with the villa lifestyle significant collections might attract in house scholars and an individual the

 epoch: 2060 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.38, test_acc: 95.3%


 14%|█▎        | 2061/15000 [04:02<27:18,  7.90it/s]


 epoch: 2061 | train_loss: 0.37, train_acc: 95.6% | test_loss: 0.34, test_acc: 95.7%


 14%|█▍        | 2063/15000 [04:02<37:21,  5.77it/s]


 epoch: 2062 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.36, test_acc: 95.5%

 epoch: 2063 | train_loss: 0.35, train_acc: 95.9% | test_loss: 0.34, test_acc: 95.9%


 14%|█▍        | 2065/15000 [04:02<30:39,  7.03it/s]


 epoch: 2064 | train_loss: 0.35, train_acc: 94.7% | test_loss: 0.33, test_acc: 96.0%

 epoch: 2065 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.33, test_acc: 95.8%


 14%|█▍        | 2068/15000 [04:03<25:34,  8.43it/s]


 epoch: 2066 | train_loss: 0.33, train_acc: 96.2% | test_loss: 0.35, test_acc: 96.0%

 epoch: 2067 | train_loss: 0.35, train_acc: 95.7% | test_loss: 0.32, test_acc: 95.8%

 epoch: 2068 | train_loss: 0.35, train_acc: 95.7% | test_loss: 0.33, test_acc: 96.3%


 14%|█▍        | 2070/15000 [04:03<25:41,  8.39it/s]


 epoch: 2069 | train_loss: 0.34, train_acc: 95.8% | test_loss: 0.36, test_acc: 95.5%

input:       o and is vital for all known forms of life in typical usage water refers only to its liquid

target:      o and is vital for all known forms of life in typical usage water refers only to its liquid form

prediction:  o and is vital for all known forms of life in typical usage water refers only to its liquid the

 epoch: 2070 | train_loss: 0.34, train_acc: 95.6% | test_loss: 0.30, test_acc: 96.3%


 14%|█▍        | 2073/15000 [04:03<22:36,  9.53it/s]


 epoch: 2071 | train_loss: 0.35, train_acc: 95.6% | test_loss: 0.38, test_acc: 95.5%

 epoch: 2072 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.33, test_acc: 96.1%

 epoch: 2073 | train_loss: 0.32, train_acc: 96.1% | test_loss: 0.34, test_acc: 96.2%


 14%|█▍        | 2076/15000 [04:04<21:12, 10.16it/s]


 epoch: 2074 | train_loss: 0.32, train_acc: 96.3% | test_loss: 0.30, test_acc: 96.3%

 epoch: 2075 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.34, test_acc: 96.1%

 epoch: 2076 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.36, test_acc: 95.7%


 14%|█▍        | 2078/15000 [04:04<36:05,  5.97it/s]


 epoch: 2077 | train_loss: 0.35, train_acc: 95.7% | test_loss: 0.29, test_acc: 96.6%

 epoch: 2078 | train_loss: 0.33, train_acc: 96.5% | test_loss: 0.33, test_acc: 96.1%

 epoch: 2079 | train_loss: 0.34, train_acc: 95.9% | test_loss: 0.35, test_acc: 96.1%


 14%|█▍        | 2080/15000 [04:04<31:15,  6.89it/s]


input:       and operationalization of important constructs is an essential part of these research designs although this type of psychological research

target:      and operationalization of important constructs is an essential part of these research designs although this type of psychological research is

prediction:  and mexican of important constructs is an essential part of these research designs although this type of psychological research the

 epoch: 2080 | train_loss: 0.35, train_acc: 96.0% | test_loss: 0.36, test_acc: 95.9%

 epoch: 2081 | train_loss: 0.35, train_acc: 95.9% | test_loss: 0.34, test_acc: 95.8%


 14%|█▍        | 2084/15000 [04:05<24:21,  8.84it/s]


 epoch: 2082 | train_loss: 0.33, train_acc: 96.2% | test_loss: 0.38, test_acc: 95.9%

 epoch: 2083 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.37, test_acc: 95.8%

 epoch: 2084 | train_loss: 0.36, train_acc: 96.2% | test_loss: 0.35, test_acc: 95.9%


 14%|█▍        | 2086/15000 [04:05<22:51,  9.41it/s]


 epoch: 2085 | train_loss: 0.32, train_acc: 96.2% | test_loss: 0.33, test_acc: 95.7%

 epoch: 2086 | train_loss: 0.35, train_acc: 95.9% | test_loss: 0.32, test_acc: 96.0%

 epoch: 2087 | train_loss: 0.33, train_acc: 95.7% | test_loss: 0.39, test_acc: 95.6%


 14%|█▍        | 2088/15000 [04:05<21:44,  9.90it/s]


 epoch: 2088 | train_loss: 0.33, train_acc: 95.9% | test_loss: 0.32, test_acc: 96.3%

 epoch: 2089 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.35, test_acc: 96.0%

input:       underlying the theory and practice of education it studies education both as process and as discipline while trying to

target:      underlying the theory and practice of education it studies education both as process and as discipline while trying to provide

prediction:  underlying the theory and practice of education it studies education both as process and as discipline while trying to the


 14%|█▍        | 2090/15000 [04:05<22:25,  9.60it/s]


 epoch: 2090 | train_loss: 0.32, train_acc: 96.2% | test_loss: 0.34, test_acc: 95.6%


 14%|█▍        | 2092/15000 [04:06<36:01,  5.97it/s]


 epoch: 2091 | train_loss: 0.33, train_acc: 95.8% | test_loss: 0.36, test_acc: 95.9%

 epoch: 2092 | train_loss: 0.34, train_acc: 96.1% | test_loss: 0.34, test_acc: 96.1%

 epoch: 2093 | train_loss: 0.33, train_acc: 95.7% | test_loss: 0.34, test_acc: 95.7%


 14%|█▍        | 2096/15000 [04:06<26:39,  8.07it/s]


 epoch: 2094 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.31, test_acc: 96.2%

 epoch: 2095 | train_loss: 0.32, train_acc: 95.9% | test_loss: 0.35, test_acc: 95.9%

 epoch: 2096 | train_loss: 0.34, train_acc: 96.2% | test_loss: 0.31, test_acc: 96.4%


 14%|█▍        | 2098/15000 [04:06<24:07,  8.91it/s]


 epoch: 2097 | train_loss: 0.39, train_acc: 95.8% | test_loss: 0.31, test_acc: 96.0%

 epoch: 2098 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.34, test_acc: 95.9%

 epoch: 2099 | train_loss: 0.28, train_acc: 96.5% | test_loss: 0.31, test_acc: 96.4%


 14%|█▍        | 2100/15000 [04:07<23:54,  8.99it/s]


input:       lived under the threat of lynching and other vigilante violence national infrastructure including telegraph and transcontinental railroads spurred economic

target:      lived under the threat of lynching and other vigilante violence national infrastructure including telegraph and transcontinental railroads spurred economic growth

prediction:  lived under the threat of knows and other skeletal violence national infrastructure including considerable and transcontinental railroads spurred economic the

 epoch: 2100 | train_loss: 0.34, train_acc: 95.8% | test_loss: 0.32, test_acc: 96.0%

 epoch: 2101 | train_loss: 0.35, train_acc: 95.7% | test_loss: 0.34, test_acc: 96.3%


 14%|█▍        | 2104/15000 [04:07<21:57,  9.79it/s]


 epoch: 2102 | train_loss: 0.32, train_acc: 96.2% | test_loss: 0.37, test_acc: 95.5%

 epoch: 2103 | train_loss: 0.33, train_acc: 96.3% | test_loss: 0.35, test_acc: 96.0%

 epoch: 2104 | train_loss: 0.33, train_acc: 95.7% | test_loss: 0.31, test_acc: 96.4%


 14%|█▍        | 2106/15000 [04:07<22:45,  9.44it/s]


 epoch: 2105 | train_loss: 0.37, train_acc: 95.7% | test_loss: 0.34, test_acc: 95.9%

 epoch: 2106 | train_loss: 0.33, train_acc: 96.1% | test_loss: 0.34, test_acc: 95.5%

 epoch: 2107 | train_loss: 0.36, train_acc: 95.8% | test_loss: 0.35, test_acc: 95.9%


 14%|█▍        | 2110/15000 [04:08<21:15, 10.10it/s]


 epoch: 2108 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.36, test_acc: 95.6%

 epoch: 2109 | train_loss: 0.38, train_acc: 95.2% | test_loss: 0.35, test_acc: 95.8%

input:       asia and north africa had existed as an intellectual concept since classical antiquity the belief in such land lasted

target:      asia and north africa had existed as an intellectual concept since classical antiquity the belief in such land lasted until

prediction:  asia and north africa had existed as an intellectual concept since classical antiquity the belief in such land lasted the

 epoch: 2110 | train_loss: 0.31, train_acc: 96.1% | test_loss: 0.34, test_acc: 95.9%


 14%|█▍        | 2112/15000 [04:08<20:20, 10.56it/s]


 epoch: 2111 | train_loss: 0.34, train_acc: 96.1% | test_loss: 0.33, test_acc: 96.3%

 epoch: 2112 | train_loss: 0.35, train_acc: 96.1% | test_loss: 0.30, test_acc: 96.5%

 epoch: 2113 | train_loss: 0.34, train_acc: 96.1% | test_loss: 0.34, test_acc: 96.0%


 14%|█▍        | 2116/15000 [04:08<19:45, 10.86it/s]


 epoch: 2114 | train_loss: 0.35, train_acc: 95.9% | test_loss: 0.32, test_acc: 96.1%

 epoch: 2115 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.36, test_acc: 95.4%

 epoch: 2116 | train_loss: 0.33, train_acc: 96.4% | test_loss: 0.38, test_acc: 95.5%


 14%|█▍        | 2118/15000 [04:08<19:49, 10.83it/s]


 epoch: 2117 | train_loss: 0.41, train_acc: 95.5% | test_loss: 0.35, test_acc: 95.8%

 epoch: 2118 | train_loss: 0.35, train_acc: 95.9% | test_loss: 0.34, test_acc: 95.6%

 epoch: 2119 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.35, test_acc: 95.8%

input:       heine and norenzayan reported bias in conducting psychology studies with participants from weird western educated industrialized rich and democratic

target:      heine and norenzayan reported bias in conducting psychology studies with participants from weird western educated industrialized rich and democratic societies

prediction:  afford and hipparchus reported bias in conducting psychology studies with participants from weird western educated industrialized rich and democratic the


 14%|█▍        | 2122/15000 [04:09<28:58,  7.41it/s]


 epoch: 2120 | train_loss: 0.35, train_acc: 95.8% | test_loss: 0.37, test_acc: 95.5%

 epoch: 2121 | train_loss: 0.34, train_acc: 96.0% | test_loss: 0.36, test_acc: 95.7%

 epoch: 2122 | train_loss: 0.35, train_acc: 96.2% | test_loss: 0.33, test_acc: 96.1%


 14%|█▍        | 2124/15000 [04:09<25:40,  8.36it/s]


 epoch: 2123 | train_loss: 0.37, train_acc: 95.5% | test_loss: 0.32, test_acc: 96.3%

 epoch: 2124 | train_loss: 0.38, train_acc: 95.4% | test_loss: 0.37, test_acc: 95.3%

 epoch: 2125 | train_loss: 0.34, train_acc: 95.8% | test_loss: 0.32, test_acc: 96.0%


 14%|█▍        | 2128/15000 [04:10<21:45,  9.86it/s]


 epoch: 2126 | train_loss: 0.34, train_acc: 96.1% | test_loss: 0.37, test_acc: 95.9%

 epoch: 2127 | train_loss: 0.34, train_acc: 95.8% | test_loss: 0.32, test_acc: 96.1%

 epoch: 2128 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.2%


 14%|█▍        | 2130/15000 [04:10<21:47,  9.85it/s]


 epoch: 2129 | train_loss: 0.34, train_acc: 95.3% | test_loss: 0.37, test_acc: 95.4%

input:       have carried claims of well being benefits to workers improved customer experiences and an increase in productivity that organisations

target:      have carried claims of well being benefits to workers improved customer experiences and an increase in productivity that organisations can

prediction:  have carried claims of well being benefits to workers improved deletion experiences and an increase in productivity that organisations the

 epoch: 2130 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.38, test_acc: 95.2%


 14%|█▍        | 2132/15000 [04:10<21:22, 10.03it/s]


 epoch: 2131 | train_loss: 0.35, train_acc: 95.7% | test_loss: 0.32, test_acc: 96.4%

 epoch: 2132 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.35, test_acc: 95.9%

 epoch: 2133 | train_loss: 0.33, train_acc: 95.9% | test_loss: 0.37, test_acc: 95.5%


 14%|█▍        | 2136/15000 [04:11<29:49,  7.19it/s]


 epoch: 2134 | train_loss: 0.35, train_acc: 96.0% | test_loss: 0.33, test_acc: 96.0%

 epoch: 2135 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.31, test_acc: 96.3%

 epoch: 2136 | train_loss: 0.32, train_acc: 96.3% | test_loss: 0.33, test_acc: 95.9%


 14%|█▍        | 2139/15000 [04:11<25:00,  8.57it/s]


 epoch: 2137 | train_loss: 0.33, train_acc: 95.9% | test_loss: 0.36, test_acc: 95.3%

 epoch: 2138 | train_loss: 0.37, train_acc: 95.6% | test_loss: 0.36, test_acc: 95.8%

 epoch: 2139 | train_loss: 0.32, train_acc: 96.5% | test_loss: 0.32, test_acc: 96.2%


 14%|█▍        | 2141/15000 [04:11<24:04,  8.90it/s]


input:       wilderness areas can be found in preserves estates farms conservation preserves ranches national forests national parks and even in

target:      wilderness areas can be found in preserves estates farms conservation preserves ranches national forests national parks and even in urban

prediction:  wilderness areas can be found in preserves estates farms conservation preserves atkinson national forests national parks and even in the

 epoch: 2140 | train_loss: 0.34, train_acc: 96.3% | test_loss: 0.37, test_acc: 95.8%

 epoch: 2141 | train_loss: 0.33, train_acc: 96.2% | test_loss: 0.33, test_acc: 95.8%


 14%|█▍        | 2143/15000 [04:11<22:23,  9.57it/s]


 epoch: 2142 | train_loss: 0.38, train_acc: 95.7% | test_loss: 0.35, test_acc: 96.1%

 epoch: 2143 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.33, test_acc: 96.2%

 epoch: 2144 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.36, test_acc: 96.0%


 14%|█▍        | 2145/15000 [04:12<21:31,  9.95it/s]


 epoch: 2145 | train_loss: 0.35, train_acc: 95.7% | test_loss: 0.37, test_acc: 95.5%

 epoch: 2146 | train_loss: 0.33, train_acc: 95.9% | test_loss: 0.32, test_acc: 96.3%


 14%|█▍        | 2148/15000 [04:12<29:27,  7.27it/s]


 epoch: 2147 | train_loss: 0.35, train_acc: 95.8% | test_loss: 0.32, test_acc: 96.3%

 epoch: 2148 | train_loss: 0.34, train_acc: 96.0% | test_loss: 0.33, test_acc: 96.1%

 epoch: 2149 | train_loss: 0.35, train_acc: 96.2% | test_loss: 0.36, test_acc: 96.0%


 14%|█▍        | 2150/15000 [04:12<26:47,  7.99it/s]


input:       march microsoft required that all players migrate in order to access the java edition of minecraft deadline of september

target:      march microsoft required that all players migrate in order to access the java edition of minecraft deadline of september was

prediction:  march microsoft required that all players migrate in order to access the java edition of minecraft deadline of september the

 epoch: 2150 | train_loss: 0.35, train_acc: 95.9% | test_loss: 0.35, test_acc: 95.8%

 epoch: 2151 | train_loss: 0.34, train_acc: 96.1% | test_loss: 0.36, test_acc: 95.8%


 14%|█▍        | 2154/15000 [04:13<22:07,  9.68it/s]


 epoch: 2152 | train_loss: 0.34, train_acc: 96.1% | test_loss: 0.33, test_acc: 95.8%

 epoch: 2153 | train_loss: 0.33, train_acc: 95.9% | test_loss: 0.35, test_acc: 95.9%

 epoch: 2154 | train_loss: 0.29, train_acc: 96.4% | test_loss: 0.35, test_acc: 95.8%


 14%|█▍        | 2156/15000 [04:13<20:51, 10.26it/s]


 epoch: 2155 | train_loss: 0.36, train_acc: 95.7% | test_loss: 0.32, test_acc: 95.9%

 epoch: 2156 | train_loss: 0.34, train_acc: 95.6% | test_loss: 0.34, test_acc: 96.1%

 epoch: 2157 | train_loss: 0.34, train_acc: 96.1% | test_loss: 0.33, test_acc: 96.3%


 14%|█▍        | 2158/15000 [04:13<21:29,  9.96it/s]


 epoch: 2158 | train_loss: 0.36, train_acc: 95.4% | test_loss: 0.33, test_acc: 95.8%

 epoch: 2159 | train_loss: 0.34, train_acc: 96.1% | test_loss: 0.34, test_acc: 96.1%

input:       the following language fragment is syntactically correct but performs operations that are not semantically defined the operation has no

target:     

 14%|█▍        | 2161/15000 [04:14<23:59,  8.92it/s]

 the following language fragment is syntactically correct but performs operations that are not semantically defined the operation has no meaning

prediction:  the following language fragment is syntactically correct but performs operations that are not semantically defined the operation has no the

 epoch: 2160 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.32, test_acc: 96.2%

 epoch: 2161 | train_loss: 0.32, train_acc: 96.0% | test_loss: 0.35, test_acc: 96.3%


 14%|█▍        | 2162/15000 [04:14<23:48,  8.99it/s]


 epoch: 2162 | train_loss: 0.36, train_acc: 95.5% | test_loss: 0.33, test_acc: 96.2%


 14%|█▍        | 2164/15000 [04:14<39:09,  5.46it/s]


 epoch: 2163 | train_loss: 0.32, train_acc: 96.2% | test_loss: 0.35, test_acc: 95.7%

 epoch: 2164 | train_loss: 0.35, train_acc: 95.7% | test_loss: 0.34, test_acc: 96.4%


 14%|█▍        | 2167/15000 [04:15<29:18,  7.30it/s]


 epoch: 2165 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.37, test_acc: 95.6%

 epoch: 2166 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.34, test_acc: 95.8%

 epoch: 2167 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.33, test_acc: 96.1%


 14%|█▍        | 2168/15000 [04:15<28:00,  7.63it/s]


 epoch: 2168 | train_loss: 0.33, train_acc: 96.1% | test_loss: 0.31, test_acc: 96.4%

 epoch: 2169 | train_loss: 0.32, train_acc: 96.1% | test_loss: 0.35, test_acc: 95.8%

input:       to engineer the new man of socialism consequently university psychology departments trained large numbers of students in psychology at

target:      to engineer the new man of socialism consequently university psychology departments trained large numbers of students in psychology at the

prediction: 

 14%|█▍        | 2171/15000 [04:15<26:38,  8.02it/s]

 to engineer the new man of gradient consequently university psychology departments trained large numbers of students in psychology at the

 epoch: 2170 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.35, test_acc: 95.3%

 epoch: 2171 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.29, test_acc: 96.5%


 14%|█▍        | 2173/15000 [04:15<24:26,  8.75it/s]


 epoch: 2172 | train_loss: 0.32, train_acc: 96.1% | test_loss: 0.34, test_acc: 95.9%

 epoch: 2173 | train_loss: 0.33, train_acc: 96.3% | test_loss: 0.35, test_acc: 95.8%

 epoch: 2174 | train_loss: 0.33, train_acc: 96.3% | test_loss: 0.32, test_acc: 96.2%


 14%|█▍        | 2175/15000 [04:15<22:51,  9.35it/s]


 epoch: 2175 | train_loss: 0.34, train_acc: 95.8% | test_loss: 0.33, test_acc: 96.2%

 epoch: 2176 | train_loss: 0.33, train_acc: 95.7% | test_loss: 0.37, test_acc: 95.2%


 15%|█▍        | 2178/15000 [04:16<24:09,  8.85it/s]


 epoch: 2177 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.33, test_acc: 96.4%

 epoch: 2178 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.37, test_acc: 95.5%


 15%|█▍        | 2180/15000 [04:16<26:56,  7.93it/s]


 epoch: 2179 | train_loss: 0.34, train_acc: 95.8% | test_loss: 0.32, test_acc: 96.1%

input:       is the physical world or universe nature can refer to the phenomena of the physical world and also to

target:      is the physical world or universe nature can refer to the phenomena of the physical world and also to life

prediction:  is the physical world or universe nature can refer to the phenomena of the physical world and also to the

 epoch: 2180 | train_loss: 0.37, train_acc: 95.2% | test_loss: 0.34, test_acc: 95.9%


 15%|█▍        | 2182/15000 [04:16<25:31,  8.37it/s]


 epoch: 2181 | train_loss: 0.37, train_acc: 96.0% | test_loss: 0.34, test_acc: 95.9%

 epoch: 2182 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.4%


 15%|█▍        | 2184/15000 [04:17<24:48,  8.61it/s]


 epoch: 2183 | train_loss: 0.36, train_acc: 95.5% | test_loss: 0.33, test_acc: 96.2%

 epoch: 2184 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.31, test_acc: 96.3%


 15%|█▍        | 2186/15000 [04:17<24:21,  8.77it/s]


 epoch: 2185 | train_loss: 0.34, train_acc: 95.9% | test_loss: 0.29, test_acc: 96.6%

 epoch: 2186 | train_loss: 0.34, train_acc: 95.7% | test_loss: 0.31, test_acc: 96.4%


 15%|█▍        | 2188/15000 [04:17<25:46,  8.28it/s]


 epoch: 2187 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.33, test_acc: 95.9%

 epoch: 2188 | train_loss: 0.36, train_acc: 95.8% | test_loss: 0.35, test_acc: 96.0%


 15%|█▍        | 2190/15000 [04:17<24:46,  8.62it/s]


 epoch: 2189 | train_loss: 0.31, train_acc: 96.7% | test_loss: 0.34, test_acc: 95.9%

input:       of waikato are using the welsh language as model for their ori language revitalisation programme as they deem welsh

target:      of waikato are using the welsh language as model for their ori language revitalisation programme as they deem welsh to

prediction:  of waikato are using the welsh language as model for their ori language revitalisation programme as they decades welsh the

 epoch: 2190 | train_loss: 0.36, train_acc: 95.6% | test_loss: 0.35, test_acc: 96.3%


 15%|█▍        | 2192/15000 [04:18<25:08,  8.49it/s]


 epoch: 2191 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.33, test_acc: 95.7%

 epoch: 2192 | train_loss: 0.35, train_acc: 95.9% | test_loss: 0.32, test_acc: 96.2%

 epoch: 2193 | train_loss: 0.35, train_acc: 95.8% | test_loss: 0.37, test_acc: 95.3%


 15%|█▍        | 2196/15000 [04:18<21:07, 10.10it/s]


 epoch: 2194 | train_loss: 0.31, train_acc: 96.1% | test_loss: 0.36, test_acc: 95.9%

 epoch: 2195 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.34, test_acc: 96.1%

 epoch: 2196 | train_loss: 0.32, train_acc: 96.3% | test_loss: 0.32, test_acc: 96.3%


 15%|█▍        | 2198/15000 [04:18<20:17, 10.52it/s]


 epoch: 2197 | train_loss: 0.32, train_acc: 96.1% | test_loss: 0.36, test_acc: 95.7%

 epoch: 2198 | train_loss: 0.32, train_acc: 96.0% | test_loss: 0.37, test_acc: 95.3%

 epoch: 2199 | train_loss: 0.36, train_acc: 95.9% | test_loss: 0.32, test_acc: 96.5%


 15%|█▍        | 2200/15000 [04:18<21:02, 10.14it/s]


input:       code to github under the username freespeechenthusiast linus torvalds the original developer of the git software has highly praised

target:      code to github under the username freespeechenthusiast linus torvalds the original developer of the git software has highly praised github

prediction:  code to github under the username freespeechenthusiast linus torvalds the original developer of the git software has highly praised the

 epoch: 2200 | train_loss: 0.33, train_acc: 96.4% | test_loss: 0.40, test_acc: 95.3%

 epoch: 2201 | train_loss: 0.33, train_acc: 95.9% | test_loss: 0.30, test_acc: 96.3%


 15%|█▍        | 2204/15000 [04:19<20:49, 10.24it/s]


 epoch: 2202 | train_loss: 0.34, train_acc: 95.7% | test_loss: 0.35, test_acc: 95.6%

 epoch: 2203 | train_loss: 0.29, train_acc: 96.4% | test_loss: 0.37, test_acc: 95.9%

 epoch: 2204 | train_loss: 0.35, train_acc: 95.7% | test_loss: 0.29, test_acc: 96.9%

 epoch: 2205 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.3%


 15%|█▍        | 2208/15000 [04:19<29:24,  7.25it/s]


 epoch: 2206 | train_loss: 0.32, train_acc: 96.2% | test_loss: 0.35, test_acc: 95.8%

 epoch: 2207 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.32, test_acc: 96.4%

 epoch: 2208 | train_loss: 0.34, train_acc: 95.9% | test_loss: 0.31, test_acc: 96.3%


 15%|█▍        | 2210/15000 [04:20<27:02,  7.88it/s]


 epoch: 2209 | train_loss: 0.34, train_acc: 95.9% | test_loss: 0.32, test_acc: 96.3%

input:       languages are called fusional languages because several meanings may be fused into single morpheme the opposite of fusional languages

target:      languages are called fusional languages because several meanings may be fused into single morpheme the opposite of fusional languages are

prediction:  languages are called fusional languages because several meanings may be fused into single morpheme the opposite of fusional languages the

 epoch: 2210 | train_loss: 0.32, train_acc: 95.9% | test_loss: 0.35, test_acc: 96.3%

 epoch: 2211 | train_loss: 0.29, train_acc: 96.2% | test_loss: 0.33, test_acc: 95.8%


 15%|█▍        | 2214/15000 [04:20<22:39,  9.40it/s]


 epoch: 2212 | train_loss: 0.36, train_acc: 95.7% | test_loss: 0.37, test_acc: 95.6%

 epoch: 2213 | train_loss: 0.32, train_acc: 96.3% | test_loss: 0.35, test_acc: 95.7%

 epoch: 2214 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.34, test_acc: 95.6%


 15%|█▍        | 2216/15000 [04:20<21:50,  9.75it/s]


 epoch: 2215 | train_loss: 0.32, train_acc: 96.2% | test_loss: 0.34, test_acc: 95.9%

 epoch: 2216 | train_loss: 0.34, train_acc: 96.0% | test_loss: 0.32, test_acc: 95.8%

 epoch: 2217 | train_loss: 0.31, train_acc: 96.1% | test_loss: 0.32, test_acc: 96.2%


 15%|█▍        | 2218/15000 [04:20<21:14, 10.03it/s]


 epoch: 2218 | train_loss: 0.34, train_acc: 96.2% | test_loss: 0.35, test_acc: 96.0%

 epoch: 2219 | train_loss: 0.32, train_acc: 96.2% | test_loss: 0.35, test_acc: 95.6%

input:       content and data between communities and applications in this way content that is created in one place dynamically can

target:      content and data between communities and applications in this way content that is created in one place dynamically can be

prediction:  content and data between communities and applications in this way content that is created in one place dynamically can the


 15%|█▍        | 2222/15000 [04:21<30:42,  6.93it/s]


 epoch: 2220 | train_loss: 0.32, train_acc: 96.1% | test_loss: 0.32, test_acc: 95.8%

 epoch: 2221 | train_loss: 0.32, train_acc: 96.3% | test_loss: 0.35, test_acc: 95.7%

 epoch: 2222 | train_loss: 0.32, train_acc: 96.3% | test_loss: 0.36, test_acc: 96.1%


 15%|█▍        | 2224/15000 [04:21<26:59,  7.89it/s]


 epoch: 2223 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.34, test_acc: 95.9%

 epoch: 2224 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.34, test_acc: 95.8%

 epoch: 2225 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.33, test_acc: 96.0%


 15%|█▍        | 2228/15000 [04:22<22:15,  9.56it/s]


 epoch: 2226 | train_loss: 0.30, train_acc: 96.3% | test_loss: 0.32, test_acc: 96.1%

 epoch: 2227 | train_loss: 0.33, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.0%

 epoch: 2228 | train_loss: 0.36, train_acc: 95.8% | test_loss: 0.36, test_acc: 95.8%


 15%|█▍        | 2230/15000 [04:22<22:15,  9.56it/s]


 epoch: 2229 | train_loss: 0.36, train_acc: 95.1% | test_loss: 0.33, test_acc: 96.1%

input:       goals in europe today the council of europe framework convention for the protection of national minorities and the council

target:      goals in europe today the council of europe framework convention for the protection of national minorities and the council of

prediction:  goals in europe today the council of europe framework convention for the protection of national minorities and the council the

 epoch: 2230 | train_loss: 0.31, train_acc: 96.0% | test_loss: 0.28, test_acc: 96.7%


 15%|█▍        | 2232/15000 [04:22<21:26,  9.92it/s]


 epoch: 2231 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.35, test_acc: 95.6%

 epoch: 2232 | train_loss: 0.33, train_acc: 96.1% | test_loss: 0.33, test_acc: 95.9%

 epoch: 2233 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.34, test_acc: 95.9%


 15%|█▍        | 2236/15000 [04:22<20:51, 10.20it/s]


 epoch: 2234 | train_loss: 0.34, train_acc: 95.9% | test_loss: 0.32, test_acc: 96.3%

 epoch: 2235 | train_loss: 0.32, train_acc: 95.7% | test_loss: 0.34, test_acc: 96.2%

 epoch: 2236 | train_loss: 0.35, train_acc: 95.9% | test_loss: 0.34, test_acc: 96.0%


 15%|█▍        | 2238/15000 [04:23<19:57, 10.66it/s]


 epoch: 2237 | train_loss: 0.31, train_acc: 96.5% | test_loss: 0.35, test_acc: 95.9%

 epoch: 2238 | train_loss: 0.35, train_acc: 96.2% | test_loss: 0.35, test_acc: 95.8%

 epoch: 2239 | train_loss: 0.31, train_acc: 96.5% | test_loss: 0.33, test_acc: 96.3%


 15%|█▍        | 2240/15000 [04:23<20:21, 10.44it/s]


input:       and nagasaki in august the subsequent surrender of japan on september ended world war ii after world war ii

target:      and nagasaki in august the subsequent surrender of japan on september ended world war ii after world war ii the

prediction:  and nagasaki in august the subsequent surrender of japan on september ended world war ii after world war ii the

 epoch: 2240 | train_loss: 0.31, train_acc: 96.2% | test_loss: 0.31, test_acc: 96.1%

 epoch: 2241 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.5%


 15%|█▍        | 2244/15000 [04:23<19:32, 10.88it/s]


 epoch: 2242 | train_loss: 0.33, train_acc: 96.2% | test_loss: 0.27, test_acc: 96.8%

 epoch: 2243 | train_loss: 0.35, train_acc: 96.2% | test_loss: 0.31, test_acc: 96.5%

 epoch: 2244 | train_loss: 0.32, train_acc: 96.2% | test_loss: 0.32, test_acc: 96.4%


 15%|█▍        | 2246/15000 [04:23<19:47, 10.74it/s]


 epoch: 2245 | train_loss: 0.31, train_acc: 96.0% | test_loss: 0.32, test_acc: 96.2%

 epoch: 2246 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.32, test_acc: 96.4%

 epoch: 2247 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.33, test_acc: 96.3%


 15%|█▍        | 2248/15000 [04:24<19:19, 10.99it/s]


 epoch: 2248 | train_loss: 0.31, train_acc: 95.9% | test_loss: 0.33, test_acc: 95.5%


 15%|█▌        | 2250/15000 [04:24<33:30,  6.34it/s]


 epoch: 2249 | train_loss: 0.34, train_acc: 96.1% | test_loss: 0.35, test_acc: 95.8%

input:       majority of religious memeplexes and harden over time they become an inviolable canon or set of dogmas eventually finding

target:      majority of religious memeplexes and harden over time they become an inviolable canon or set of dogmas eventually finding their

prediction:  majority of religious memeplexes and racial over time they become an inviolable canon or set of dogmas eventually finding the

 epoch: 2250 | train_loss: 0.30, train_acc: 96.0% | test_loss: 0.35, test_acc: 96.3%

 epoch: 2251 | train_loss: 0.30, train_acc: 96.0% | test_loss: 0.35, test_acc: 96.3%


 15%|█▌        | 2254/15000 [04:25<25:48,  8.23it/s]


 epoch: 2252 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.3%

 epoch: 2253 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.32, test_acc: 96.1%

 epoch: 2254 | train_loss: 0.34, train_acc: 96.1% | test_loss: 0.32, test_acc: 96.5%


 15%|█▌        | 2256/15000 [04:25<23:38,  8.99it/s]


 epoch: 2255 | train_loss: 0.30, train_acc: 96.3% | test_loss: 0.33, test_acc: 96.2%

 epoch: 2256 | train_loss: 0.32, train_acc: 95.9% | test_loss: 0.32, test_acc: 96.2%

 epoch: 2257 | train_loss: 0.30, train_acc: 96.3% | test_loss: 0.33, test_acc: 96.2%


 15%|█▌        | 2260/15000 [04:25<21:56,  9.68it/s]


 epoch: 2258 | train_loss: 0.33, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.5%

 epoch: 2259 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.34, test_acc: 95.8%

input:       more snappy the sasa is also traditional dance where rows of dancers perform rapid synchronised movements in time to

target:      more snappy the sasa is also traditional dance where rows of dancers perform rapid synchronised movements in time to the

prediction:  more snappy the sasa is also traditional dance where rows of dancers perform rapid synchronised movements in time to the

 epoch: 2260 | train_loss: 0.31, train_acc: 96.2% | test_loss: 0.32, test_acc: 96.0%


 15%|█▌        | 2262/15000 [04:25<20:50, 10.19it/s]


 epoch: 2261 | train_loss: 0.37, train_acc: 95.5% | test_loss: 0.30, test_acc: 96.4%

 epoch: 2262 | train_loss: 0.33, train_acc: 96.3% | test_loss: 0.34, test_acc: 95.8%


 15%|█▌        | 2264/15000 [04:26<28:46,  7.38it/s]


 epoch: 2263 | train_loss: 0.30, train_acc: 96.3% | test_loss: 0.31, test_acc: 96.0%

 epoch: 2264 | train_loss: 0.31, train_acc: 96.1% | test_loss: 0.31, test_acc: 96.6%

 epoch: 2265 | train_loss: 0.35, train_acc: 96.0% | test_loss: 0.34, test_acc: 95.9%


 15%|█▌        | 2268/15000 [04:26<23:33,  9.00it/s]


 epoch: 2266 | train_loss: 0.34, train_acc: 96.4% | test_loss: 0.35, test_acc: 96.2%

 epoch: 2267 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.4%

 epoch: 2268 | train_loss: 0.31, train_acc: 96.1% | test_loss: 0.36, test_acc: 96.1%


 15%|█▌        | 2270/15000 [04:26<22:55,  9.26it/s]


 epoch: 2269 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.33, test_acc: 95.7%

input:       largely independent of each other this means that someone may excel at one type while scoring low on another

target:      largely independent of each other this means that someone may excel at one type while scoring low on another closely

prediction:  largely independent of each other this means that someone may excel at one type while marriages low on another the

 epoch: 2270 | train_loss: 0.32, train_acc: 96.3% | test_loss: 0.33, test_acc: 96.4%


 15%|█▌        | 2272/15000 [04:26<21:46,  9.74it/s]


 epoch: 2271 | train_loss: 0.32, train_acc: 96.2% | test_loss: 0.36, test_acc: 95.7%

 epoch: 2272 | train_loss: 0.33, train_acc: 95.8% | test_loss: 0.33, test_acc: 96.1%

 epoch: 2273 | train_loss: 0.34, train_acc: 95.8% | test_loss: 0.28, test_acc: 96.5%


 15%|█▌        | 2276/15000 [04:27<20:17, 10.45it/s]


 epoch: 2274 | train_loss: 0.35, train_acc: 96.1% | test_loss: 0.31, test_acc: 95.9%

 epoch: 2275 | train_loss: 0.30, train_acc: 96.2% | test_loss: 0.32, test_acc: 96.1%

 epoch: 2276 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.33, test_acc: 96.2%


 15%|█▌        | 2279/15000 [04:27<29:08,  7.28it/s]


 epoch: 2277 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.35, test_acc: 96.0%

 epoch: 2278 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.2%

 epoch: 2279 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.33, test_acc: 95.6%


 15%|█▌        | 2281/15000 [04:28<29:02,  7.30it/s]


input:       to survive without the allegiance of the praetorian guard and the legions to secure their loyalty several emperors paid

target:      to survive without the allegiance of the praetorian guard and the legions to secure their loyalty several emperors paid the

prediction:  to survive without the allegiance of the praetorian guard and the legions to secure their loyalty several emperors paid the

 epoch: 2280 | train_loss: 0.30, train_acc: 96.8% | test_loss: 0.32, test_acc: 95.9%

 epoch: 2281 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.31, test_acc: 96.4%


 15%|█▌        | 2283/15000 [04:28<26:56,  7.86it/s]


 epoch: 2282 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.31, test_acc: 96.7%

 epoch: 2283 | train_loss: 0.29, train_acc: 96.2% | test_loss: 0.32, test_acc: 96.5%


 15%|█▌        | 2285/15000 [04:28<25:19,  8.37it/s]


 epoch: 2284 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.37, test_acc: 95.5%

 epoch: 2285 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.36, test_acc: 95.5%


 15%|█▌        | 2287/15000 [04:28<24:25,  8.68it/s]


 epoch: 2286 | train_loss: 0.33, train_acc: 95.8% | test_loss: 0.30, test_acc: 96.6%

 epoch: 2287 | train_loss: 0.32, train_acc: 96.5% | test_loss: 0.34, test_acc: 96.1%


 15%|█▌        | 2289/15000 [04:29<24:34,  8.62it/s]


 epoch: 2288 | train_loss: 0.32, train_acc: 96.2% | test_loss: 0.38, test_acc: 95.5%

 epoch: 2289 | train_loss: 0.33, train_acc: 96.1% | test_loss: 0.36, test_acc: 95.6%

input:       the conversion of constantine latin literature is dominated by the christian perspective in the late th century jerome produced


 15%|█▌        | 2291/15000 [04:29<26:44,  7.92it/s]


target:      the conversion of constantine latin literature is dominated by the christian perspective in the late th century jerome produced the

prediction:  the conversion of constantine latin literature is dominated by the christian perspective in the late th century jerome produced the

 epoch: 2290 | train_loss: 0.33, train_acc: 95.8% | test_loss: 0.31, test_acc: 96.5%

 epoch: 2291 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.34, test_acc: 96.0%


 15%|█▌        | 2293/15000 [04:30<43:19,  4.89it/s]


 epoch: 2292 | train_loss: 0.34, train_acc: 95.8% | test_loss: 0.36, test_acc: 95.5%

 epoch: 2293 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.35, test_acc: 96.2%


 15%|█▌        | 2295/15000 [04:30<33:15,  6.37it/s]


 epoch: 2294 | train_loss: 0.28, train_acc: 96.4% | test_loss: 0.29, test_acc: 96.7%

 epoch: 2295 | train_loss: 0.32, train_acc: 96.3% | test_loss: 0.31, test_acc: 96.6%


 15%|█▌        | 2297/15000 [04:30<28:43,  7.37it/s]


 epoch: 2296 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.37, test_acc: 95.4%

 epoch: 2297 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.4%


 15%|█▌        | 2299/15000 [04:30<27:02,  7.83it/s]


 epoch: 2298 | train_loss: 0.29, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.4%

 epoch: 2299 | train_loss: 0.32, train_acc: 96.0% | test_loss: 0.28, test_acc: 97.0%

input:       gal pagos islands as being in polynesia while noting that they are not culturally part of the subregion the


 15%|█▌        | 2301/15000 [04:30<28:07,  7.52it/s]


target:      gal pagos islands as being in polynesia while noting that they are not culturally part of the subregion the islands

prediction:  gal pagos islands as being in polynesia while noting that they are not culturally part of the subregion the the

 epoch: 2300 | train_loss: 0.34, train_acc: 96.3% | test_loss: 0.32, test_acc: 96.1%

 epoch: 2301 | train_loss: 0.35, train_acc: 95.9% | test_loss: 0.31, test_acc: 96.5%


 15%|█▌        | 2303/15000 [04:31<25:36,  8.26it/s]


 epoch: 2302 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.32, test_acc: 96.2%

 epoch: 2303 | train_loss: 0.31, train_acc: 96.5% | test_loss: 0.36, test_acc: 95.6%


 15%|█▌        | 2304/15000 [04:31<24:26,  8.66it/s]


 epoch: 2304 | train_loss: 0.31, train_acc: 96.5% | test_loss: 0.30, test_acc: 96.7%

 epoch: 2305 | train_loss: 0.30, train_acc: 96.3% | test_loss: 0.28, test_acc: 96.4%


 15%|█▌        | 2308/15000 [04:32<34:03,  6.21it/s]


 epoch: 2306 | train_loss: 0.31, train_acc: 96.6% | test_loss: 0.33, test_acc: 96.5%

 epoch: 2307 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.32, test_acc: 96.0%

 epoch: 2308 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.33, test_acc: 96.0%


 15%|█▌        | 2310/15000 [04:32<29:42,  7.12it/s]


 epoch: 2309 | train_loss: 0.32, train_acc: 96.2% | test_loss: 0.34, test_acc: 96.0%

input:       factions of the roman senate by prohibiting traditional paganism at rome and relinquishing his title of pontifex maximus the

target:      factions of the roman senate by prohibiting traditional paganism at rome and relinquishing his title of pontifex maximus the senior

prediction:  factions of the roman senate by prohibiting traditional paganism at rome and relinquishing his title of pontifex maximus the the

 epoch: 2310 | train_loss: 0.32, train_acc: 96.0% | test_loss: 0.33, test_acc: 96.0%


 15%|█▌        | 2312/15000 [04:32<26:15,  8.05it/s]


 epoch: 2311 | train_loss: 0.31, train_acc: 96.5% | test_loss: 0.30, test_acc: 96.4%

 epoch: 2312 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.35, test_acc: 95.8%

 epoch: 2313 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.32, test_acc: 96.4%


 15%|█▌        | 2316/15000 [04:32<21:40,  9.76it/s]


 epoch: 2314 | train_loss: 0.29, train_acc: 96.2% | test_loss: 0.32, test_acc: 96.2%

 epoch: 2315 | train_loss: 0.30, train_acc: 96.3% | test_loss: 0.32, test_acc: 96.4%

 epoch: 2316 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.34, test_acc: 96.0%


 15%|█▌        | 2318/15000 [04:33<20:23, 10.37it/s]


 epoch: 2317 | train_loss: 0.32, train_acc: 96.2% | test_loss: 0.32, test_acc: 96.0%

 epoch: 2318 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.30, test_acc: 96.3%

 epoch: 2319 | train_loss: 0.33, train_acc: 96.1% | test_loss: 0.34, test_acc: 96.1%


 15%|█▌        | 2320/15000 [04:33<21:04, 10.03it/s]


input:       furniture as well as the availability of library and canteen tend to contribute to educational success the quality of

target:      furniture as well as the availability of library and canteen tend to contribute to educational success the quality of the

prediction:  furniture as well as the availability of library and season tend to contribute to educational success the quality of the

 epoch: 2320 | train_loss: 0.33, train_acc: 95.8% | test_loss: 0.31, test_acc: 95.9%


 15%|█▌        | 2322/15000 [04:33<29:30,  7.16it/s]


 epoch: 2321 | train_loss: 0.33, train_acc: 96.2% | test_loss: 0.27, test_acc: 96.7%

 epoch: 2322 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.31, test_acc: 96.0%

 epoch: 2323 | train_loss: 0.31, train_acc: 96.6% | test_loss: 0.35, test_acc: 95.7%


 16%|█▌        | 2326/15000 [04:34<23:28,  9.00it/s]


 epoch: 2324 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.34, test_acc: 96.1%

 epoch: 2325 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.31, test_acc: 96.5%

 epoch: 2326 | train_loss: 0.31, train_acc: 96.2% | test_loss: 0.33, test_acc: 95.9%


 16%|█▌        | 2328/15000 [04:34<22:14,  9.50it/s]


 epoch: 2327 | train_loss: 0.36, train_acc: 95.4% | test_loss: 0.34, test_acc: 95.7%

 epoch: 2328 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.34, test_acc: 96.3%

 epoch: 2329 | train_loss: 0.31, train_acc: 96.2% | test_loss: 0.32, test_acc: 96.2%


 16%|█▌        | 2330/15000 [04:34<21:49,  9.67it/s]


input:       big word classes can be open if new words can continuously be added to the class or relatively closed

target:      big word classes can be open if new words can continuously be added to the class or relatively closed if

prediction:  big word classes can be open if new words can continuously be added to the class or relatively closed the

 epoch: 2330 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.33, test_acc: 96.0%

 epoch: 2331 | train_loss: 0.31, train_acc: 96.1% | test_loss: 0.31, test_acc: 96.4%


 16%|█▌        | 2334/15000 [04:34<20:55, 10.09it/s]


 epoch: 2332 | train_loss: 0.32, train_acc: 96.1% | test_loss: 0.32, test_acc: 96.4%

 epoch: 2333 | train_loss: 0.33, train_acc: 96.3% | test_loss: 0.32, test_acc: 96.4%

 epoch: 2334 | train_loss: 0.32, train_acc: 96.2% | test_loss: 0.32, test_acc: 95.9%


 16%|█▌        | 2336/15000 [04:35<34:09,  6.18it/s]


 epoch: 2335 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.34, test_acc: 95.8%

 epoch: 2336 | train_loss: 0.32, train_acc: 96.3% | test_loss: 0.35, test_acc: 95.9%

 epoch: 2337 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.3%


 16%|█▌        | 2340/15000 [04:35<26:50,  7.86it/s]


 epoch: 2338 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.32, test_acc: 96.1%

 epoch: 2339 | train_loss: 0.37, train_acc: 95.5% | test_loss: 0.29, test_acc: 96.7%

input:       called phantom energy theories suggest that ultimately galaxy clusters stars planets atoms nuclei and matter itself will be torn

target:      called phantom energy theories suggest that ultimately galaxy clusters stars planets atoms nuclei and matter itself will be torn apart

prediction:  called phantom energy theories suggest that ultimately galaxy clusters stars planets atoms nuclei and matter itself will be torn the

 epoch: 2340 | train_loss: 0.32, train_acc: 96.3% | test_loss: 0.33, test_acc: 96.4%


 16%|█▌        | 2342/15000 [04:35<24:04,  8.76it/s]


 epoch: 2341 | train_loss: 0.33, train_acc: 96.2% | test_loss: 0.32, test_acc: 96.2%

 epoch: 2342 | train_loss: 0.36, train_acc: 95.9% | test_loss: 0.34, test_acc: 95.9%

 epoch: 2343 | train_loss: 0.33, train_acc: 96.3% | test_loss: 0.33, test_acc: 96.1%


 16%|█▌        | 2346/15000 [04:36<21:39,  9.73it/s]


 epoch: 2344 | train_loss: 0.30, train_acc: 96.3% | test_loss: 0.32, test_acc: 96.2%

 epoch: 2345 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.35, test_acc: 95.8%

 epoch: 2346 | train_loss: 0.31, train_acc: 96.0% | test_loss: 0.34, test_acc: 95.6%


 16%|█▌        | 2348/15000 [04:36<21:03, 10.01it/s]


 epoch: 2347 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.34, test_acc: 95.8%

 epoch: 2348 | train_loss: 0.30, train_acc: 96.3% | test_loss: 0.34, test_acc: 96.0%


 16%|█▌        | 2350/15000 [04:37<36:40,  5.75it/s]


 epoch: 2349 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.33, test_acc: 96.0%

input:       but living within the roman world were peregrini non romans in the constitutio antoniniana extended citizenship to all freeborn

target:      but living within the roman world were peregrini non romans in the constitutio antoniniana extended citizenship to all freeborn inhabitants

prediction:  but living within the roman world were peregrini non romans in the constitutio antoniniana extended citizenship to all freeborn the

 epoch: 2350 | train_loss: 0.33, train_acc: 96.3% | test_loss: 0.30, test_acc: 96.3%


 16%|█▌        | 2352/15000 [04:37<31:08,  6.77it/s]


 epoch: 2351 | train_loss: 0.33, train_acc: 96.2% | test_loss: 0.31, test_acc: 96.4%

 epoch: 2352 | train_loss: 0.35, train_acc: 95.7% | test_loss: 0.30, test_acc: 96.4%

 epoch: 2353 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.4%


 16%|█▌        | 2356/15000 [04:37<24:03,  8.76it/s]


 epoch: 2354 | train_loss: 0.31, train_acc: 96.1% | test_loss: 0.32, test_acc: 96.3%

 epoch: 2355 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.32, test_acc: 96.3%

 epoch: 2356 | train_loss: 0.32, train_acc: 96.1% | test_loss: 0.29, test_acc: 96.1%


 16%|█▌        | 2358/15000 [04:37<22:10,  9.50it/s]


 epoch: 2357 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.33, test_acc: 95.7%

 epoch: 2358 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.33, test_acc: 96.1%

 epoch: 2359 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.33, test_acc: 96.3%


 16%|█▌        | 2360/15000 [04:38<22:06,  9.53it/s]


input:       the latter example may be used as way of placing special emphasis on who thereby slightly altering the meaning

target:      the latter example may be used as way of placing special emphasis on who thereby slightly altering the meaning of

prediction:  the latter example may be used as way of placing special emphasis on who thereby slightly altering the meaning the

 epoch: 2360 | train_loss: 0.29, train_acc: 96.4% | test_loss: 0.33, test_acc: 96.3%

 epoch: 2361 | train_loss: 0.30, train_acc: 96.3% | test_loss: 0.28, test_acc: 96.5%


 16%|█▌        | 2362/15000 [04:38<21:15,  9.90it/s]


 epoch: 2362 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.4%

 epoch: 2363 | train_loss: 0.31, train_acc: 96.1% | test_loss: 0.33, test_acc: 95.9%


 16%|█▌        | 2366/15000 [04:39<29:01,  7.25it/s]


 epoch: 2364 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.34, test_acc: 96.1%

 epoch: 2365 | train_loss: 0.31, train_acc: 96.5% | test_loss: 0.33, test_acc: 96.2%

 epoch: 2366 | train_loss: 0.34, train_acc: 96.0% | test_loss: 0.36, test_acc: 95.8%


 16%|█▌        | 2368/15000 [04:39<25:28,  8.27it/s]


 epoch: 2367 | train_loss: 0.34, train_acc: 96.1% | test_loss: 0.33, test_acc: 95.9%

 epoch: 2368 | train_loss: 0.31, train_acc: 96.2% | test_loss: 0.32, test_acc: 96.1%

 epoch: 2369 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.32, test_acc: 95.8%


 16%|█▌        | 2370/15000 [04:39<24:22,  8.64it/s]


input:       the emergence of the present universe from an ultra dense and high temperature initial state it is misleading to

target:      the emergence of the present universe from an ultra dense and high temperature initial state it is misleading to visualize

prediction:  the emergence of the present universe from an aids dense and high temperature initial state it is misleading to the

 epoch: 2370 | train_loss: 0.34, train_acc: 95.8% | test_loss: 0.35, test_acc: 96.0%

 epoch: 2371 | train_loss: 0.35, train_acc: 95.8% | test_loss: 0.33, test_acc: 96.0%


 16%|█▌        | 2374/15000 [04:39<21:26,  9.82it/s]


 epoch: 2372 | train_loss: 0.33, train_acc: 95.9% | test_loss: 0.36, test_acc: 95.6%

 epoch: 2373 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.32, test_acc: 96.1%

 epoch: 2374 | train_loss: 0.32, train_acc: 95.9% | test_loss: 0.33, test_acc: 95.9%


 16%|█▌        | 2376/15000 [04:39<21:06,  9.97it/s]


 epoch: 2375 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.35, test_acc: 95.6%

 epoch: 2376 | train_loss: 0.29, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.1%

 epoch: 2377 | train_loss: 0.31, train_acc: 96.1% | test_loss: 0.33, test_acc: 95.9%


 16%|█▌        | 2378/15000 [04:40<34:30,  6.10it/s]


 epoch: 2378 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.35, test_acc: 95.6%

 epoch: 2379 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.36, test_acc: 96.1%

input:       of the moderators and set the forum to private alleging it to have become infested with racism and sexism

target:      of the moderators and set the forum to private alleging it to have become infested with racism and sexism reddit

prediction:  of the moderators and set the forum to private hope it to have become education with racism and natives the


 16%|█▌        | 2382/15000 [04:40<26:53,  7.82it/s]


 epoch: 2380 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.33, test_acc: 95.6%

 epoch: 2381 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.35, test_acc: 96.2%

 epoch: 2382 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.28, test_acc: 96.5%

 epoch: 2383 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.33, test_acc: 96.3%


 16%|█▌        | 2386/15000 [04:41<22:37,  9.29it/s]


 epoch: 2384 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.2%

 epoch: 2385 | train_loss: 0.32, train_acc: 95.9% | test_loss: 0.33, test_acc: 96.1%

 epoch: 2386 | train_loss: 0.30, train_acc: 96.3% | test_loss: 0.31, test_acc: 96.5%


 16%|█▌        | 2388/15000 [04:41<22:33,  9.32it/s]


 epoch: 2387 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.30, test_acc: 96.7%

 epoch: 2388 | train_loss: 0.34, train_acc: 95.6% | test_loss: 0.35, test_acc: 95.7%


 16%|█▌        | 2390/15000 [04:41<23:56,  8.78it/s]


 epoch: 2389 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.29, test_acc: 96.3%

input:       was released for general availability in december and is included with windows and windows server by default new features

target:      was released for general availability in december and is included with windows and windows server by default new features in

prediction:  was released for general availability in december and is included with windows and windows server by default new features the

 epoch: 2390 | train_loss: 0.31, train_acc: 96.2% | test_loss: 0.28, test_acc: 96.4%


 16%|█▌        | 2392/15000 [04:42<23:43,  8.86it/s]


 epoch: 2391 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.33, test_acc: 95.9%

 epoch: 2392 | train_loss: 0.33, train_acc: 95.9% | test_loss: 0.31, test_acc: 96.4%


 16%|█▌        | 2394/15000 [04:42<38:09,  5.51it/s]


 epoch: 2393 | train_loss: 0.29, train_acc: 96.4% | test_loss: 0.34, test_acc: 95.9%

 epoch: 2394 | train_loss: 0.32, train_acc: 96.0% | test_loss: 0.32, test_acc: 96.2%

 epoch: 2395 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.31, test_acc: 96.4%

 16%|█▌        | 2396/15000 [04:42<30:48,  6.82it/s]



 epoch: 2396 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.32, test_acc: 96.2%


 16%|█▌        | 2398/15000 [04:43<28:04,  7.48it/s]


 epoch: 2397 | train_loss: 0.33, train_acc: 96.2% | test_loss: 0.30, test_acc: 96.2%

 epoch: 2398 | train_loss: 0.33, train_acc: 96.1% | test_loss: 0.31, test_acc: 96.3%


 16%|█▌        | 2400/15000 [04:43<28:01,  7.49it/s]


 epoch: 2399 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.33, test_acc: 96.2%

input:       emperor by the pope in this led in to the founding of the holy roman empire which eventually became

target:      emperor by the pope in this led in to the founding of the holy roman empire which eventually became centred

prediction:  emperor by the pope in this led in to the founding of the holy roman empire which eventually became the

 epoch: 2400 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.31, test_acc: 96.2%


 16%|█▌        | 2402/15000 [04:43<26:13,  8.00it/s]


 epoch: 2401 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.4%

 epoch: 2402 | train_loss: 0.30, train_acc: 96.3% | test_loss: 0.35, test_acc: 95.9%


 16%|█▌        | 2405/15000 [04:43<23:03,  9.11it/s]


 epoch: 2403 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.33, test_acc: 96.1%

 epoch: 2404 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.33, test_acc: 96.1%

 epoch: 2405 | train_loss: 0.30, train_acc: 96.3% | test_loss: 0.30, test_acc: 96.5%


 16%|█▌        | 2406/15000 [04:43<22:45,  9.23it/s]


 epoch: 2406 | train_loss: 0.31, train_acc: 96.0% | test_loss: 0.33, test_acc: 96.0%


 16%|█▌        | 2408/15000 [04:44<38:21,  5.47it/s]


 epoch: 2407 | train_loss: 0.33, train_acc: 96.3% | test_loss: 0.38, test_acc: 95.7%

 epoch: 2408 | train_loss: 0.33, train_acc: 96.3% | test_loss: 0.30, test_acc: 96.3%


 16%|█▌        | 2410/15000 [04:44<34:18,  6.12it/s]


 epoch: 2409 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.32, test_acc: 96.1%

input:       papyri preserve complex accounting methods that suggest elements of economic rationalism and the empire was highly monetized although the

target:      papyri preserve complex accounting methods that suggest elements of economic rationalism and the empire was highly monetized although the means

prediction:  papyri preserve complex accounting methods that suggest elements of economic rationalism and the empire was highly monetized although the the

 epoch: 2410 | train_loss: 0.30, train_acc: 96.3% | test_loss: 0.32, test_acc: 96.6%


 16%|█▌        | 2413/15000 [04:45<25:45,  8.15it/s]


 epoch: 2411 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.4%

 epoch: 2412 | train_loss: 0.31, train_acc: 96.5% | test_loss: 0.32, test_acc: 96.3%

 epoch: 2413 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.32, test_acc: 96.2%


 16%|█▌        | 2415/15000 [04:45<22:35,  9.28it/s]


 epoch: 2414 | train_loss: 0.29, train_acc: 96.4% | test_loss: 0.26, test_acc: 97.1%

 epoch: 2415 | train_loss: 0.31, train_acc: 96.2% | test_loss: 0.30, test_acc: 96.3%

 epoch: 2416 | train_loss: 0.31, train_acc: 96.2% | test_loss: 0.29, test_acc: 96.3%


 16%|█▌        | 2419/15000 [04:45<20:35, 10.18it/s]


 epoch: 2417 | train_loss: 0.33, train_acc: 96.1% | test_loss: 0.35, test_acc: 96.3%

 epoch: 2418 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.30, test_acc: 96.4%

 epoch: 2419 | train_loss: 0.31, train_acc: 96.5% | test_loss: 0.31, test_acc: 96.5%

input:       popular sport among australian women is netball while australian rules football garners the highest spectatorship numbers and television ratings

target:      popular sport among australian women is netball while australian rules football garners the highest spectatorship numbers and television ratings rugby

prediction:  popular sport among australian women is requirement while australian rules football pulse the highest tank numbers and television ratings the

 epoch: 2420 | train_loss: 0.32, train_acc: 96.5% | test_loss: 0.38, test_acc: 95.4%


 16%|█▌        | 2423/15000 [04:46<23:30,  8.92it/s]


 epoch: 2421 | train_loss: 0.34, train_acc: 96.0% | test_loss: 0.31, test_acc: 96.4%

 epoch: 2422 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.33, test_acc: 96.2%

 epoch: 2423 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.8%


 16%|█▌        | 2425/15000 [04:46<21:44,  9.64it/s]


 epoch: 2424 | train_loss: 0.29, train_acc: 96.9% | test_loss: 0.33, test_acc: 96.0%

 epoch: 2425 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.33, test_acc: 95.7%

 epoch: 2426 | train_loss: 0.30, train_acc: 96.2% | test_loss: 0.31, test_acc: 96.4%


 16%|█▌        | 2429/15000 [04:46<19:38, 10.67it/s]


 epoch: 2427 | train_loss: 0.31, train_acc: 96.5% | test_loss: 0.31, test_acc: 96.3%

 epoch: 2428 | train_loss: 0.28, train_acc: 96.5% | test_loss: 0.29, test_acc: 96.4%

 epoch: 2429 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.29, test_acc: 96.6%


 16%|█▌        | 2431/15000 [04:46<20:14, 10.35it/s]


input:       requirement for latin in the empire but it represented certain status high standards of latin latinitas started with the

target:      requirement for latin in the empire but it represented certain status high standards of latin latinitas started with the advent

prediction:  requirement for latin in the empire but it represented certain status high standards of latin latinitas started with the the

 epoch: 2430 | train_loss: 0.30, train_acc: 96.0% | test_loss: 0.30, test_acc: 96.3%

 epoch: 2431 | train_loss: 0.33, train_acc: 96.2% | test_loss: 0.34, test_acc: 95.9%


 16%|█▌        | 2433/15000 [04:47<20:17, 10.32it/s]


 epoch: 2432 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.31, test_acc: 95.9%

 epoch: 2433 | train_loss: 0.31, train_acc: 96.5% | test_loss: 0.30, test_acc: 96.4%

 epoch: 2434 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.34, test_acc: 96.0%


 16%|█▌        | 2437/15000 [04:47<28:56,  7.23it/s]


 epoch: 2435 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.30, test_acc: 96.4%

 epoch: 2436 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.30, test_acc: 96.4%

 epoch: 2437 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.33, test_acc: 96.4%


 16%|█▋        | 2439/15000 [04:48<25:31,  8.20it/s]


 epoch: 2438 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.31, test_acc: 96.3%

 epoch: 2439 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.29, test_acc: 96.3%

input:       the cook islands society islands and austral islands in the center and the marquesas islands the tuamotus mangareva islands

target:      the cook islands society islands and austral islands in the center and the marquesas islands the tuamotus mangareva islands and

prediction:  the cook islands society islands and certainly islands in the center and the marquesas islands the charlie design islands the

 epoch: 2440 | train_loss: 0.29, train_acc: 96.2% | test_loss: 0.34, test_acc: 95.9%


 16%|█▋        | 2443/15000 [04:48<22:12,  9.42it/s]


 epoch: 2441 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.4%

 epoch: 2442 | train_loss: 0.33, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.3%

 epoch: 2443 | train_loss: 0.36, train_acc: 95.7% | test_loss: 0.31, test_acc: 96.4%


 16%|█▋        | 2445/15000 [04:48<21:11,  9.88it/s]


 epoch: 2444 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.3%

 epoch: 2445 | train_loss: 0.30, train_acc: 96.7% | test_loss: 0.33, test_acc: 96.1%

 epoch: 2446 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.33, test_acc: 96.0%


 16%|█▋        | 2447/15000 [04:48<20:22, 10.27it/s]


 epoch: 2447 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.31, test_acc: 96.2%

 epoch: 2448 | train_loss: 0.33, train_acc: 95.8% | test_loss: 0.28, test_acc: 96.4%


 16%|█▋        | 2450/15000 [04:49<32:58,  6.34it/s]


 epoch: 2449 | train_loss: 0.32, train_acc: 96.0% | test_loss: 0.30, test_acc: 96.5%

input:       and mostly in the southern hemisphere with relatively small portion in the northern hemisphere at the northern tip of

target:      and mostly in the southern hemisphere with relatively small portion in the northern hemisphere at the northern tip of the

prediction:  and mostly in the southern hemisphere with relatively small portion in the northern hemisphere at the northern tip of the

 epoch: 2450 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.33, test_acc: 96.3%


 16%|█▋        | 2452/15000 [04:49<27:55,  7.49it/s]


 epoch: 2451 | train_loss: 0.30, train_acc: 96.3% | test_loss: 0.34, test_acc: 95.7%

 epoch: 2452 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.33, test_acc: 95.8%

 epoch: 2453 | train_loss: 0.28, train_acc: 96.5% | test_loss: 0.31, test_acc: 96.4%


 16%|█▋        | 2456/15000 [04:50<21:58,  9.51it/s]


 epoch: 2454 | train_loss: 0.30, train_acc: 96.1% | test_loss: 0.31, test_acc: 96.4%

 epoch: 2455 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.31, test_acc: 96.4%

 epoch: 2456 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.25, test_acc: 97.1%


 16%|█▋        | 2458/15000 [04:50<20:23, 10.25it/s]


 epoch: 2457 | train_loss: 0.31, train_acc: 96.1% | test_loss: 0.34, test_acc: 96.4%

 epoch: 2458 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.32, test_acc: 96.4%

 epoch: 2459 | train_loss: 0.33, train_acc: 95.9% | test_loss: 0.33, test_acc: 96.2%


 16%|█▋        | 2460/15000 [04:50<20:50, 10.03it/s]


input:       metamorphosed deformation typically occurs as result of horizontal shortening horizontal extension or side to side strike slip motion these

target:      metamorphosed deformation typically occurs as result of horizontal shortening horizontal extension or side to side strike slip motion these structural

prediction:  metamorphosed deformation typically occurs as result of horizontal shortening horizontal extension or side to side strike slip motion these the

 epoch: 2460 | train_loss: 0.31, train_acc: 96.6% | test_loss: 0.31, test_acc: 96.3%

 epoch: 2461 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.33, test_acc: 95.8%


 16%|█▋        | 2462/15000 [04:50<20:27, 10.22it/s]


 epoch: 2462 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.29, test_acc: 96.4%

 epoch: 2463 | train_loss: 0.32, train_acc: 96.2% | test_loss: 0.31, test_acc: 96.2%


 16%|█▋        | 2466/15000 [04:51<28:47,  7.26it/s]


 epoch: 2464 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.30, test_acc: 96.2%

 epoch: 2465 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.31, test_acc: 96.5%

 epoch: 2466 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.31, test_acc: 96.1%


 16%|█▋        | 2468/15000 [04:51<25:36,  8.16it/s]


 epoch: 2467 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.31, test_acc: 96.7%

 epoch: 2468 | train_loss: 0.33, train_acc: 96.2% | test_loss: 0.34, test_acc: 95.7%

 epoch: 2469 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.33, test_acc: 96.0%


 16%|█▋        | 2470/15000 [04:51<24:20,  8.58it/s]


input:       millimetre percent of the earth surface is covered by salt water oceans the remainder consists of continents and islands

target:      millimetre percent of the earth surface is covered by salt water oceans the remainder consists of continents and islands with

prediction:  millimetre percent of the earth surface is covered by salt water oceans the remainder consists of continents and islands the

 epoch: 2470 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.35, test_acc: 95.9%

 epoch: 2471 | train_loss: 0.34, train_acc: 95.9% | test_loss: 0.31, test_acc: 96.5%


 16%|█▋        | 2474/15000 [04:52<20:51, 10.01it/s]


 epoch: 2472 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.31, test_acc: 96.3%

 epoch: 2473 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.32, test_acc: 96.3%

 epoch: 2474 | train_loss: 0.31, train_acc: 96.2% | test_loss: 0.32, test_acc: 96.3%


 17%|█▋        | 2476/15000 [04:52<20:21, 10.25it/s]


 epoch: 2475 | train_loss: 0.30, train_acc: 96.2% | test_loss: 0.32, test_acc: 96.2%

 epoch: 2476 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.28, test_acc: 96.8%

 epoch: 2477 | train_loss: 0.26, train_acc: 96.5% | test_loss: 0.31, test_acc: 96.4%


 17%|█▋        | 2480/15000 [04:53<29:33,  7.06it/s]


 epoch: 2478 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.30, test_acc: 96.6%

 epoch: 2479 | train_loss: 0.29, train_acc: 96.8% | test_loss: 0.32, test_acc: 96.3%

input:       southern central and southeast africa the bantu speaking peoples from the sahel progressively expanded over most of sub saharan

target:      southern central and southeast africa the bantu speaking peoples from the sahel progressively expanded over most of sub saharan africa

prediction:  southern central and southeast africa the bantu speaking peoples from the sahel progressively expanded over most of sub saharan the

 epoch: 2480 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.1%


 17%|█▋        | 2482/15000 [04:53<26:14,  7.95it/s]


 epoch: 2481 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.3%

 epoch: 2482 | train_loss: 0.28, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.2%

 epoch: 2483 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.33, test_acc: 95.7%


 17%|█▋        | 2486/15000 [04:53<22:10,  9.41it/s]


 epoch: 2484 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.30, test_acc: 96.4%

 epoch: 2485 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.31, test_acc: 96.3%

 epoch: 2486 | train_loss: 0.30, train_acc: 96.3% | test_loss: 0.31, test_acc: 96.4%


 17%|█▋        | 2488/15000 [04:53<20:48, 10.02it/s]


 epoch: 2487 | train_loss: 0.32, train_acc: 96.3% | test_loss: 0.32, test_acc: 96.3%

 epoch: 2488 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.27, test_acc: 96.6%

 epoch: 2489 | train_loss: 0.29, train_acc: 96.8% | test_loss: 0.29, test_acc: 96.9%


 17%|█▋        | 2490/15000 [04:54<21:09,  9.85it/s]


input:       being in the australasia subregion the edition of the south pacific handbook by david stanley groups australia new zealand

target:      being in the australasia subregion the edition of the south pacific handbook by david stanley groups australia new zealand norfolk

prediction:  being in the australasia subregion the edition of the south pacific handbook by david stanley groups australia new zealand the

 epoch: 2490 | train_loss: 0.29, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.3%

 epoch: 2491 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.7%


 17%|█▋        | 2492/15000 [04:54<20:29, 10.17it/s]


 epoch: 2492 | train_loss: 0.29, train_acc: 96.3% | test_loss: 0.27, test_acc: 96.7%


 17%|█▋        | 2494/15000 [04:54<32:57,  6.32it/s]


 epoch: 2493 | train_loss: 0.30, train_acc: 96.7% | test_loss: 0.29, test_acc: 96.5%

 epoch: 2494 | train_loss: 0.32, train_acc: 96.2% | test_loss: 0.32, test_acc: 96.0%

 epoch: 2495 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.5%


 17%|█▋        | 2498/15000 [04:55<25:13,  8.26it/s]


 epoch: 2496 | train_loss: 0.33, train_acc: 96.0% | test_loss: 0.30, test_acc: 96.4%

 epoch: 2497 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.30, test_acc: 96.3%

 epoch: 2498 | train_loss: 0.33, train_acc: 96.3% | test_loss: 0.32, test_acc: 96.3%


 17%|█▋        | 2500/15000 [04:55<26:02,  8.00it/s]


 epoch: 2499 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.32, test_acc: 96.4%

input:       common currency the euro and participate in the european single market and customs union large bloc of countries the

target:      common currency the euro and participate in the european single market and customs union large bloc of countries the schengen

prediction:  common currency the euro and participate in the european single market and customs union large bloc of countries the the

 epoch: 2500 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.34, test_acc: 96.1%


 17%|█▋        | 2502/15000 [04:55<25:04,  8.31it/s]


 epoch: 2501 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.32, test_acc: 96.5%

 epoch: 2502 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.29, test_acc: 96.3%


 17%|█▋        | 2505/15000 [04:55<23:05,  9.02it/s]


 epoch: 2503 | train_loss: 0.29, train_acc: 96.4% | test_loss: 0.26, test_acc: 96.8%

 epoch: 2504 | train_loss: 0.31, train_acc: 96.6% | test_loss: 0.29, test_acc: 96.6%

 epoch: 2505 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.32, test_acc: 95.8%


 17%|█▋        | 2506/15000 [04:56<23:24,  8.89it/s]


 epoch: 2506 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.33, test_acc: 96.3%


 17%|█▋        | 2508/15000 [04:56<34:34,  6.02it/s]


 epoch: 2507 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.34, test_acc: 96.3%

 epoch: 2508 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.32, test_acc: 96.1%


 17%|█▋        | 2510/15000 [04:56<32:12,  6.46it/s]


 epoch: 2509 | train_loss: 0.35, train_acc: 95.8% | test_loss: 0.29, test_acc: 96.7%

input:       plus access control bug tracking software feature requests task management continuous integration and wikis for every project headquartered in

target:      plus access control bug tracking software feature requests task management continuous integration and wikis for every project headquartered in california

prediction:  plus access control bug tracking software feature requests task management continuous integration and wikis for every project headquartered in the

 epoch: 2510 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.30, test_acc: 96.5%


 17%|█▋        | 2512/15000 [04:57<27:16,  7.63it/s]


 epoch: 2511 | train_loss: 0.34, train_acc: 96.1% | test_loss: 0.29, test_acc: 96.4%

 epoch: 2512 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.31, test_acc: 96.2%


 17%|█▋        | 2514/15000 [04:57<24:50,  8.37it/s]


 epoch: 2513 | train_loss: 0.29, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.7%

 epoch: 2514 | train_loss: 0.30, train_acc: 95.8% | test_loss: 0.33, test_acc: 95.9%


 17%|█▋        | 2516/15000 [04:57<23:59,  8.67it/s]


 epoch: 2515 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.32, test_acc: 96.2%

 epoch: 2516 | train_loss: 0.31, train_acc: 96.1% | test_loss: 0.30, test_acc: 96.1%


 17%|█▋        | 2518/15000 [04:57<23:45,  8.76it/s]


 epoch: 2517 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.28, test_acc: 96.7%

 epoch: 2518 | train_loss: 0.28, train_acc: 96.5% | test_loss: 0.34, test_acc: 96.2%


 17%|█▋        | 2520/15000 [04:58<27:05,  7.68it/s]


 epoch: 2519 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.29, test_acc: 96.6%

input:       of psychology includes evidence of racism the idea of white supremacy and indeed the modern concept of race itself

target:      of psychology includes evidence of racism the idea of white supremacy and indeed the modern concept of race itself arose

prediction:  of psychology includes evidence of racism the idea of white supremacy and indeed the modern concept of race itself the

 epoch: 2520 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.30, test_acc: 96.4%


 17%|█▋        | 2521/15000 [04:58<26:57,  7.71it/s]


 epoch: 2521 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.30, test_acc: 96.8%

 epoch: 2522 | train_loss: 0.28, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.7%


 17%|█▋        | 2525/15000 [04:58<22:43,  9.15it/s]


 epoch: 2523 | train_loss: 0.29, train_acc: 96.2% | test_loss: 0.29, test_acc: 96.2%

 epoch: 2524 | train_loss: 0.32, train_acc: 95.9% | test_loss: 0.33, test_acc: 96.2%

 epoch: 2525 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.32, test_acc: 96.2%


 17%|█▋        | 2526/15000 [04:58<22:53,  9.08it/s]


 epoch: 2526 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.29, test_acc: 96.7%

 epoch: 2527 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.33, test_acc: 96.0%

 epoch: 2528 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.32, test_acc: 96.3%


 17%|█▋        | 2530/15000 [04:59<25:16,  8.23it/s]


 epoch: 2529 | train_loss: 0.30, train_acc: 96.7% | test_loss: 0.29, test_acc: 96.5%

input:       temperatures the average annual temperatures in the amazon basin oscillate around with low thermal amplitudes and high rainfall

target:      temperatures the average annual temperatures in the amazon basin oscillate around with low thermal amplitudes and high rainfall indices

prediction:  temperatures the average annual temperatures in the amazon basin caught around with low thermal constantinople and high rainfall the

 epoch: 2530 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.31, test_acc: 96.2%


 17%|█▋        | 2532/15000 [04:59<24:21,  8.53it/s]


 epoch: 2531 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.31, test_acc: 96.4%

 epoch: 2532 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.31, test_acc: 96.6%

 epoch: 2533 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.32, test_acc: 96.4%


 17%|█▋        | 2534/15000 [04:59<22:37,  9.18it/s]


 epoch: 2534 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.30, test_acc: 96.5%


 17%|█▋        | 2537/15000 [05:00<33:29,  6.20it/s]


 epoch: 2535 | train_loss: 0.29, train_acc: 96.8% | test_loss: 0.27, test_acc: 96.9%

 epoch: 2536 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.30, test_acc: 96.5%

 epoch: 2537 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.35, test_acc: 95.9%


 17%|█▋        | 2539/15000 [05:00<27:49,  7.46it/s]


 epoch: 2538 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.33, test_acc: 96.4%

 epoch: 2539 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.33, test_acc: 96.3%

input:       other underground detectors ibex is already yielding new astrophysical discoveries no one knows what is creating the ena energetic

target:      other underground detectors ibex is already yielding new astrophysical discoveries no one knows what is creating the ena energetic neutral

prediction:  other underground detectors progressive is already yielding new oaths discoveries no one knows what is creating the ena energetic the


 17%|█▋        | 2542/15000 [05:00<24:12,  8.58it/s]


 epoch: 2540 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.32, test_acc: 96.1%

 epoch: 2541 | train_loss: 0.32, train_acc: 96.3% | test_loss: 0.29, test_acc: 95.7%

 epoch: 2542 | train_loss: 0.31, train_acc: 96.5% | test_loss: 0.32, test_acc: 96.4%


 17%|█▋        | 2544/15000 [05:00<22:09,  9.37it/s]


 epoch: 2543 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.30, test_acc: 96.3%

 epoch: 2544 | train_loss: 0.32, train_acc: 95.6% | test_loss: 0.29, test_acc: 96.4%

 epoch: 2545 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.34, test_acc: 96.0%


 17%|█▋        | 2548/15000 [05:01<20:31, 10.11it/s]


 epoch: 2546 | train_loss: 0.29, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.4%

 epoch: 2547 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.33, test_acc: 96.2%

 epoch: 2548 | train_loss: 0.30, train_acc: 96.8% | test_loss: 0.29, test_acc: 96.4%

 epoch: 2549 | train_loss: 0.29, train_acc: 96.9% | test_loss: 0.31, test_acc: 96.5%

input:       capital maximus soon entered negotiations with valentinian ii and theodosius attempting to gain their official recognition by negotiations were

target:      capital maximus soon entered negotiations with valentinian ii and theodosius attempting to gain their official recognition by negotiations were unfruitful

prediction:  capital maximus soon entered negotiations with valentinian ii and theodosius attempting to gain their official recognition by negotiations were the


 17%|█▋        | 2552/15000 [05:02<28:55,  7.17it/s]


 epoch: 2550 | train_loss: 0.30, train_acc: 96.3% | test_loss: 0.31, test_acc: 96.4%

 epoch: 2551 | train_loss: 0.35, train_acc: 96.0% | test_loss: 0.32, test_acc: 96.8%

 epoch: 2552 | train_loss: 0.31, train_acc: 96.6% | test_loss: 0.28, test_acc: 97.0%


 17%|█▋        | 2554/15000 [05:02<25:14,  8.22it/s]


 epoch: 2553 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.5%

 epoch: 2554 | train_loss: 0.30, train_acc: 96.7% | test_loss: 0.29, test_acc: 96.4%

 epoch: 2555 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.30, test_acc: 96.4%


 17%|█▋        | 2558/15000 [05:02<21:37,  9.59it/s]


 epoch: 2556 | train_loss: 0.31, train_acc: 96.2% | test_loss: 0.30, test_acc: 96.4%

 epoch: 2557 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.31, test_acc: 96.4%

 epoch: 2558 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.32, test_acc: 96.6%


 17%|█▋        | 2560/15000 [05:02<21:34,  9.61it/s]


 epoch: 2559 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.30, test_acc: 96.2%

input:       when people take them they have already been introduced to the subject and know what to expect classes provide

target:      when people take them they have already been introduced to the subject and know what to expect classes provide high

prediction:  when people take them they have already been introduced to the subject and know what to expect classes provide the

 epoch: 2560 | train_loss: 0.29, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.4%

 epoch: 2561 | train_loss: 0.31, train_acc: 96.5% | test_loss: 0.30, test_acc: 96.4%

 17%|█▋        | 2562/15000 [05:03<20:48,  9.96it/s]



 epoch: 2562 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.6%

 epoch: 2563 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.32, test_acc: 96.2%


 17%|█▋        | 2566/15000 [05:03<21:10,  9.79it/s]


 epoch: 2564 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.29, test_acc: 96.6%

 epoch: 2565 | train_loss: 0.32, train_acc: 96.1% | test_loss: 0.27, test_acc: 96.7%

 epoch: 2566 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.30, test_acc: 96.5%


 17%|█▋        | 2568/15000 [05:03<20:01, 10.34it/s]


 epoch: 2567 | train_loss: 0.27, train_acc: 96.5% | test_loss: 0.31, test_acc: 96.4%

 epoch: 2568 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.30, test_acc: 96.3%

 epoch: 2569 | train_loss: 0.30, train_acc: 96.7% | test_loss: 0.29, test_acc: 96.6%


 17%|█▋        | 2570/15000 [05:03<20:16, 10.22it/s]


input:       of the north pacific the american coastal islands and the netherlands east indies and that he included australia and

target:      of the north pacific the american coastal islands and the netherlands east indies and that he included australia and new

prediction:  of the north pacific the american coastal islands and the netherlands east indies and that he included australia and the

 epoch: 2570 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.28, test_acc: 96.5%

 epoch: 2571 | train_loss: 0.31, train_acc: 96.1% | test_loss: 0.30, test_acc: 96.3%


 17%|█▋        | 2574/15000 [05:04<18:50, 10.99it/s]


 epoch: 2572 | train_loss: 0.30, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.7%

 epoch: 2573 | train_loss: 0.29, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.6%

 epoch: 2574 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.1%


 17%|█▋        | 2576/15000 [05:04<19:06, 10.84it/s]


 epoch: 2575 | train_loss: 0.32, train_acc: 96.2% | test_loss: 0.32, test_acc: 96.4%

 epoch: 2576 | train_loss: 0.29, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.5%

 epoch: 2577 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.31, test_acc: 96.4%


 17%|█▋        | 2580/15000 [05:05<29:38,  6.99it/s]


 epoch: 2578 | train_loss: 0.28, train_acc: 96.5% | test_loss: 0.36, test_acc: 95.6%

 epoch: 2579 | train_loss: 0.32, train_acc: 96.1% | test_loss: 0.31, test_acc: 96.4%

input:       rhein barcelona and berlin europe as cultural concept is substantially derived from the shared heritage of ancient greece

target:      rhein barcelona and berlin europe as cultural concept is substantially derived from the shared heritage of ancient greece and

prediction:  rhein barcelona and berlin europe as cultural concept is substantially derived from the shared heritage of ancient greece the

 epoch: 2580 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.30, test_acc: 96.8%


 17%|█▋        | 2582/15000 [05:05<25:45,  8.04it/s]


 epoch: 2581 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.6%

 epoch: 2582 | train_loss: 0.31, train_acc: 96.2% | test_loss: 0.27, test_acc: 96.9%

 epoch: 2583 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.33, test_acc: 96.3%


 17%|█▋        | 2586/15000 [05:05<21:32,  9.60it/s]


 epoch: 2584 | train_loss: 0.26, train_acc: 96.6% | test_loss: 0.32, test_acc: 96.2%

 epoch: 2585 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.31, test_acc: 96.3%

 epoch: 2586 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.4%


 17%|█▋        | 2588/15000 [05:05<20:42,  9.99it/s]


 epoch: 2587 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.6%

 epoch: 2588 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.33, test_acc: 96.4%

 epoch: 2589 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.31, test_acc: 96.6%


 17%|█▋        | 2590/15000 [05:06<21:22,  9.68it/s]


input:       of knowledge skills and character traits however there is extensive debate regarding its exact nature beyond these general features

target:      of knowledge skills and character traits however there is extensive debate regarding its exact nature beyond these general features some

prediction:  of knowledge skills and character traits however there is extensive debate regarding its exact nature beyond these general features the

 epoch: 2590 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.32, test_acc: 96.0%

 epoch: 2591 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.4%


 17%|█▋        | 2592/15000 [05:06<21:01,  9.83it/s]


 epoch: 2592 | train_loss: 0.29, train_acc: 96.8% | test_loss: 0.31, test_acc: 96.3%


 17%|█▋        | 2594/15000 [05:06<33:23,  6.19it/s]


 epoch: 2593 | train_loss: 0.33, train_acc: 96.3% | test_loss: 0.30, test_acc: 96.6%

 epoch: 2594 | train_loss: 0.33, train_acc: 96.1% | test_loss: 0.28, test_acc: 96.9%

 epoch: 2595 | train_loss: 0.28, train_acc: 96.4% | test_loss: 0.35, test_acc: 95.8%


 17%|█▋        | 2598/15000 [05:07<25:36,  8.07it/s]


 epoch: 2596 | train_loss: 0.32, train_acc: 96.3% | test_loss: 0.32, test_acc: 96.3%

 epoch: 2597 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.35, test_acc: 95.9%

 epoch: 2598 | train_loss: 0.30, train_acc: 96.7% | test_loss: 0.31, test_acc: 96.4%


 17%|█▋        | 2600/15000 [05:07<24:22,  8.48it/s]


 epoch: 2599 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.28, test_acc: 97.0%

input:       of states federal district five major unincorporated territories nine minor outlying islands and indian reservations it is the world

target:      of states federal district five major unincorporated territories nine minor outlying islands and indian reservations it is the world third

prediction:  of states federal district five major unincorporated territories nine minor outlying islands and indian reservations it is the world the

 epoch: 2600 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.29, test_acc: 96.7%


 17%|█▋        | 2603/15000 [05:07<21:55,  9.42it/s]


 epoch: 2601 | train_loss: 0.28, train_acc: 96.5% | test_loss: 0.29, test_acc: 96.6%

 epoch: 2602 | train_loss: 0.31, train_acc: 96.1% | test_loss: 0.28, test_acc: 96.8%

 epoch: 2603 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.32, test_acc: 96.3%


 17%|█▋        | 2605/15000 [05:07<20:32, 10.06it/s]


 epoch: 2604 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.29, test_acc: 96.6%

 epoch: 2605 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.6%

 epoch: 2606 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.5%


 17%|█▋        | 2609/15000 [05:08<20:13, 10.21it/s]


 epoch: 2607 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.29, test_acc: 96.6%

 epoch: 2608 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.6%

 epoch: 2609 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.31, test_acc: 96.3%


 17%|█▋        | 2611/15000 [05:08<20:59,  9.83it/s]


input:       would be magnified to cosmic scale these fluctuations served as the seeds for all the current structures in the

target:      would be magnified to cosmic scale these fluctuations served as the seeds for all the current structures in the universe

prediction:  would be magnified to cosmic scale these fluctuations served as the seeds for all the current structures in the the

 epoch: 2610 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.6%

 epoch: 2611 | train_loss: 0.30, train_acc: 96.2% | test_loss: 0.30, test_acc: 96.4%


 17%|█▋        | 2613/15000 [05:08<20:28, 10.09it/s]


 epoch: 2612 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.26, test_acc: 96.8%

 epoch: 2613 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.30, test_acc: 96.3%

 epoch: 2614 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.29, test_acc: 96.4%


 17%|█▋        | 2617/15000 [05:09<19:06, 10.80it/s]


 epoch: 2615 | train_loss: 0.30, train_acc: 96.7% | test_loss: 0.29, test_acc: 96.4%

 epoch: 2616 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.3%

 epoch: 2617 | train_loss: 0.31, train_acc: 96.6% | test_loss: 0.31, test_acc: 96.4%


 17%|█▋        | 2619/15000 [05:09<19:25, 10.63it/s]


 epoch: 2618 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.30, test_acc: 96.1%

 epoch: 2619 | train_loss: 0.27, train_acc: 96.6% | test_loss: 0.30, test_acc: 96.4%

input:       being the rainiest place in the world along with the northern slopes of indian himalayas the atacama desert along

target:      being the rainiest place in the world along with the northern slopes of indian himalayas the atacama desert along this

prediction:  being the rainiest place in the world along with the northern slopes of indian himalayas the atacama desert along the

 epoch: 2620 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.30, test_acc: 96.3%


 17%|█▋        | 2622/15000 [05:10<34:21,  6.00it/s]


 epoch: 2621 | train_loss: 0.28, train_acc: 96.3% | test_loss: 0.35, test_acc: 95.8%

 epoch: 2622 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.30, test_acc: 96.6%


 17%|█▋        | 2624/15000 [05:10<30:19,  6.80it/s]


 epoch: 2623 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.30, test_acc: 96.4%

 epoch: 2624 | train_loss: 0.30, train_acc: 96.2% | test_loss: 0.32, test_acc: 96.4%


 18%|█▊        | 2627/15000 [05:10<25:28,  8.09it/s]


 epoch: 2625 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.3%

 epoch: 2626 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.29, test_acc: 96.7%

 epoch: 2627 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.7%


 18%|█▊        | 2628/15000 [05:10<24:31,  8.41it/s]


 epoch: 2628 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.30, test_acc: 96.7%

 epoch: 2629 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.4%

input:       states matriculation boards of various states state boards of various boards anglo indian board among others today typical syllabus

target:      states matriculation boards of various states state boards of various boards anglo indian board among others today typical syllabus includes


 18%|█▊        | 2631/15000 [05:11<24:34,  8.39it/s]


prediction:  states matriculation boards of various states state boards of various boards anglo indian board among others today typical syllabus the

 epoch: 2630 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.32, test_acc: 96.4%

 epoch: 2631 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.34, test_acc: 96.2%


 18%|█▊        | 2633/15000 [05:11<23:59,  8.59it/s]


 epoch: 2632 | train_loss: 0.28, train_acc: 96.5% | test_loss: 0.29, test_acc: 96.6%

 epoch: 2633 | train_loss: 0.31, train_acc: 96.5% | test_loss: 0.28, test_acc: 96.9%


 18%|█▊        | 2634/15000 [05:11<23:17,  8.85it/s]


 epoch: 2634 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.30, test_acc: 96.3%


 18%|█▊        | 2636/15000 [05:12<42:30,  4.85it/s]


 epoch: 2635 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.29, test_acc: 96.7%

 epoch: 2636 | train_loss: 0.29, train_acc: 96.8% | test_loss: 0.32, test_acc: 96.4%


 18%|█▊        | 2638/15000 [05:12<32:35,  6.32it/s]


 epoch: 2637 | train_loss: 0.27, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.7%

 epoch: 2638 | train_loss: 0.32, train_acc: 96.2% | test_loss: 0.29, test_acc: 96.4%


 18%|█▊        | 2640/15000 [05:12<30:26,  6.77it/s]


 epoch: 2639 | train_loss: 0.29, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.7%

input:       islands at around the same time art began to appear in new guinea including the earliest examples of sculpture

target:      islands at around the same time art began to appear in new guinea including the earliest examples of sculpture in

prediction:  islands at around the same time art began to appear in new guinea including the earliest examples of sculpture the

 epoch: 2640 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.27, test_acc: 96.9%


 18%|█▊        | 2642/15000 [05:12<26:13,  7.85it/s]


 epoch: 2641 | train_loss: 0.29, train_acc: 96.8% | test_loss: 0.29, test_acc: 96.5%

 epoch: 2642 | train_loss: 0.28, train_acc: 96.4% | test_loss: 0.27, test_acc: 96.9%

 epoch: 2643 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.6%


 18%|█▊        | 2646/15000 [05:13<20:21, 10.11it/s]


 epoch: 2644 | train_loss: 0.31, train_acc: 96.6% | test_loss: 0.29, test_acc: 96.6%

 epoch: 2645 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.26, test_acc: 96.9%

 epoch: 2646 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.33, test_acc: 96.1%


 18%|█▊        | 2648/15000 [05:13<20:18, 10.14it/s]


 epoch: 2647 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.30, test_acc: 96.7%

 epoch: 2648 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.6%

 epoch: 2649 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.28, test_acc: 96.6%

input:       and lasted until about years ago when the archaic period began the classic stage followed the archaic period and

target:      and lasted until about years ago when the archaic period began the classic stage followed the archaic period and lasted

prediction:  and lasted until about years ago when the archaic period began the classic stage followed the archaic period and the


 18%|█▊        | 2652/15000 [05:14<26:57,  7.64it/s]


 epoch: 2650 | train_loss: 0.27, train_acc: 97.1% | test_loss: 0.30, test_acc: 96.4%

 epoch: 2651 | train_loss: 0.31, train_acc: 96.3% | test_loss: 0.32, test_acc: 96.3%

 epoch: 2652 | train_loss: 0.28, train_acc: 97.1% | test_loss: 0.29, test_acc: 96.6%


 18%|█▊        | 2654/15000 [05:14<23:40,  8.69it/s]


 epoch: 2653 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.31, test_acc: 96.3%

 epoch: 2654 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.31, test_acc: 96.5%

 epoch: 2655 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.5%


 18%|█▊        | 2658/15000 [05:14<20:17, 10.14it/s]


 epoch: 2656 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.29, test_acc: 96.3%

 epoch: 2657 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.32, test_acc: 96.4%

 epoch: 2658 | train_loss: 0.28, train_acc: 96.3% | test_loss: 0.31, test_acc: 96.2%


 18%|█▊        | 2660/15000 [05:14<20:23, 10.08it/s]


 epoch: 2659 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.32, test_acc: 96.1%

input:       relativity has not yet been unified with the other fundamental descriptions several candidate theories of quantum gravity are being

target:      relativity has not yet been unified with the other fundamental descriptions several candidate theories of quantum gravity are being developed

prediction:  relativity has not yet been unified with the other fundamental descriptions several candidate theories of quantum gravity are being the

 epoch: 2660 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.9%


 18%|█▊        | 2662/15000 [05:14<19:55, 10.32it/s]


 epoch: 2661 | train_loss: 0.31, train_acc: 96.6% | test_loss: 0.29, test_acc: 96.5%

 epoch: 2662 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.8%

 epoch: 2663 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.30, test_acc: 96.5%


 18%|█▊        | 2666/15000 [05:15<28:22,  7.24it/s]


 epoch: 2664 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.30, test_acc: 96.8%

 epoch: 2665 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.32, test_acc: 96.4%

 epoch: 2666 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.6%


 18%|█▊        | 2668/15000 [05:15<24:55,  8.25it/s]


 epoch: 2667 | train_loss: 0.29, train_acc: 96.9% | test_loss: 0.30, test_acc: 96.6%

 epoch: 2668 | train_loss: 0.31, train_acc: 96.7% | test_loss: 0.32, test_acc: 96.5%

 epoch: 2669 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.28, test_acc: 96.8%


 18%|█▊        | 2670/15000 [05:16<23:37,  8.70it/s]


input:       the library as separate entity from the language however language core library is often treated as part of the

target:      the library as separate entity from the language however language core library is often treated as part of the language

prediction:  the library as separate entity from the language however language core library is often treated as part of the the

 epoch: 2670 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.29, test_acc: 96.7%

 epoch: 2671 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.6%


 18%|█▊        | 2674/15000 [05:16<20:14, 10.15it/s]


 epoch: 2672 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.29, test_acc: 96.5%

 epoch: 2673 | train_loss: 0.28, train_acc: 96.4% | test_loss: 0.29, test_acc: 96.4%

 epoch: 2674 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.6%


 18%|█▊        | 2676/15000 [05:16<18:59, 10.82it/s]


 epoch: 2675 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.7%

 epoch: 2676 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.27, test_acc: 96.8%

 epoch: 2677 | train_loss: 0.29, train_acc: 96.8% | test_loss: 0.30, test_acc: 96.4%


 18%|█▊        | 2680/15000 [05:17<23:42,  8.66it/s]


 epoch: 2678 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.31, test_acc: 96.3%

 epoch: 2679 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.29, test_acc: 96.2%

input:       presence of either type of horizon depends on the details of the flrw model that describes our universe our

target:      presence of either type of horizon depends on the details of the flrw model that describes our universe our understanding

prediction:  presence of either type of horizon depends on the details of the flrw model that describes our universe our the

 epoch: 2680 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.30, test_acc: 96.6%


 18%|█▊        | 2682/15000 [05:17<21:41,  9.47it/s]


 epoch: 2681 | train_loss: 0.31, train_acc: 96.8% | test_loss: 0.31, test_acc: 96.4%

 epoch: 2682 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.31, test_acc: 96.3%

 epoch: 2683 | train_loss: 0.28, train_acc: 96.5% | test_loss: 0.33, test_acc: 96.0%


 18%|█▊        | 2686/15000 [05:17<19:11, 10.69it/s]


 epoch: 2684 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.30, test_acc: 96.5%

 epoch: 2685 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.28, test_acc: 96.6%

 epoch: 2686 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.28, test_acc: 96.8%


 18%|█▊        | 2688/15000 [05:17<18:29, 11.10it/s]


 epoch: 2687 | train_loss: 0.27, train_acc: 96.4% | test_loss: 0.29, test_acc: 96.8%

 epoch: 2688 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.30, test_acc: 96.4%

 epoch: 2689 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.33, test_acc: 96.2%


 18%|█▊        | 2692/15000 [05:18<17:53, 11.46it/s]


input:       denotes actions and events the first group which includes english words such as dog and song are usually called

target:      denotes actions and events the first group which includes english words such as dog and song are usually called nouns

prediction:  denotes actions and events the first group which includes english words such as dog and song are usually called the

 epoch: 2690 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.34, test_acc: 96.4%

 epoch: 2691 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.30, test_acc: 96.4%

 epoch: 2692 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.26, test_acc: 96.9%


 18%|█▊        | 2694/15000 [05:18<18:25, 11.13it/s]


 epoch: 2693 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.29, test_acc: 96.5%

 epoch: 2694 | train_loss: 0.28, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.5%

 epoch: 2695 | train_loss: 0.31, train_acc: 96.5% | test_loss: 0.30, test_acc: 96.4%


 18%|█▊        | 2698/15000 [05:18<17:22, 11.80it/s]


 epoch: 2696 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.29, test_acc: 96.6%

 epoch: 2697 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.29, test_acc: 96.8%

 epoch: 2698 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.29, test_acc: 96.7%


 18%|█▊        | 2700/15000 [05:18<17:54, 11.44it/s]


 epoch: 2699 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.30, test_acc: 96.4%

input:       during the hellenistic period the importance of greece proper the territory of modern greece within the greek speaking world

target:      during the hellenistic period the importance of greece proper the territory of modern greece within the greek speaking world declined

prediction:  during the hellenistic period the importance of greece proper the territory of modern greece within the greek speaking world the

 epoch: 2700 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.28, test_acc: 96.7%

 epoch: 2701 | train_loss: 0.28, train_acc: 96.5% | test_loss: 0.29, test_acc: 96.6%


 18%|█▊        | 2704/15000 [05:19<17:31, 11.69it/s]


 epoch: 2702 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.29, test_acc: 96.5%

 epoch: 2703 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.29, test_acc: 96.8%

 epoch: 2704 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.28, test_acc: 96.6%


 18%|█▊        | 2706/15000 [05:19<17:53, 11.45it/s]


 epoch: 2705 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.32, test_acc: 96.4%

 epoch: 2706 | train_loss: 0.30, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.7%


 18%|█▊        | 2708/15000 [05:20<31:56,  6.41it/s]


 epoch: 2707 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.29, test_acc: 96.6%

 epoch: 2708 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.33, test_acc: 96.3%

 epoch: 2709 | train_loss: 0.25, train_acc: 96.7% | test_loss: 0.29, test_acc: 96.6%


 18%|█▊        | 2710/15000 [05:20<28:46,  7.12it/s]


input:       third of the surface of africa has coastline of km mi from the most northerly point ras ben sakka

target:      third of the surface of africa has coastline of km mi from the most northerly point ras ben sakka in

prediction:  third of the surface of africa has coastline of km mi from the most northerly point ras politics kadesh of

 epoch: 2710 | train_loss: 0.27, train_acc: 96.6% | test_loss: 0.30, test_acc: 96.4%

 epoch: 2711 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.4%


 18%|█▊        | 2714/15000 [05:20<22:57,  8.92it/s]


 epoch: 2712 | train_loss: 0.32, train_acc: 96.3% | test_loss: 0.29, test_acc: 96.4%

 epoch: 2713 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.30, test_acc: 96.4%

 epoch: 2714 | train_loss: 0.29, train_acc: 96.3% | test_loss: 0.29, test_acc: 96.6%


 18%|█▊        | 2716/15000 [05:20<21:14,  9.64it/s]


 epoch: 2715 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.29, test_acc: 96.3%

 epoch: 2716 | train_loss: 0.32, train_acc: 96.1% | test_loss: 0.30, test_acc: 96.6%

 epoch: 2717 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.24, test_acc: 97.2%


 18%|█▊        | 2720/15000 [05:21<20:10, 10.14it/s]


 epoch: 2718 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.31, test_acc: 96.6%

 epoch: 2719 | train_loss: 0.33, train_acc: 96.2% | test_loss: 0.28, test_acc: 96.8%

input:       composed of the same sound type which can only be distinguished by the number of repeated elements several species

target:      composed of the same sound type which can only be distinguished by the number of repeated elements several species of

prediction:  composed of the same sound type which can only be distinguished by the number of repeated elements several species the

 epoch: 2720 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.29, test_acc: 96.4%


 18%|█▊        | 2722/15000 [05:21<31:13,  6.55it/s]


 epoch: 2721 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.29, test_acc: 96.5%

 epoch: 2722 | train_loss: 0.29, train_acc: 96.9% | test_loss: 0.30, test_acc: 96.6%

 epoch: 2723 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.30, test_acc: 96.3%


 18%|█▊        | 2726/15000 [05:22<23:56,  8.54it/s]


 epoch: 2724 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.6%

 epoch: 2725 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.8%

 epoch: 2726 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.29, test_acc: 96.3%


 18%|█▊        | 2728/15000 [05:22<21:47,  9.38it/s]


 epoch: 2727 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.28, test_acc: 96.8%

 epoch: 2728 | train_loss: 0.29, train_acc: 96.4% | test_loss: 0.27, test_acc: 96.7%

 epoch: 2729 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.29, test_acc: 96.7%


 18%|█▊        | 2730/15000 [05:22<21:17,  9.60it/s]


input:       and serves as social bonding function humour may also help the transition in helping the individual to maintain positive

target:      and serves as social bonding function humour may also help the transition in helping the individual to maintain positive feelings

prediction:  and serves as social bonding function humour may also help the transition in helping the individual to maintain positive the

 epoch: 2730 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 2731 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.26, test_acc: 96.9%


 18%|█▊        | 2734/15000 [05:22<19:29, 10.48it/s]


 epoch: 2732 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.31, test_acc: 96.7%

 epoch: 2733 | train_loss: 0.30, train_acc: 96.7% | test_loss: 0.29, test_acc: 96.8%

 epoch: 2734 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.28, test_acc: 96.6%

 epoch: 2735 | train_loss: 0.29, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.5%


 18%|█▊        | 2738/15000 [05:23<24:35,  8.31it/s]


 epoch: 2736 | train_loss: 0.27, train_acc: 96.6% | test_loss: 0.29, test_acc: 96.5%

 epoch: 2737 | train_loss: 0.29, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.4%

 epoch: 2738 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.6%


 18%|█▊        | 2740/15000 [05:23<25:47,  7.92it/s]


 epoch: 2739 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.31, test_acc: 96.7%

input:       purpose command line interpreters such as netsh and wmic with their own command sets but they were not interoperable

target:      purpose command line interpreters such as netsh and wmic with their own command sets but they were not interoperable windows

prediction:  purpose command line interpreters such as netsh and wmic with their own command sets but they were not interoperable the

 epoch: 2740 | train_loss: 0.28, train_acc: 96.2% | test_loss: 0.29, test_acc: 96.5%


 18%|█▊        | 2742/15000 [05:23<24:46,  8.25it/s]


 epoch: 2741 | train_loss: 0.32, train_acc: 95.7% | test_loss: 0.31, test_acc: 96.6%

 epoch: 2742 | train_loss: 0.27, train_acc: 96.5% | test_loss: 0.28, test_acc: 96.9%


 18%|█▊        | 2744/15000 [05:24<23:18,  8.76it/s]


 epoch: 2743 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.30, test_acc: 96.5%

 epoch: 2744 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.29, test_acc: 96.5%


 18%|█▊        | 2746/15000 [05:24<23:22,  8.73it/s]


 epoch: 2745 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.27, test_acc: 96.8%

 epoch: 2746 | train_loss: 0.29, train_acc: 96.8% | test_loss: 0.27, test_acc: 96.7%


 18%|█▊        | 2748/15000 [05:24<23:33,  8.67it/s]


 epoch: 2747 | train_loss: 0.29, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.6%

 epoch: 2748 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.6%


 18%|█▊        | 2749/15000 [05:24<24:02,  8.49it/s]


 epoch: 2749 | train_loss: 0.30, train_acc: 96.3% | test_loss: 0.27, test_acc: 96.8%

input:       continent in the world and it is not rare for individuals to fluently speak not only multiple african languages

target:      continent in the world and it is not rare for individuals to fluently speak not only multiple african languages but

prediction:  continent in the world and it is not rare for individuals to fluently speak not only multiple african languages the


 18%|█▊        | 2751/15000 [05:25<39:16,  5.20it/s]


 epoch: 2750 | train_loss: 0.30, train_acc: 96.3% | test_loss: 0.31, test_acc: 96.4%

 epoch: 2751 | train_loss: 0.31, train_acc: 96.5% | test_loss: 0.29, test_acc: 96.5%


 18%|█▊        | 2754/15000 [05:25<28:47,  7.09it/s]


 epoch: 2752 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.29, test_acc: 96.7%

 epoch: 2753 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.31, test_acc: 96.4%

 epoch: 2754 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.33, test_acc: 96.3%


 18%|█▊        | 2756/15000 [05:25<25:00,  8.16it/s]


 epoch: 2755 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.29, test_acc: 96.6%

 epoch: 2756 | train_loss: 0.29, train_acc: 96.8% | test_loss: 0.34, test_acc: 96.2%

 epoch: 2757 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.6%


 18%|█▊        | 2759/15000 [05:26<22:47,  8.95it/s]


 epoch: 2758 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.27, test_acc: 96.8%

 epoch: 2759 | train_loss: 0.29, train_acc: 96.8% | test_loss: 0.25, test_acc: 96.9%

input:       big break working in new york productions outside new york city many cities have professional regional or resident theater

target:     

 18%|█▊        | 2760/15000 [05:26<24:57,  8.17it/s]

 big break working in new york productions outside new york city many cities have professional regional or resident theater companies

prediction:  big break working in new york productions outside new york city many cities have professional regional or resident theater the

 epoch: 2760 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.29, test_acc: 96.8%

 epoch: 2761 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.31, test_acc: 96.4%


 18%|█▊        | 2763/15000 [05:26<23:01,  8.86it/s]


 epoch: 2762 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.32, test_acc: 96.3%

 epoch: 2763 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.29, test_acc: 96.4%


 18%|█▊        | 2766/15000 [05:27<25:02,  8.14it/s]


 epoch: 2764 | train_loss: 0.30, train_acc: 96.7% | test_loss: 0.30, test_acc: 96.7%

 epoch: 2765 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.28, test_acc: 96.5%

 epoch: 2766 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.30, test_acc: 96.8%


 18%|█▊        | 2768/15000 [05:27<22:13,  9.17it/s]


 epoch: 2767 | train_loss: 0.29, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.8%

 epoch: 2768 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.30, test_acc: 96.4%

 epoch: 2769 | train_loss: 0.32, train_acc: 96.4% | test_loss: 0.31, test_acc: 96.7%


 18%|█▊        | 2770/15000 [05:27<21:39,  9.41it/s]


input:       of this pantheon changed continually as new deities were promoted in the hierarchy but priests made no effort to

target:      of this pantheon changed continually as new deities were promoted in the hierarchy but priests made no effort to organize

prediction:  of this pantheon changed continually as new deities were promoted in the hierarchy but priests made no effort to the

 epoch: 2770 | train_loss: 0.27, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.6%

 epoch: 2771 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.29, test_acc: 96.7%


 18%|█▊        | 2774/15000 [05:27<19:35, 10.40it/s]


 epoch: 2772 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.32, test_acc: 96.4%

 epoch: 2773 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.30, test_acc: 96.5%

 epoch: 2774 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.28, test_acc: 96.8%


 19%|█▊        | 2776/15000 [05:27<19:03, 10.69it/s]


 epoch: 2775 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.30, test_acc: 96.6%

 epoch: 2776 | train_loss: 0.29, train_acc: 96.4% | test_loss: 0.26, test_acc: 96.8%

 epoch: 2777 | train_loss: 0.27, train_acc: 96.4% | test_loss: 0.28, test_acc: 96.8%


 19%|█▊        | 2780/15000 [05:28<21:10,  9.62it/s]


 epoch: 2778 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.8%

 epoch: 2779 | train_loss: 0.27, train_acc: 97.1% | test_loss: 0.31, test_acc: 96.4%

input:       map for his patron one of chiana valley tuscany so as to give his patron better overlay of the

target:      map for his patron one of chiana valley tuscany so as to give his patron better overlay of the land

prediction:  map for his patron one of chiana valley tuscany so as to give his patron better overlay of the the

 epoch: 2780 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.8%


 19%|█▊        | 2782/15000 [05:28<19:43, 10.33it/s]


 epoch: 2781 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.29, test_acc: 96.6%

 epoch: 2782 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.8%

 epoch: 2783 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.7%


 19%|█▊        | 2786/15000 [05:28<18:24, 11.05it/s]


 epoch: 2784 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.27, test_acc: 96.8%

 epoch: 2785 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.27, test_acc: 96.5%

 epoch: 2786 | train_loss: 0.32, train_acc: 95.9% | test_loss: 0.26, test_acc: 96.9%


 19%|█▊        | 2788/15000 [05:29<17:54, 11.37it/s]


 epoch: 2787 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.29, test_acc: 96.9%

 epoch: 2788 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.29, test_acc: 96.6%

 epoch: 2789 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.29, test_acc: 96.4%


 19%|█▊        | 2790/15000 [05:29<18:57, 10.73it/s]


input:       help users navigate the site different views and new fonts to better inform redditors if they are clicking on

target:      help users navigate the site different views and new fonts to better inform redditors if they are clicking on reddit

prediction:  help users navigate the site different views and new dominated to better inform redditors if they are clicking on the

 epoch: 2790 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.31, test_acc: 96.5%

 epoch: 2791 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.30, test_acc: 96.6%


 19%|█▊        | 2794/15000 [05:30<28:06,  7.24it/s]


 epoch: 2792 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.9%

 epoch: 2793 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.31, test_acc: 96.5%

 epoch: 2794 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.32, test_acc: 96.5%


 19%|█▊        | 2796/15000 [05:30<24:30,  8.30it/s]


 epoch: 2795 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.31, test_acc: 96.4%

 epoch: 2796 | train_loss: 0.30, train_acc: 96.7% | test_loss: 0.29, test_acc: 96.2%

 epoch: 2797 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.29, test_acc: 96.5%


 19%|█▊        | 2800/15000 [05:30<21:46,  9.33it/s]


 epoch: 2798 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.25, test_acc: 97.0%

 epoch: 2799 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.32, test_acc: 96.2%

input:       mountains along with chilean expatriates and liberated chile he organized fleet to reach peru by sea and sought the

target:      mountains along with chilean expatriates and liberated chile he organized fleet to reach peru by sea and sought the military

prediction:  mountains along with chilean expatriates and liberated chile he organized fleet to reach peru by sea and sought the the

 epoch: 2800 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.33, test_acc: 96.1%


 19%|█▊        | 2802/15000 [05:30<20:44,  9.80it/s]


 epoch: 2801 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.7%

 epoch: 2802 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.8%

 epoch: 2803 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.28, test_acc: 97.0%


 19%|█▊        | 2806/15000 [05:31<19:24, 10.47it/s]


 epoch: 2804 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.31, test_acc: 96.5%

 epoch: 2805 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.29, test_acc: 96.4%

 epoch: 2806 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.27, test_acc: 97.0%


 19%|█▊        | 2809/15000 [05:31<30:41,  6.62it/s]


 epoch: 2807 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.29, test_acc: 96.9%

 epoch: 2808 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.7%

 epoch: 2809 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.28, test_acc: 96.6%


 19%|█▊        | 2810/15000 [05:31<29:23,  6.91it/s]


input:       are divided highways chile has about km of roads km of which are paved and about km are divided

target:      are divided highways chile has about km of roads km of which are paved and about km are divided highways

prediction:  are divided highways chile has about km of roads km of which are paved and about km are divided the

 epoch: 2810 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.28, test_acc: 97.1%

 epoch: 2811 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.7%


 19%|█▉        | 2814/15000 [05:32<22:07,  9.18it/s]


 epoch: 2812 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.29, test_acc: 96.5%

 epoch: 2813 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.28, test_acc: 96.6%

 epoch: 2814 | train_loss: 0.27, train_acc: 96.5% | test_loss: 0.30, test_acc: 96.5%


 19%|█▉        | 2816/15000 [05:32<20:27,  9.92it/s]


 epoch: 2815 | train_loss: 0.33, train_acc: 96.4% | test_loss: 0.29, test_acc: 96.4%

 epoch: 2816 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.6%

 epoch: 2817 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.28, test_acc: 96.9%


 19%|█▉        | 2818/15000 [05:32<19:44, 10.28it/s]


 epoch: 2818 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.29, test_acc: 96.3%

 epoch: 2819 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.9%

input:       cerebral ventricles with the use of melted wax and constructed glass aorta to observe the circulation of blood through

target:      cerebral ventricles with the use of melted wax and constructed glass aorta to observe the circulation of blood through the

prediction:  cerebral ventricles with the use of melted wax and constructed glass aorta to observe the circulation of blood through the


 19%|█▉        | 2820/15000 [05:32<20:25,  9.94it/s]


 epoch: 2820 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.29, test_acc: 96.7%


 19%|█▉        | 2822/15000 [05:33<26:44,  7.59it/s]


 epoch: 2821 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.30, test_acc: 96.5%

 epoch: 2822 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.29, test_acc: 96.7%

 epoch: 2823 | train_loss: 0.27, train_acc: 96.6% | test_loss: 0.31, test_acc: 96.0%


 19%|█▉        | 2826/15000 [05:33<21:30,  9.43it/s]


 epoch: 2824 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.33, test_acc: 96.5%

 epoch: 2825 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.29, test_acc: 96.4%

 epoch: 2826 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.27, test_acc: 96.8%


 19%|█▉        | 2828/15000 [05:33<20:11, 10.04it/s]


 epoch: 2827 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.31, test_acc: 96.7%

 epoch: 2828 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.31, test_acc: 96.0%

 epoch: 2829 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.24, test_acc: 96.5%


 19%|█▉        | 2830/15000 [05:33<20:12, 10.03it/s]


input:       of bah ll since the middle of the th century growth has particularly occurred in other asian countries because

target:      of bah ll since the middle of the th century growth has particularly occurred in other asian countries because bah

prediction:  of bah ll since the middle of the th century growth has particularly occurred in other asian countries because the

 epoch: 2830 | train_loss: 0.29, train_acc: 96.8% | test_loss: 0.29, test_acc: 96.8%

 epoch: 2831 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.5%


 19%|█▉        | 2834/15000 [05:34<19:02, 10.65it/s]


 epoch: 2832 | train_loss: 0.27, train_acc: 96.5% | test_loss: 0.30, test_acc: 96.7%

 epoch: 2833 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.31, test_acc: 96.6%

 epoch: 2834 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.25, test_acc: 96.9%

 epoch: 2835 | train_loss: 0.29, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.9%


 19%|█▉        | 2838/15000 [05:35<27:15,  7.43it/s]


 epoch: 2836 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.6%

 epoch: 2837 | train_loss: 0.29, train_acc: 96.8% | test_loss: 0.29, test_acc: 96.8%

 epoch: 2838 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.25, test_acc: 96.9%


 19%|█▉        | 2840/15000 [05:35<24:59,  8.11it/s]


 epoch: 2839 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.31, test_acc: 96.7%

input:       on may at the age of possibly of stroke francis had become close friend vasari describes leonardo as lamenting

target:      on may at the age of possibly of stroke francis had become close friend vasari describes leonardo as lamenting on

prediction:  on may at the age of possibly of stroke francis had become close friend vasari describes leonardo as happy the

 epoch: 2840 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.7%

 epoch: 2841 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.7%


 19%|█▉        | 2844/15000 [05:35<20:36,  9.83it/s]


 epoch: 2842 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.29, test_acc: 96.6%

 epoch: 2843 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.31, test_acc: 96.5%

 epoch: 2844 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.29, test_acc: 96.4%


 19%|█▉        | 2846/15000 [05:35<20:06, 10.07it/s]


 epoch: 2845 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.27, test_acc: 97.1%

 epoch: 2846 | train_loss: 0.28, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.5%

 epoch: 2847 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.7%


 19%|█▉        | 2848/15000 [05:35<19:25, 10.42it/s]


 epoch: 2848 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.29, test_acc: 96.8%

 epoch: 2849 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.28, test_acc: 96.5%

input:       on the principle of population by thomas robert malthus darwin noted that population growth would lead to struggle for

target:      on the principle of population by thomas robert malthus darwin noted that population growth would lead to struggle for existence

prediction:  on the principle of population by thomas robert malthus darwin noted that population growth would lead to struggle for the


 19%|█▉        | 2851/15000 [05:36<31:58,  6.33it/s]


 epoch: 2850 | train_loss: 0.28, train_acc: 96.4% | test_loss: 0.30, test_acc: 96.5%

 epoch: 2851 | train_loss: 0.25, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.4%


 19%|█▉        | 2854/15000 [05:37<26:07,  7.75it/s]


 epoch: 2852 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.30, test_acc: 96.8%

 epoch: 2853 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.27, test_acc: 96.6%

 epoch: 2854 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.6%


 19%|█▉        | 2856/15000 [05:37<24:48,  8.16it/s]


 epoch: 2855 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.30, test_acc: 96.7%

 epoch: 2856 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.31, test_acc: 96.5%


 19%|█▉        | 2859/15000 [05:37<22:56,  8.82it/s]


 epoch: 2857 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.27, test_acc: 97.1%

 epoch: 2858 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.25, test_acc: 96.8%

 epoch: 2859 | train_loss: 0.30, train_acc: 96.7% | test_loss: 0.31, test_acc: 96.4%

input:       high priests at the temple of amun in thebes accumulated vast tracts of land and wealth and their expanded


 19%|█▉        | 2861/15000 [05:37<25:08,  8.05it/s]


target:      high priests at the temple of amun in thebes accumulated vast tracts of land and wealth and their expanded power

prediction:  high priests at the temple of amun in thebes accumulated vast tracts of land and wealth and their expanded the

 epoch: 2860 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.31, test_acc: 96.9%

 epoch: 2861 | train_loss: 0.27, train_acc: 97.1% | test_loss: 0.28, test_acc: 96.9%


 19%|█▉        | 2863/15000 [05:38<24:14,  8.35it/s]


 epoch: 2862 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.28, test_acc: 96.9%

 epoch: 2863 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.0%


 19%|█▉        | 2865/15000 [05:38<24:07,  8.38it/s]


 epoch: 2864 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.27, test_acc: 96.8%

 epoch: 2865 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.8%


 19%|█▉        | 2867/15000 [05:38<23:00,  8.79it/s]


 epoch: 2866 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.30, test_acc: 96.4%

 epoch: 2867 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.29, test_acc: 96.8%


 19%|█▉        | 2869/15000 [05:38<23:27,  8.62it/s]


 epoch: 2868 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.31, test_acc: 96.2%

 epoch: 2869 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.5%

input:      

 19%|█▉        | 2870/15000 [05:39<26:37,  7.60it/s]

 that transcends nature single person is marvellously endowed by heaven with beauty grace and talent in such abundance that

target:      that transcends nature single person is marvellously endowed by heaven with beauty grace and talent in such abundance that he

prediction:  that controlling nature single person is marvellously endowed by heaven with beauty grace and talent in such abundance that the

 epoch: 2870 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.29, test_acc: 96.7%

 epoch: 2871 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.25, test_acc: 96.9%


 19%|█▉        | 2873/15000 [05:39<22:38,  8.93it/s]


 epoch: 2872 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.7%

 epoch: 2873 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.27, test_acc: 96.7%


 19%|█▉        | 2875/15000 [05:39<23:10,  8.72it/s]


 epoch: 2874 | train_loss: 0.28, train_acc: 96.5% | test_loss: 0.27, test_acc: 96.9%

 epoch: 2875 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.29, test_acc: 96.9%


 19%|█▉        | 2877/15000 [05:39<22:56,  8.81it/s]


 epoch: 2876 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.27, test_acc: 96.5%

 epoch: 2877 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.9%


 19%|█▉        | 2879/15000 [05:40<42:19,  4.77it/s]


 epoch: 2878 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.28, test_acc: 96.4%

 epoch: 2879 | train_loss: 0.29, train_acc: 96.9% | test_loss: 0.29, test_acc: 96.6%

input:       physics curriculum usually contains few classes in an applied discipline like geology or electrical engineering it usually differs from

target:      physics curriculum usually contains few classes in an applied discipline like geology or electrical engineering it usually differs from engineering


 19%|█▉        | 2880/15000 [05:40<37:16,  5.42it/s]


prediction:  physics curriculum usually contains few classes in an applied discipline like geology or electrical engineering it usually differs from the

 epoch: 2880 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.6%

 epoch: 2881 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.6%

 epoch: 2882 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.29, test_acc: 97.2%


 19%|█▉        | 2884/15000 [05:40<23:32,  8.58it/s]


 epoch: 2883 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.31, test_acc: 96.5%

 epoch: 2884 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.28, test_acc: 96.7%

 epoch: 2885 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 19%|█▉        | 2888/15000 [05:41<19:50, 10.18it/s]


 epoch: 2886 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.30, test_acc: 96.5%

 epoch: 2887 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.9%

 epoch: 2888 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.7%


 19%|█▉        | 2890/15000 [05:41<20:02, 10.07it/s]


 epoch: 2889 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.32, test_acc: 96.1%

input:       the elder wrote in detail of the many minerals and metals then in practical use even correctly noting the

target:      the elder wrote in detail of the many minerals and metals then in practical use even correctly noting the origin

prediction:  the elder wrote in detail of the many minerals and metals then in practical use even correctly noting the the

 epoch: 2890 | train_loss: 0.32, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.5%


 19%|█▉        | 2892/15000 [05:41<19:36, 10.29it/s]


 epoch: 2891 | train_loss: 0.27, train_acc: 96.6% | test_loss: 0.27, test_acc: 96.9%

 epoch: 2892 | train_loss: 0.27, train_acc: 96.5% | test_loss: 0.31, test_acc: 96.5%


 19%|█▉        | 2894/15000 [05:42<33:19,  6.05it/s]


 epoch: 2893 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.7%

 epoch: 2894 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.7%

 epoch: 2895 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.29, test_acc: 96.4%


 19%|█▉        | 2898/15000 [05:42<24:35,  8.20it/s]


 epoch: 2896 | train_loss: 0.31, train_acc: 96.4% | test_loss: 0.28, test_acc: 96.8%

 epoch: 2897 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.28, test_acc: 96.8%

 epoch: 2898 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.29, test_acc: 96.7%


 19%|█▉        | 2900/15000 [05:42<23:33,  8.56it/s]


 epoch: 2899 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.6%

input:       also suitable for the production of coffee beans and other crops north america is very large continent that extends

target:      also suitable for the production of coffee beans and other crops north america is very large continent that extends from

prediction:  also suitable for the production of coffee beans and other crops north america is very large continent that extends the

 epoch: 2900 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.30, test_acc: 96.3%


 19%|█▉        | 2902/15000 [05:43<21:53,  9.21it/s]


 epoch: 2901 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.9%

 epoch: 2902 | train_loss: 0.29, train_acc: 96.8% | test_loss: 0.31, test_acc: 96.4%

 epoch: 2903 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.27, test_acc: 96.8%


 19%|█▉        | 2906/15000 [05:43<18:55, 10.65it/s]


 epoch: 2904 | train_loss: 0.30, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.5%

 epoch: 2905 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.6%

 epoch: 2906 | train_loss: 0.29, train_acc: 96.4% | test_loss: 0.26, test_acc: 97.0%


 19%|█▉        | 2908/15000 [05:43<22:00,  9.16it/s]


 epoch: 2907 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.29, test_acc: 96.7%

 epoch: 2908 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.8%

 epoch: 2909 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.4%


 19%|█▉        | 2910/15000 [05:43<21:34,  9.34it/s]


input:       the first dynasty pharaohs an egyptian colony stationed in southern canaan dates to slightly before the first dynasty narmer

target:      the first dynasty pharaohs an egyptian colony stationed in southern canaan dates to slightly before the first dynasty narmer had

prediction:  the first dynasty pharaohs an egyptian colony stationed in southern canaan dates to slightly before the first dynasty narmer the

 epoch: 2910 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.4%

 epoch: 2911 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.8%


 19%|█▉        | 2914/15000 [05:44<18:59, 10.61it/s]


 epoch: 2912 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.28, test_acc: 96.8%

 epoch: 2913 | train_loss: 0.28, train_acc: 97.0% | test_loss: 0.31, test_acc: 96.2%

 epoch: 2914 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.7%


 19%|█▉        | 2916/15000 [05:44<18:09, 11.09it/s]


 epoch: 2915 | train_loss: 0.31, train_acc: 96.5% | test_loss: 0.29, test_acc: 96.6%

 epoch: 2916 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.9%

 epoch: 2917 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.31, test_acc: 96.7%


 19%|█▉        | 2920/15000 [05:44<18:46, 10.72it/s]


 epoch: 2918 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.29, test_acc: 96.6%

 epoch: 2919 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.28, test_acc: 96.6%

input:       another self replicating unit with potential significance in explaining human behavior and cultural evolution dawkins used the term to

target:      another self replicating unit with potential significance in explaining human behavior and cultural evolution dawkins used the term to refer

prediction:  another self replicating unit with potential significance in explaining human behavior and cultural evolution dawkins used the term to the

 epoch: 2920 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.7%


 19%|█▉        | 2922/15000 [05:45<32:22,  6.22it/s]


 epoch: 2921 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.8%

 epoch: 2922 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.29, test_acc: 96.5%

 epoch: 2923 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.9%


 20%|█▉        | 2926/15000 [05:45<24:11,  8.32it/s]


 epoch: 2924 | train_loss: 0.30, train_acc: 96.7% | test_loss: 0.30, test_acc: 96.8%

 epoch: 2925 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.27, test_acc: 97.0%

 epoch: 2926 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.30, test_acc: 96.6%


 20%|█▉        | 2928/15000 [05:45<21:59,  9.15it/s]


 epoch: 2927 | train_loss: 0.30, train_acc: 96.8% | test_loss: 0.27, test_acc: 96.7%

 epoch: 2928 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%

 epoch: 2929 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.29, test_acc: 96.5%


 20%|█▉        | 2930/15000 [05:46<21:20,  9.43it/s]


input:       active research continues to work toward understanding the genetic and environmental bases of behavior and their interaction psychology encompasses

target:      active research continues to work toward understanding the genetic and environmental bases of behavior and their interaction psychology encompasses many

prediction:  active research continues to work toward understanding the genetic and environmental bases of behavior and their interaction psychology encompasses the

 epoch: 2930 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.29, test_acc: 96.7%

 epoch: 2931 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.26, test_acc: 96.8%


 20%|█▉        | 2934/15000 [05:46<19:03, 10.56it/s]


 epoch: 2932 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.31, test_acc: 96.4%

 epoch: 2933 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.29, test_acc: 96.8%

 epoch: 2934 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.31, test_acc: 96.4%

 epoch: 2935 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.27, test_acc: 97.1%

 epoch: 2936 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.31, test_acc: 96.9%

 20%|█▉        | 2938/15000 [05:46<20:20,  9.88it/s]



 epoch: 2937 | train_loss: 0.27, train_acc: 96.6% | test_loss: 0.26, test_acc: 97.1%

 epoch: 2938 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.30, test_acc: 96.5%


 20%|█▉        | 2940/15000 [05:46<19:59, 10.05it/s]


 epoch: 2939 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.30, test_acc: 96.7%

input:       at naissus succeeded him as the emperor and continued the restoration of the empire aurelian reigned through the worst

target:      at naissus succeeded him as the emperor and continued the restoration of the empire aurelian reigned through the worst of

prediction:  at naissus succeeded him as the emperor and continued the restoration of the empire aurelian reigned through the worst the

 epoch: 2940 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.25, test_acc: 97.1%

 epoch: 2941 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.33, test_acc: 96.2%


 20%|█▉        | 2944/15000 [05:47<17:53, 11.23it/s]


 epoch: 2942 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.25, test_acc: 97.0%

 epoch: 2943 | train_loss: 0.27, train_acc: 97.2% | test_loss: 0.29, test_acc: 96.4%

 epoch: 2944 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.29, test_acc: 96.5%


 20%|█▉        | 2946/15000 [05:47<17:45, 11.31it/s]


 epoch: 2945 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.31, test_acc: 96.1%

 epoch: 2946 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.0%

 epoch: 2947 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.28, test_acc: 97.0%


 20%|█▉        | 2948/15000 [05:47<17:39, 11.37it/s]


 epoch: 2948 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.25, test_acc: 96.7%

 epoch: 2949 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.8%

input:       is the national sport of new zealand samoa fiji and tonga the most popular overall sport in australia is

target:      is the national sport of new zealand samoa fiji and tonga the most popular overall sport in australia is cricket

prediction:  is the national sport of new zealand samoa fiji and tonga the most popular overall sport in australia is the


 20%|█▉        | 2952/15000 [05:48<27:09,  7.39it/s]


 epoch: 2950 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.28, test_acc: 96.7%

 epoch: 2951 | train_loss: 0.30, train_acc: 96.7% | test_loss: 0.26, test_acc: 96.7%

 epoch: 2952 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.29, test_acc: 96.4%


 20%|█▉        | 2954/15000 [05:48<23:55,  8.39it/s]


 epoch: 2953 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.31, test_acc: 96.5%

 epoch: 2954 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.30, test_acc: 96.5%

 epoch: 2955 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.28, test_acc: 96.8%


 20%|█▉        | 2958/15000 [05:48<20:11,  9.94it/s]


 epoch: 2956 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.31, test_acc: 96.2%

 epoch: 2957 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.26, test_acc: 97.1%

 epoch: 2958 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.8%


 20%|█▉        | 2960/15000 [05:49<20:30,  9.78it/s]


 epoch: 2959 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.28, test_acc: 96.7%

input:       history with over million copies sold and nearly million monthly active players as of update it has been ported

target:      history with over million copies sold and nearly million monthly active players as of update it has been ported to

prediction:  history with over million copies sold and nearly million monthly active players as of update it has been ported the

 epoch: 2960 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.9%


 20%|█▉        | 2962/15000 [05:49<19:45, 10.15it/s]


 epoch: 2961 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.8%

 epoch: 2962 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.28, test_acc: 96.9%

 epoch: 2963 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.28, test_acc: 96.5%


 20%|█▉        | 2964/15000 [05:49<19:06, 10.49it/s]


 epoch: 2964 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.27, test_acc: 97.0%


 20%|█▉        | 2966/15000 [05:50<31:11,  6.43it/s]


 epoch: 2965 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.26, test_acc: 96.9%

 epoch: 2966 | train_loss: 0.30, train_acc: 96.3% | test_loss: 0.31, test_acc: 96.3%

 epoch: 2967 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.29, test_acc: 96.6%


 20%|█▉        | 2970/15000 [05:50<24:41,  8.12it/s]


 epoch: 2968 | train_loss: 0.27, train_acc: 96.5% | test_loss: 0.30, test_acc: 96.3%

 epoch: 2969 | train_loss: 0.30, train_acc: 96.7% | test_loss: 0.28, test_acc: 96.8%

input:       john massironi founded the american culinary federation in taking after similar organizations across europe in the chef james beard

target:      john massironi founded the american culinary federation in taking after similar organizations across europe in the chef james beard hosted

prediction:  john massironi founded the american culinary federation in taking after similar organizations across europe in the chef james beard the

 epoch: 2970 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 20%|█▉        | 2972/15000 [05:50<24:02,  8.34it/s]


 epoch: 2971 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.29, test_acc: 96.7%

 epoch: 2972 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.6%


 20%|█▉        | 2974/15000 [05:50<23:05,  8.68it/s]


 epoch: 2973 | train_loss: 0.30, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.6%

 epoch: 2974 | train_loss: 0.25, train_acc: 96.7% | test_loss: 0.27, test_acc: 97.2%


 20%|█▉        | 2976/15000 [05:51<23:28,  8.54it/s]


 epoch: 2975 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.8%

 epoch: 2976 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.3%


 20%|█▉        | 2977/15000 [05:51<24:23,  8.21it/s]


 epoch: 2977 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.9%


 20%|█▉        | 2979/15000 [05:51<40:52,  4.90it/s]


 epoch: 2978 | train_loss: 0.28, train_acc: 96.5% | test_loss: 0.27, test_acc: 96.9%

 epoch: 2979 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.28, test_acc: 96.8%

input:       hull and had mastered advanced forms of shipbuilding as early as bc the archaeological institute of america reports that


 20%|█▉        | 2981/15000 [05:52<34:11,  5.86it/s]


target:      hull and had mastered advanced forms of shipbuilding as early as bc the archaeological institute of america reports that the

prediction:  hull and had mastered advanced forms of data as early as bc the archaeological institute of america reports that the

 epoch: 2980 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.5%

 epoch: 2981 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.29, test_acc: 96.5%


 20%|█▉        | 2983/15000 [05:52<27:37,  7.25it/s]


 epoch: 2982 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.29, test_acc: 96.6%

 epoch: 2983 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.30, test_acc: 96.7%


 20%|█▉        | 2985/15000 [05:52<25:04,  7.99it/s]


 epoch: 2984 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.27, test_acc: 96.9%

 epoch: 2985 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.30, test_acc: 96.7%


 20%|█▉        | 2987/15000 [05:52<23:17,  8.60it/s]


 epoch: 2986 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.27, test_acc: 96.8%

 epoch: 2987 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.29, test_acc: 96.9%


 20%|█▉        | 2989/15000 [05:53<22:49,  8.77it/s]


 epoch: 2988 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.6%

 epoch: 2989 | train_loss: 0.31, train_acc: 96.5% | test_loss: 0.27, test_acc: 97.0%

input:       for of the world population similar number of people speak the afroasiatic languages which include the populous semitic languages

target:      for of the world population similar number of people speak the afroasiatic languages which include the populous semitic languages such


 20%|█▉        | 2991/15000 [05:53<25:36,  7.82it/s]


prediction:  for of the world population similar number of people speak the afroasiatic languages which include the populous semitic languages the

 epoch: 2990 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.27, test_acc: 96.9%

 epoch: 2991 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.9%


 20%|█▉        | 2993/15000 [05:53<25:08,  7.96it/s]


 epoch: 2992 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.30, test_acc: 96.4%

 epoch: 2993 | train_loss: 0.27, train_acc: 96.6% | test_loss: 0.26, test_acc: 96.8%


 20%|█▉        | 2995/15000 [05:53<22:58,  8.71it/s]


 epoch: 2994 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.8%

 epoch: 2995 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.0%


 20%|█▉        | 2997/15000 [05:54<21:26,  9.33it/s]


 epoch: 2996 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.3%

 epoch: 2997 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.28, test_acc: 96.5%

 epoch: 2998 | train_loss: 0.28, train_acc: 96.4% | test_loss: 0.26, test_acc: 97.1%


 20%|██        | 3000/15000 [05:54<25:13,  7.93it/s]


 epoch: 2999 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.28, test_acc: 96.7%

input:       oil were sacralized by roman christianity while germanic meat consumption became mark of paganism some philosophers and christians resisted

target:      oil were sacralized by roman christianity while germanic meat consumption became mark of paganism some philosophers and christians resisted the

prediction:  oil were basis by roman christianity while germanic meat consumption became mark of paganism some philosophers and christians resisted the

 epoch: 3000 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.5%


 20%|██        | 3002/15000 [05:54<21:50,  9.15it/s]


 epoch: 3001 | train_loss: 0.31, train_acc: 96.1% | test_loss: 0.31, test_acc: 96.7%

 epoch: 3002 | train_loss: 0.29, train_acc: 97.0% | test_loss: 0.28, test_acc: 97.0%

 epoch: 3003 | train_loss: 0.31, train_acc: 96.2% | test_loss: 0.28, test_acc: 96.6%


 20%|██        | 3006/15000 [05:54<18:53, 10.58it/s]


 epoch: 3004 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3005 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3006 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.27, test_acc: 97.0%


 20%|██        | 3008/15000 [05:55<19:33, 10.22it/s]


 epoch: 3007 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.27, test_acc: 96.7%

 epoch: 3008 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.27, test_acc: 96.7%

 epoch: 3009 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.26, test_acc: 97.0%


 20%|██        | 3010/15000 [05:55<19:59, 10.00it/s]


input:       japanese archipelago but that has been administered by russia since world war ii it is located within marginal sea

target:      japanese archipelago but that has been administered by russia since world war ii it is located within marginal sea of

prediction:  japanese archipelago but that has been administered by russia since world war ii it is located within marginal sea the

 epoch: 3010 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3011 | train_loss: 0.28, train_acc: 97.0% | test_loss: 0.28, test_acc: 96.8%


 20%|██        | 3014/15000 [05:55<18:11, 10.98it/s]


 epoch: 3012 | train_loss: 0.30, train_acc: 97.0% | test_loss: 0.28, test_acc: 96.7%

 epoch: 3013 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.6%

 epoch: 3014 | train_loss: 0.28, train_acc: 97.1% | test_loss: 0.28, test_acc: 96.8%


 20%|██        | 3016/15000 [05:55<17:43, 11.27it/s]


 epoch: 3015 | train_loss: 0.27, train_acc: 96.6% | test_loss: 0.25, test_acc: 96.9%

 epoch: 3016 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.8%

 epoch: 3017 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.9%


 20%|██        | 3020/15000 [05:56<18:25, 10.83it/s]


 epoch: 3018 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3019 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.29, test_acc: 96.4%

input:       the player would be banned from all servers for specific period of time or permanently the update containing the

target:      the player would be banned from all servers for specific period of time or permanently the update containing the report

prediction:  the player would be banned from all servers for specific period of time or permanently the update containing the the

 epoch: 3020 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.28, test_acc: 96.5%


 20%|██        | 3022/15000 [05:56<31:20,  6.37it/s]


 epoch: 3021 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.8%

 epoch: 3022 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.28, test_acc: 96.5%

 epoch: 3023 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.6%


 20%|██        | 3026/15000 [05:57<23:30,  8.49it/s]


 epoch: 3024 | train_loss: 0.26, train_acc: 96.6% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3025 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3026 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.29, test_acc: 96.7%


 20%|██        | 3028/15000 [05:57<21:16,  9.38it/s]


 epoch: 3027 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.28, test_acc: 96.5%

 epoch: 3028 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3029 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%


 20%|██        | 3030/15000 [05:57<21:08,  9.44it/s]


input:       associates the same year the persecutions continued until tiberius death in at the time of tiberius death most of

target:      associates the same year the persecutions continued until tiberius death in at the time of tiberius death most of the

prediction:  associates the same year the persecutions continued until tiberius death in at the time of tiberius death most of the

 epoch: 3030 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.8%

 epoch: 3031 | train_loss: 0.28, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.7%


 20%|██        | 3034/15000 [05:57<19:05, 10.44it/s]


 epoch: 3032 | train_loss: 0.27, train_acc: 96.6% | test_loss: 0.26, test_acc: 96.8%

 epoch: 3033 | train_loss: 0.26, train_acc: 96.6% | test_loss: 0.28, test_acc: 97.0%

 epoch: 3034 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.29, test_acc: 96.6%

 epoch: 3035 | train_loss: 0.27, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.2%


 20%|██        | 3038/15000 [05:58<24:29,  8.14it/s]


 epoch: 3036 | train_loss: 0.28, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.8%

 epoch: 3037 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.30, test_acc: 96.7%

 epoch: 3038 | train_loss: 0.28, train_acc: 97.0% | test_loss: 0.29, test_acc: 96.6%


 20%|██        | 3040/15000 [05:58<23:06,  8.63it/s]


 epoch: 3039 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.6%

input:       pocket edition was released on ios android and windows phone pocket edition was remade into the bedrock edition in

target:      pocket edition was released on ios android and windows phone pocket edition was remade into the bedrock edition in enabling

prediction:  pocket edition was released on ios android and windows phone pocket edition was remade into the bedrock edition in the

 epoch: 3040 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.28, test_acc: 96.7%

 epoch: 3041 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.28, test_acc: 96.4%


 20%|██        | 3044/15000 [05:59<19:25, 10.26it/s]


 epoch: 3042 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.27, test_acc: 97.0%

 epoch: 3043 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.28, test_acc: 96.9%

 epoch: 3044 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.29, test_acc: 96.8%


 20%|██        | 3046/15000 [05:59<18:36, 10.71it/s]


 epoch: 3045 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.30, test_acc: 96.7%

 epoch: 3046 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.27, test_acc: 96.5%

 epoch: 3047 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 20%|██        | 3048/15000 [05:59<18:28, 10.79it/s]


 epoch: 3048 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.28, test_acc: 97.0%

 epoch: 3049 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.28, test_acc: 96.7%

input:       workshop was done by his assistants according to vasari leonardo collaborated with verrocchio on his the baptism of christ

target:      workshop was done by his assistants according to vasari leonardo collaborated with verrocchio on his the baptism of christ painting

prediction:  workshop was done by his assistants according to vasari leonardo collaborated with verrocchio on his the baptism of christ the


 20%|██        | 3052/15000 [06:00<27:20,  7.28it/s]


 epoch: 3050 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.32, test_acc: 96.4%

 epoch: 3051 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.29, test_acc: 96.4%

 epoch: 3052 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.8%


 20%|██        | 3054/15000 [06:00<23:57,  8.31it/s]


 epoch: 3053 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.28, test_acc: 96.8%

 epoch: 3054 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.29, test_acc: 97.0%

 epoch: 3055 | train_loss: 0.28, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%


 20%|██        | 3058/15000 [06:00<20:05,  9.91it/s]


 epoch: 3056 | train_loss: 0.26, train_acc: 96.4% | test_loss: 0.27, test_acc: 97.2%

 epoch: 3057 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.29, test_acc: 96.7%

 epoch: 3058 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.28, test_acc: 96.9%


 20%|██        | 3060/15000 [06:00<19:55,  9.99it/s]


 epoch: 3059 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.29, test_acc: 96.6%

input:       for rome conflict with christianity which romans variously regarded as form of atheism and superstitio the romans are known

target:      for rome conflict with christianity which romans variously regarded as form of atheism and superstitio the romans are known for

prediction:  for rome conflict with christianity which romans variously regarded as form of atheism and sugars the romans are known the

 epoch: 3060 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3061 | train_loss: 0.28, train_acc: 96.5% | test_loss: 0.32, test_acc: 96.4%


 20%|██        | 3062/15000 [06:01<18:51, 10.55it/s]


 epoch: 3062 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%

 epoch: 3063 | train_loss: 0.27, train_acc: 96.6% | test_loss: 0.29, test_acc: 96.5%


 20%|██        | 3066/15000 [06:01<24:39,  8.07it/s]


 epoch: 3064 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.30, test_acc: 96.7%

 epoch: 3065 | train_loss: 0.27, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%

 epoch: 3066 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.29, test_acc: 96.7%


 20%|██        | 3068/15000 [06:01<21:54,  9.08it/s]


 epoch: 3067 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.28, test_acc: 97.0%

 epoch: 3068 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.27, test_acc: 97.0%

 epoch: 3069 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.29, test_acc: 96.8%


 20%|██        | 3070/15000 [06:02<21:30,  9.25it/s]


input:       enclosed hypostyle hall to the front of the temple sanctuary style that was standard until the greco roman period

target:      enclosed hypostyle hall to the front of the temple sanctuary style that was standard until the greco roman period the

prediction:  enclosed hypostyle hall to the front of the temple sanctuary style that was standard until the greco roman period the

 epoch: 3070 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.8%

 epoch: 3071 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.28, test_acc: 96.7%


 20%|██        | 3074/15000 [06:02<19:00, 10.46it/s]


 epoch: 3072 | train_loss: 0.30, train_acc: 96.8% | test_loss: 0.27, test_acc: 97.1%

 epoch: 3073 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3074 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.9%


 21%|██        | 3076/15000 [06:02<18:32, 10.71it/s]


 epoch: 3075 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.8%

 epoch: 3076 | train_loss: 0.26, train_acc: 97.2% | test_loss: 0.29, test_acc: 96.8%

 epoch: 3077 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.9%


 21%|██        | 3078/15000 [06:02<18:03, 11.00it/s]


 epoch: 3078 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.1%


 21%|██        | 3080/15000 [06:03<31:27,  6.31it/s]


 epoch: 3079 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.28, test_acc: 96.9%

input:       can unleash creativity thus raising morale so in the interest of encouraging employee consent to the rigours of the

target:      can unleash creativity thus raising morale so in the interest of encouraging employee consent to the rigours of the labour

prediction:  can unleash creativity thus raising morale so in the interest of encouraging employee consent to the rigours of the the

 epoch: 3080 | train_loss: 0.30, train_acc: 96.4% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3081 | train_loss: 0.28, train_acc: 96.5% | test_loss: 0.25, test_acc: 97.0%


 21%|██        | 3084/15000 [06:03<23:28,  8.46it/s]


 epoch: 3082 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3083 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.28, test_acc: 97.0%

 epoch: 3084 | train_loss: 0.26, train_acc: 96.5% | test_loss: 0.25, test_acc: 96.9%


 21%|██        | 3086/15000 [06:03<21:20,  9.31it/s]


 epoch: 3085 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3086 | train_loss: 0.30, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.5%

 epoch: 3087 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.9%


 21%|██        | 3090/15000 [06:04<19:45, 10.05it/s]


 epoch: 3088 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3089 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.29, test_acc: 96.6%

input:       development developmental psychologists who study children use number of research methods for example they make observations of children in

target:      development developmental psychologists who study children use number of research methods for example they make observations of children in natural

prediction:  development developmental psychologists who study children use number of research methods for example they make observations of children in the

 epoch: 3090 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.8%


 21%|██        | 3092/15000 [06:04<18:59, 10.45it/s]


 epoch: 3091 | train_loss: 0.27, train_acc: 96.6% | test_loss: 0.26, test_acc: 96.8%

 epoch: 3092 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.6%


 21%|██        | 3094/15000 [06:05<32:39,  6.08it/s]


 epoch: 3093 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 3094 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.27, test_acc: 97.0%

 epoch: 3095 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.28, test_acc: 96.4%


 21%|██        | 3098/15000 [06:05<25:36,  7.75it/s]


 epoch: 3096 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.28, test_acc: 96.9%

 epoch: 3097 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3098 | train_loss: 0.30, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.8%


 21%|██        | 3100/15000 [06:05<26:11,  7.57it/s]


 epoch: 3099 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.28, test_acc: 96.7%

input:       uses teacher centered education and takes place in well regulated school environment regulations cover many aspects of education such

target:      uses teacher centered education and takes place in well regulated school environment regulations cover many aspects of education such as

prediction:  uses teacher centered education and takes place in well regulated school environment regulations cover many aspects of education such the

 epoch: 3100 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.9%


 21%|██        | 3102/15000 [06:05<24:02,  8.25it/s]


 epoch: 3101 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.8%

 epoch: 3102 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.30, test_acc: 96.3%


 21%|██        | 3104/15000 [06:06<23:34,  8.41it/s]


 epoch: 3103 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.8%

 epoch: 3104 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.26, test_acc: 96.8%


 21%|██        | 3106/15000 [06:06<23:29,  8.44it/s]


 epoch: 3105 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3106 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.8%


 21%|██        | 3108/15000 [06:07<41:20,  4.79it/s]


 epoch: 3107 | train_loss: 0.27, train_acc: 97.1% | test_loss: 0.28, test_acc: 96.9%

 epoch: 3108 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.29, test_acc: 96.7%


 21%|██        | 3110/15000 [06:07<34:45,  5.70it/s]


 epoch: 3109 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.7%

input:       the hawaiian islands with expenditures of over billion due to the mild year round weather tourist travel is popular

target:      the hawaiian islands with expenditures of over billion due to the mild year round weather tourist travel is popular throughout

prediction:  the hawaiian islands with expenditures of over billion due to the mild year round weather tourist travel is popular the

 epoch: 3110 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%


 21%|██        | 3112/15000 [06:07<29:24,  6.74it/s]


 epoch: 3111 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.7%

 epoch: 3112 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.6%


 21%|██        | 3114/15000 [06:07<25:25,  7.79it/s]


 epoch: 3113 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3114 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.8%

 epoch: 3115 | train_loss: 0.30, train_acc: 96.7% | test_loss: 0.28, test_acc: 96.7%


 21%|██        | 3117/15000 [06:08<20:33,  9.63it/s]


 epoch: 3116 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.8%

 epoch: 3117 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.8%


 21%|██        | 3119/15000 [06:08<19:22, 10.22it/s]


 epoch: 3118 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.7%

 epoch: 3119 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.6%

input:       and the development of formal education the invention of writing had significant influence on the history of education as

target:      and the development of formal education the invention of writing had significant influence on the history of education as whole

prediction:  and the development of formal education the invention of writing had significant influence on the history of education as the

 epoch: 3120 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.30, test_acc: 96.6%


 21%|██        | 3121/15000 [06:08<19:46, 10.01it/s]


 epoch: 3121 | train_loss: 0.28, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%


 21%|██        | 3123/15000 [06:08<25:21,  7.81it/s]


 epoch: 3122 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3123 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.29, test_acc: 97.0%

 epoch: 3124 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.8%


 21%|██        | 3127/15000 [06:09<20:53,  9.47it/s]


 epoch: 3125 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.8%

 epoch: 3126 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.1%

 epoch: 3127 | train_loss: 0.27, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 21%|██        | 3129/15000 [06:09<19:44, 10.03it/s]


 epoch: 3128 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.27, test_acc: 97.1%

 epoch: 3129 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.25, test_acc: 96.8%

input:       education and new members had to pass through different stages on their way to masterhood the invention and popularization

target:      education and new members had to pass through different stages on their way to masterhood the invention and popularization of

prediction:  education and new members had to pass through different stages on their way to masterhood the invention and popularization the


 21%|██        | 3131/15000 [06:09<20:15,  9.76it/s]


 epoch: 3130 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3131 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.28, test_acc: 95.9%

 epoch: 3132 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.6%


 21%|██        | 3135/15000 [06:10<19:13, 10.29it/s]


 epoch: 3133 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3134 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.27, test_acc: 97.1%

 epoch: 3135 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.27, test_acc: 97.1%


 21%|██        | 3137/15000 [06:10<31:59,  6.18it/s]


 epoch: 3136 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.28, test_acc: 96.6%

 epoch: 3137 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.28, test_acc: 96.4%

 epoch: 3138 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.31, test_acc: 96.7%


 21%|██        | 3140/15000 [06:10<26:48,  7.37it/s]


 epoch: 3139 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.6%

input:       the universe cooled sufficiently to allow the formation of subatomic particles and later atoms the unequal abundances of matter

target:      the universe cooled sufficiently to allow the formation of subatomic particles and later atoms the unequal abundances of matter and

prediction:  the universe cooled sufficiently to allow the formation of subatomic particles and later atoms the unequal abundances of matter the

 epoch: 3140 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.0%


 21%|██        | 3142/15000 [06:11<23:44,  8.33it/s]


 epoch: 3141 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3142 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.28, test_acc: 96.7%

 epoch: 3143 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.9%


 21%|██        | 3146/15000 [06:11<20:12,  9.78it/s]


 epoch: 3144 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3145 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3146 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.9%


 21%|██        | 3148/15000 [06:11<19:36, 10.07it/s]


 epoch: 3147 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3148 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.8%

 epoch: 3149 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.29, test_acc: 96.8%

input:       metropolitan region of paulo is the largest and busiest in the country the airport connects paulo to

target:      metropolitan region of paulo is the largest and busiest in the country the airport connects paulo to practically

prediction:  metropolitan region of paulo is the largest and busiest in the country the airport connects paulo to the


 21%|██        | 3152/15000 [06:12<28:45,  6.87it/s]


 epoch: 3150 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.5%

 epoch: 3151 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3152 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.9%


 21%|██        | 3154/15000 [06:12<25:10,  7.84it/s]


 epoch: 3153 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%

 epoch: 3154 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.25, test_acc: 96.9%

 epoch: 3155 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%


 21%|██        | 3158/15000 [06:12<20:48,  9.49it/s]


 epoch: 3156 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.2%

 epoch: 3157 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.27, test_acc: 97.0%

 epoch: 3158 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.28, test_acc: 96.6%


 21%|██        | 3160/15000 [06:13<20:34,  9.59it/s]


 epoch: 3159 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.27, test_acc: 97.0%

input:       expedition in and was the first to claim he had discovered the continent the british naval officer james clark

target:      expedition in and was the first to claim he had discovered the continent the british naval officer james clark ross

prediction:  expedition in and was the first to claim he had discovered the continent the british naval officer james clark the

 epoch: 3160 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.8%


 21%|██        | 3162/15000 [06:13<19:38, 10.04it/s]


 epoch: 3161 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.8%

 epoch: 3162 | train_loss: 0.26, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3163 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%


 21%|██        | 3166/15000 [06:13<23:11,  8.50it/s]


 epoch: 3164 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.8%

 epoch: 3165 | train_loss: 0.32, train_acc: 96.3% | test_loss: 0.29, test_acc: 96.4%

 epoch: 3166 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.30, test_acc: 96.4%


 21%|██        | 3168/15000 [06:14<20:54,  9.43it/s]


 epoch: 3167 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.8%

 epoch: 3168 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.7%

 epoch: 3169 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 21%|██        | 3170/15000 [06:14<20:36,  9.57it/s]


input:       dominican republic haiti puerto rico territory of the jamaica and trinidad and tobago each have populations higher than

target:      dominican republic haiti puerto rico territory of the jamaica and trinidad and tobago each have populations higher than million

prediction:  dominican republic haiti puerto rico territory of the jamaica and trinidad and tobago each have populations higher than the

 epoch: 3170 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.29, test_acc: 96.6%

 epoch: 3171 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.9%


 21%|██        | 3174/15000 [06:14<18:27, 10.68it/s]


 epoch: 3172 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.6%

 epoch: 3173 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3174 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.27, test_acc: 96.9%


 21%|██        | 3176/15000 [06:14<18:07, 10.87it/s]


 epoch: 3175 | train_loss: 0.28, train_acc: 97.0% | test_loss: 0.28, test_acc: 96.7%

 epoch: 3176 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3177 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 21%|██        | 3178/15000 [06:15<17:41, 11.13it/s]


 epoch: 3178 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%


 21%|██        | 3180/15000 [06:15<31:07,  6.33it/s]


 epoch: 3179 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.29, test_acc: 96.7%

input:       of joke theft some ending in lawsuits for copyright infringement those accused will sometimes claim cryptomnesia or parallel thinking

target:      of joke theft some ending in lawsuits for copyright infringement those accused will sometimes claim cryptomnesia or parallel thinking but

prediction:  of joke theft some ending in lawsuits for copyright infringement those accused will sometimes claim chera or parallel thinking the

 epoch: 3180 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.8%


 21%|██        | 3182/15000 [06:15<27:11,  7.24it/s]


 epoch: 3181 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.8%

 epoch: 3182 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.7%

 epoch: 3183 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.9%


 21%|██        | 3186/15000 [06:16<22:13,  8.86it/s]


 epoch: 3184 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.6%

 epoch: 3185 | train_loss: 0.29, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3186 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 21%|██▏       | 3188/15000 [06:16<20:27,  9.63it/s]


 epoch: 3187 | train_loss: 0.30, train_acc: 96.5% | test_loss: 0.28, test_acc: 96.6%

 epoch: 3188 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.29, test_acc: 96.7%

 epoch: 3189 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.2%


 21%|██▏       | 3190/15000 [06:16<20:18,  9.69it/s]


input:       remained remarkably stable and with few brief interruptions remained in place for years until bc aftermath of lamian war

target:      remained remarkably stable and with few brief interruptions remained in place for years until bc aftermath of lamian war the

prediction:  remained remarkably stable and with few brief interruptions remained in place for years until bc aftermath of lamian war the

 epoch: 3190 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.4%

 epoch: 3191 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.31, test_acc: 96.2%


 21%|██▏       | 3192/15000 [06:16<19:23, 10.15it/s]


 epoch: 3192 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.9%


 21%|██▏       | 3194/15000 [06:17<25:39,  7.67it/s]


 epoch: 3193 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.29, test_acc: 96.7%

 epoch: 3194 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.6%

 epoch: 3195 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%


 21%|██▏       | 3198/15000 [06:17<21:01,  9.35it/s]


 epoch: 3196 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.29, test_acc: 96.7%

 epoch: 3197 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3198 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.28, test_acc: 96.7%


 21%|██▏       | 3200/15000 [06:17<20:31,  9.58it/s]


 epoch: 3199 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.9%

input:       of the cathedral of pavia and was struck by the equestrian statue of regisole of which he left sketch

target:      of the cathedral of pavia and was struck by the equestrian statue of regisole of which he left sketch leonardo

prediction:  of the cathedral of pavia and was struck by the equestrian statue of regisole of which he left sketch the

 epoch: 3200 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.7%

 epoch: 3201 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.27, test_acc: 97.0%


 21%|██▏       | 3204/15000 [06:18<19:15, 10.21it/s]


 epoch: 3202 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3203 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.25, test_acc: 96.9%

 epoch: 3204 | train_loss: 0.28, train_acc: 97.0% | test_loss: 0.28, test_acc: 96.7%


 21%|██▏       | 3206/15000 [06:18<19:26, 10.11it/s]


 epoch: 3205 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3206 | train_loss: 0.26, train_acc: 96.6% | test_loss: 0.27, test_acc: 96.6%


 21%|██▏       | 3209/15000 [06:19<31:45,  6.19it/s]


 epoch: 3207 | train_loss: 0.30, train_acc: 95.8% | test_loss: 0.27, test_acc: 96.4%

 epoch: 3208 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.4%

 epoch: 3209 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.0%


 21%|██▏       | 3210/15000 [06:19<31:53,  6.16it/s]


input:       is member state of the eu malta was considered an island of north western africa for centuries but now

target:      is member state of the eu malta was considered an island of north western africa for centuries but now it

prediction:  is member state of the eu malta was considered an island of north western africa for centuries but now of

 epoch: 3210 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.6%

 epoch: 3211 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%


 21%|██▏       | 3214/15000 [06:19<24:28,  8.03it/s]


 epoch: 3212 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.8%

 epoch: 3213 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.7%

 epoch: 3214 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.9%


 21%|██▏       | 3217/15000 [06:19<22:07,  8.87it/s]


 epoch: 3215 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.4%

 epoch: 3216 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.8%

 epoch: 3217 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.28, test_acc: 96.5%


 21%|██▏       | 3219/15000 [06:20<22:36,  8.69it/s]


 epoch: 3218 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3219 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.28, test_acc: 96.6%

input:       often seeming dry or dull without it shows may be filmed in front of live audience for the same


 21%|██▏       | 3221/15000 [06:20<24:44,  7.94it/s]


target:      often seeming dry or dull without it shows may be filmed in front of live audience for the same reason

prediction:  often seeming dry or dull without it shows may be filmed in front of live audience for the same the

 epoch: 3220 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.4%

 epoch: 3221 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.28, test_acc: 97.0%


 21%|██▏       | 3223/15000 [06:21<40:05,  4.90it/s]


 epoch: 3222 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.28, test_acc: 96.9%

 epoch: 3223 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.23, test_acc: 96.8%


 22%|██▏       | 3226/15000 [06:21<27:26,  7.15it/s]


 epoch: 3224 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.7%

 epoch: 3225 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.1%

 epoch: 3226 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.26, test_acc: 97.0%


 22%|██▏       | 3228/15000 [06:21<23:23,  8.39it/s]


 epoch: 3227 | train_loss: 0.29, train_acc: 96.5% | test_loss: 0.25, test_acc: 96.9%

 epoch: 3228 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3229 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.26, test_acc: 97.0%


 22%|██▏       | 3230/15000 [06:21<22:14,  8.82it/s]


input:       your houses consists of the morphemes ev ler iniz den with the meanings house plural your from the languages

target:      your houses consists of the morphemes ev ler iniz den with the meanings house plural your from the languages that

prediction:  your houses consists of the morphemes ev barbara waste den with the meanings house plural your from the languages the

 epoch: 3230 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3231 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.9%


 22%|██▏       | 3234/15000 [06:22<19:47,  9.91it/s]


 epoch: 3232 | train_loss: 0.27, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 3233 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.28, test_acc: 96.6%

 epoch: 3234 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.6%

 epoch: 3235 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.27, test_acc: 97.1%


 22%|██▏       | 3238/15000 [06:22<28:12,  6.95it/s]


 epoch: 3236 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.27, test_acc: 97.0%

 epoch: 3237 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3238 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 22%|██▏       | 3240/15000 [06:23<25:32,  7.67it/s]


 epoch: 3239 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%

input:       meat exports is important in argentina paraguay uruguay and colombia in tropical regions the most important crops are coffee

target:      meat exports is important in argentina paraguay uruguay and colombia in tropical regions the most important crops are coffee cocoa

prediction:  meat exports is important in argentina paraguay uruguay and colombia in tropical regions the most important crops are coffee the

 epoch: 3240 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.27, test_acc: 97.0%

 epoch: 3241 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.8%


 22%|██▏       | 3244/15000 [06:23<21:10,  9.25it/s]


 epoch: 3242 | train_loss: 0.26, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%

 epoch: 3243 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.28, test_acc: 96.9%

 epoch: 3244 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.27, test_acc: 97.0%


 22%|██▏       | 3246/15000 [06:23<19:48,  9.89it/s]


 epoch: 3245 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.7%

 epoch: 3246 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 3247 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.27, test_acc: 96.9%


 22%|██▏       | 3248/15000 [06:23<18:43, 10.46it/s]


 epoch: 3248 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.8%

 epoch: 3249 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.29, test_acc: 96.7%

input:       as well as how to conceptualize teachers students and their relation it includes educational ethics which examines various moral

target:      as well as how to conceptualize teachers students and their relation it includes educational ethics which examines various moral issues

prediction:  as well as how to conceptualize teachers students and their relation it includes educational ethics which examines various moral the


 22%|██▏       | 3252/15000 [06:24<20:50,  9.40it/s]


 epoch: 3250 | train_loss: 0.27, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.8%

 epoch: 3251 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.1%

 epoch: 3252 | train_loss: 0.27, train_acc: 97.1% | test_loss: 0.28, test_acc: 96.6%


 22%|██▏       | 3254/15000 [06:24<19:29, 10.04it/s]


 epoch: 3253 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3254 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3255 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.3%


 22%|██▏       | 3258/15000 [06:24<17:46, 11.01it/s]


 epoch: 3256 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.29, test_acc: 96.6%

 epoch: 3257 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.7%

 epoch: 3258 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.25, test_acc: 97.0%


 22%|██▏       | 3260/15000 [06:25<18:20, 10.66it/s]


 epoch: 3259 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.8%

input:       software there are broadly two approaches to programming language implementation compilation and interpretation it is generally possible to implement

target:      software there are broadly two approaches to programming language implementation compilation and interpretation it is generally possible to implement language

prediction:  software there are broadly two approaches to programming language implementation compilation and interpretation it is generally possible to implement the

 epoch: 3260 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.8%

 epoch: 3261 | train_loss: 0.26, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.8%


 22%|██▏       | 3262/15000 [06:25<17:59, 10.88it/s]


 epoch: 3262 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3263 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.8%


 22%|██▏       | 3266/15000 [06:26<27:09,  7.20it/s]


 epoch: 3264 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.8%

 epoch: 3265 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3266 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.7%


 22%|██▏       | 3268/15000 [06:26<23:53,  8.18it/s]


 epoch: 3267 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.8%

 epoch: 3268 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.29, test_acc: 96.9%

 epoch: 3269 | train_loss: 0.27, train_acc: 97.3% | test_loss: 0.27, test_acc: 97.0%


 22%|██▏       | 3270/15000 [06:26<22:48,  8.57it/s]


input:       the use of computer networks as for communication entertainment work and business however both oed and the american heritage

target:      the use of computer networks as for communication entertainment work and business however both oed and the american heritage dictionary

prediction:  the use of computer networks as for communication entertainment work and business however both oed and the american heritage the

 epoch: 3270 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.3%

 epoch: 3271 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.0%


 22%|██▏       | 3274/15000 [06:26<19:19, 10.11it/s]


 epoch: 3272 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.30, test_acc: 96.7%

 epoch: 3273 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.25, test_acc: 97.2%

 epoch: 3274 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.1%


 22%|██▏       | 3276/15000 [06:26<18:56, 10.32it/s]


 epoch: 3275 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.7%

 epoch: 3276 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3277 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.27, test_acc: 97.0%


 22%|██▏       | 3278/15000 [06:27<18:28, 10.57it/s]


 epoch: 3278 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.7%


 22%|██▏       | 3280/15000 [06:27<32:05,  6.09it/s]


 epoch: 3279 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.9%

input:       the arts surreal humour is the effect of illogic and absurdity being used for humorous effect under such premises

target:      the arts surreal humour is the effect of illogic and absurdity being used for humorous effect under such premises people

prediction:  the arts surreal humour is the effect of illogic and absurdity being used for humorous effect under such premises the

 epoch: 3280 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.28, test_acc: 96.9%

 epoch: 3281 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 22%|██▏       | 3284/15000 [06:28<24:02,  8.12it/s]


 epoch: 3282 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3283 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 3284 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.29, test_acc: 96.6%


 22%|██▏       | 3286/15000 [06:28<21:41,  9.00it/s]


 epoch: 3285 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3286 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3287 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.29, test_acc: 96.8%


 22%|██▏       | 3290/15000 [06:28<20:08,  9.69it/s]


 epoch: 3288 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.6%

 epoch: 3289 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

input:       study motivation or the subject of why people or lower animals initiate behavior at particular time it also involves

target:      study motivation or the subject of why people or lower animals initiate behavior at particular time it also involves the

prediction:  study motivation or the subject of why people or lower animals initiate behavior at particular time it also involves the

 epoch: 3290 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.27, test_acc: 97.0%


 22%|██▏       | 3292/15000 [06:28<19:02, 10.25it/s]


 epoch: 3291 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.6%

 epoch: 3292 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.27, test_acc: 97.1%

 epoch: 3293 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 22%|██▏       | 3296/15000 [06:29<17:51, 10.93it/s]


 epoch: 3294 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.29, test_acc: 96.9%

 epoch: 3295 | train_loss: 0.26, train_acc: 97.2% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3296 | train_loss: 0.28, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%


 22%|██▏       | 3298/15000 [06:29<17:42, 11.02it/s]


 epoch: 3297 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3298 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.27, test_acc: 97.0%

 epoch: 3299 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 22%|██▏       | 3300/15000 [06:29<18:45, 10.40it/s]


input:       norwegian girl solveig gunbj rg jacobsen born in grytviken on october emilio marcos palma was the first person born

target:      norwegian girl solveig gunbj rg jacobsen born in grytviken on october emilio marcos palma was the first person born south

prediction:  norwegian girl solveig gunbj rg jacobsen born in forced on october disciple bantu palma was the first person born the

 epoch: 3300 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.7%

 epoch: 3301 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%


 22%|██▏       | 3304/15000 [06:29<17:43, 11.00it/s]


 epoch: 3302 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.27, test_acc: 97.2%

 epoch: 3303 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.27, test_acc: 97.0%

 epoch: 3304 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.9%


 22%|██▏       | 3306/15000 [06:30<17:55, 10.87it/s]


 epoch: 3305 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3306 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.9%


 22%|██▏       | 3308/15000 [06:30<31:54,  6.11it/s]


 epoch: 3307 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.1%

 epoch: 3308 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.9%

 epoch: 3309 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.6%


 22%|██▏       | 3310/15000 [06:30<28:18,  6.88it/s]


input:       orangered and periwinkle based on both the colors of the team fortress teams as well as the colors of

target:      orangered and periwinkle based on both the colors of the team fortress teams as well as the colors of the

prediction:  orangered and periwinkle based on both the colors of the team fortress teams as well as the colors of the

 epoch: 3310 | train_loss: 0.27, train_acc: 96.5% | test_loss: 0.27, test_acc: 97.2%

 epoch: 3311 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.9%


 22%|██▏       | 3314/15000 [06:31<22:30,  8.65it/s]


 epoch: 3312 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.27, test_acc: 96.6%

 epoch: 3313 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.9%

 epoch: 3314 | train_loss: 0.29, train_acc: 96.7% | test_loss: 0.28, test_acc: 96.9%


 22%|██▏       | 3316/15000 [06:31<21:02,  9.25it/s]


 epoch: 3315 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3316 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.7%

 epoch: 3317 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.7%


 22%|██▏       | 3318/15000 [06:31<20:38,  9.43it/s]


 epoch: 3318 | train_loss: 0.27, train_acc: 96.6% | test_loss: 0.25, test_acc: 96.9%

 epoch: 3319 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.7%

input:       features such as exception handling and type checking that make real sources of erratic behaviour easier to spot in

target:      features such as exception handling and type checking that make real sources of erratic behaviour easier to spot in programming


 22%|██▏       | 3321/15000 [06:32<22:14,  8.75it/s]


prediction:  features such as exception handling and type checking that make real sources of erratic behaviour easier to spot in of

 epoch: 3320 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.7%

 epoch: 3321 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.27, test_acc: 97.1%


 22%|██▏       | 3323/15000 [06:32<32:47,  5.93it/s]


 epoch: 3322 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.7%

 epoch: 3323 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.24, test_acc: 97.3%


 22%|██▏       | 3325/15000 [06:32<27:53,  6.97it/s]


 epoch: 3324 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.29, test_acc: 96.5%

 epoch: 3325 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.8%


 22%|██▏       | 3327/15000 [06:33<25:08,  7.74it/s]


 epoch: 3326 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.8%

 epoch: 3327 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.28, test_acc: 96.7%


 22%|██▏       | 3329/15000 [06:33<22:15,  8.74it/s]


 epoch: 3328 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3329 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.28, test_acc: 97.0%

input:       is an important part of the way that we use language to point out entities in the world pragmatics

target:      is an important part of the way that we use language to point out entities in the world pragmatics is


 22%|██▏       | 3331/15000 [06:33<23:45,  8.19it/s]


prediction:  is an important part of the way that we use language to point out entities in the world pragmatics the

 epoch: 3330 | train_loss: 0.27, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 3331 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.29, test_acc: 96.8%


 22%|██▏       | 3333/15000 [06:33<23:08,  8.40it/s]


 epoch: 3332 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3333 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%


 22%|██▏       | 3335/15000 [06:33<21:21,  9.10it/s]


 epoch: 3334 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.7%

 epoch: 3335 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 22%|██▏       | 3337/15000 [06:34<24:34,  7.91it/s]


 epoch: 3336 | train_loss: 0.27, train_acc: 96.6% | test_loss: 0.27, test_acc: 97.1%

 epoch: 3337 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%


 22%|██▏       | 3339/15000 [06:34<23:45,  8.18it/s]


 epoch: 3338 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.25, test_acc: 96.8%

 epoch: 3339 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.8%

input:       body was then wrapped in linen with protective amulets inserted between layers and placed in decorated anthropoid coffin mummies

target:      body was then wrapped in linen with protective amulets inserted between layers and placed in decorated anthropoid coffin mummies of


 22%|██▏       | 3342/15000 [06:34<20:19,  9.56it/s]


prediction:  body was then wrapped in linen with protective amulets inserted between layers and placed in decorated anthropoid coffin mummies the

 epoch: 3340 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.28, test_acc: 97.0%

 epoch: 3341 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 3342 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.30, test_acc: 96.6%


 22%|██▏       | 3344/15000 [06:34<19:05, 10.18it/s]


 epoch: 3343 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3344 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3345 | train_loss: 0.26, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%


 22%|██▏       | 3348/15000 [06:35<18:02, 10.76it/s]


 epoch: 3346 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 3347 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3348 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3349 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%

input:       the largest producers in the world the world bank annually lists the top manufacturing countries by total manufacturing value

target:      the largest producers in the world the world bank annually lists the top manufacturing countries by total manufacturing value according

prediction:  the largest producers in the world the world bank annually lists the top manufacturing countries by total manufacturing value the


 22%|██▏       | 3352/15000 [06:36<28:29,  6.82it/s]


 epoch: 3350 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.27, test_acc: 97.1%

 epoch: 3351 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3352 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%


 22%|██▏       | 3354/15000 [06:36<24:44,  7.85it/s]


 epoch: 3353 | train_loss: 0.26, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3354 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.28, test_acc: 96.8%

 epoch: 3355 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.27, test_acc: 96.8%


 22%|██▏       | 3358/15000 [06:36<20:16,  9.57it/s]


 epoch: 3356 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 3357 | train_loss: 0.26, train_acc: 97.2% | test_loss: 0.28, test_acc: 96.7%

 epoch: 3358 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%


 22%|██▏       | 3360/15000 [06:36<19:47,  9.80it/s]


 epoch: 3359 | train_loss: 0.27, train_acc: 97.1% | test_loss: 0.27, test_acc: 97.2%

input:       the hardware usually run much faster than those that are interpreted in software better source needed one technique for

target:      the hardware usually run much faster than those that are interpreted in software better source needed one technique for improving

prediction:  the hardware usually run much faster than those that are interpreted in software better source needed one technique for the

 epoch: 3360 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.8%

 epoch: 3361 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.29, test_acc: 96.8%


 22%|██▏       | 3364/15000 [06:37<17:52, 10.85it/s]


 epoch: 3362 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3363 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.8%

 epoch: 3364 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.7%


 22%|██▏       | 3366/15000 [06:37<27:25,  7.07it/s]


 epoch: 3365 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3366 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3367 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.6%


 22%|██▏       | 3370/15000 [06:38<22:27,  8.63it/s]


 epoch: 3368 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 3369 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%

input:       part of the roman empire he had supported the imperial claims of galba after whose death vespasian became major

target:      part of the roman empire he had supported the imperial claims of galba after whose death vespasian became major contender

prediction:  part of the roman empire he had supported the imperial claims of galba after whose death vespasian became major the

 epoch: 3370 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%


 22%|██▏       | 3372/15000 [06:38<20:20,  9.52it/s]


 epoch: 3371 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3372 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3373 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.27, test_acc: 97.0%


 23%|██▎       | 3376/15000 [06:38<18:38, 10.39it/s]


 epoch: 3374 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3375 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.27, test_acc: 97.1%

 epoch: 3376 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.7%


 23%|██▎       | 3378/15000 [06:38<17:49, 10.87it/s]


 epoch: 3377 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3378 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3379 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.7%


 23%|██▎       | 3380/15000 [06:38<18:45, 10.32it/s]


input:       original inhabitants of the australian continent and nearby islands who migrated from africa to asia years ago and arrived

target:      original inhabitants of the australian continent and nearby islands who migrated from africa to asia years ago and arrived in

prediction:  original inhabitants of the australian continent and nearby islands who migrated from africa to asia years ago and arrived of

 epoch: 3380 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.8%

 epoch: 3381 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.27, test_acc: 97.0%


 23%|██▎       | 3384/15000 [06:39<16:59, 11.39it/s]


 epoch: 3382 | train_loss: 0.29, train_acc: 96.6% | test_loss: 0.26, test_acc: 96.8%

 epoch: 3383 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.6%

 epoch: 3384 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 96.9%


 23%|██▎       | 3386/15000 [06:39<16:45, 11.55it/s]


 epoch: 3385 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.1%

 epoch: 3386 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.7%

 epoch: 3387 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%


 23%|██▎       | 3388/15000 [06:39<16:47, 11.52it/s]


 epoch: 3388 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3389 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

input:       southeast africa the bantu speaking peoples from the sahel progressively expanded over most of sub saharan africa but there

target:      southeast africa the bantu speaking peoples from the sahel progressively expanded over most of sub saharan africa but there are

prediction:  southeast africa the bantu speaking peoples from the sahel progressively expanded over most of sub saharan africa but there the


 23%|██▎       | 3392/15000 [06:40<17:29, 11.06it/s]


 epoch: 3390 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3391 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3392 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3393 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.28, test_acc: 96.4%


 23%|██▎       | 3396/15000 [06:40<26:00,  7.44it/s]


 epoch: 3394 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%

 epoch: 3395 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.28, test_acc: 96.9%

 epoch: 3396 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.9%


 23%|██▎       | 3398/15000 [06:40<23:02,  8.39it/s]


 epoch: 3397 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.5%

 epoch: 3398 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 3399 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.26, test_acc: 97.0%


 23%|██▎       | 3400/15000 [06:41<21:55,  8.82it/s]


input:       characters as well as having other difficulty specific effects for example the peaceful difficulty prevents hostile mobs from spawning

target:      characters as well as having other difficulty specific effects for example the peaceful difficulty prevents hostile mobs from spawning and

prediction:  characters as well as having other difficulty specific effects for example the peaceful difficulty prevents hostile mobs from spawning the

 epoch: 3400 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.3%

 epoch: 3401 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.8%


 23%|██▎       | 3404/15000 [06:41<19:27,  9.93it/s]


 epoch: 3402 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3403 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.8%

 epoch: 3404 | train_loss: 0.27, train_acc: 96.6% | test_loss: 0.28, test_acc: 96.8%


 23%|██▎       | 3406/15000 [06:41<18:46, 10.29it/s]


 epoch: 3405 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3406 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 96.9%

 epoch: 3407 | train_loss: 0.30, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.0%


 23%|██▎       | 3408/15000 [06:42<25:21,  7.62it/s]


 epoch: 3408 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3409 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.9%

input:       as the aleutians and the kuriles usually are excluded in addition the series of sovereign island nations fringing asia

target:      as the aleutians and the kuriles usually are excluded in addition the series of sovereign island nations fringing asia japan

prediction:  as the aleutians and the kuriles usually are excluded in addition the series of sovereign island nations slightly asia the

 epoch: 3410 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 23%|██▎       | 3412/15000 [06:42<21:26,  9.01it/s]


 epoch: 3411 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3412 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.28, test_acc: 96.9%

 epoch: 3413 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 23%|██▎       | 3416/15000 [06:42<18:58, 10.17it/s]


 epoch: 3414 | train_loss: 0.30, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3415 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3416 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%


 23%|██▎       | 3418/15000 [06:43<18:09, 10.63it/s]


 epoch: 3417 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3418 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.8%

 epoch: 3419 | train_loss: 0.27, train_acc: 96.6% | test_loss: 0.27, test_acc: 97.2%


 23%|██▎       | 3420/15000 [06:43<18:47, 10.27it/s]


input:       to reconstruct the history of rock deformation in the area in addition they perform analog and numerical experiments of

target:      to reconstruct the history of rock deformation in the area in addition they perform analog and numerical experiments of rock

prediction:  to reconstruct the history of rock deformation in the area in addition they perform analog and numerical experiments of the

 epoch: 3420 | train_loss: 0.25, train_acc: 96.7% | test_loss: 0.25, test_acc: 96.6%

 epoch: 3421 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.6%


 23%|██▎       | 3424/15000 [06:44<26:39,  7.24it/s]


 epoch: 3422 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.29, test_acc: 96.7%

 epoch: 3423 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.27, test_acc: 96.8%

 epoch: 3424 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 23%|██▎       | 3426/15000 [06:44<23:25,  8.24it/s]


 epoch: 3425 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.27, test_acc: 97.0%

 epoch: 3426 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.28, test_acc: 96.7%

 epoch: 3427 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.3%


 23%|██▎       | 3428/15000 [06:44<21:26,  9.00it/s]


 epoch: 3428 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.3%

 epoch: 3429 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

input:       of the populations of both india and nepal adhere to hinduism alongside significant communities in bangladesh pakistan bhutan sri

target:      of the populations of both india and nepal adhere to hinduism alongside significant communities in bangladesh pakistan bhutan sri lanka

prediction:  of the populations of both india and nepal adhere to hinduism alongside significant communities in bangladesh pakistan bhutan sri the


 23%|██▎       | 3430/15000 [06:44<21:39,  8.90it/s]


 epoch: 3430 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.7%

 epoch: 3431 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 23%|██▎       | 3434/15000 [06:45<21:13,  9.08it/s]


 epoch: 3432 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 3433 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3434 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 23%|██▎       | 3436/15000 [06:45<20:24,  9.45it/s]


 epoch: 3435 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.7%

 epoch: 3436 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.30, test_acc: 97.0%


 23%|██▎       | 3438/15000 [06:45<34:19,  5.61it/s]


 epoch: 3437 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 3438 | train_loss: 0.27, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.7%

 epoch: 3439 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.29, test_acc: 96.9%


 23%|██▎       | 3441/15000 [06:46<27:51,  6.91it/s]


input:       in australia early history when it was still part of gondwana australia is situated in the middle of the

target:      in australia early history when it was still part of gondwana australia is situated in the middle of the tectonic

prediction:  in australia early history when it was still part of gondwana australia is situated in the middle of the the

 epoch: 3440 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.1%

 epoch: 3441 | train_loss: 0.26, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 23%|██▎       | 3443/15000 [06:46<25:11,  7.65it/s]


 epoch: 3442 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.25, test_acc: 96.8%

 epoch: 3443 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%


 23%|██▎       | 3445/15000 [06:46<24:19,  7.92it/s]


 epoch: 3444 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3445 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%


 23%|██▎       | 3448/15000 [06:47<21:44,  8.86it/s]


 epoch: 3446 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.27, test_acc: 97.1%

 epoch: 3447 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3448 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 23%|██▎       | 3449/15000 [06:47<21:51,  8.81it/s]


 epoch: 3449 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.9%

input:       and were resistant to floods and other environmental hazards some remained usable for over thousand years roman bridges were

target:      and were resistant to floods and other environmental hazards some remained usable for over thousand years roman bridges were among

prediction:  and were resistant to floods and other environmental hazards some remained usable for over thousand years roman bridges were the


 23%|██▎       | 3452/15000 [06:47<25:11,  7.64it/s]


 epoch: 3450 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3451 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.3%

 epoch: 3452 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.27, test_acc: 97.2%


 23%|██▎       | 3454/15000 [06:47<22:14,  8.65it/s]


 epoch: 3453 | train_loss: 0.26, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 3454 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.23, test_acc: 97.1%

 epoch: 3455 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 23%|██▎       | 3458/15000 [06:48<18:38, 10.32it/s]


 epoch: 3456 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.8%

 epoch: 3457 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3458 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%


 23%|██▎       | 3460/15000 [06:48<19:02, 10.10it/s]


 epoch: 3459 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.0%

input:       of words that prototypically denotes things and concepts and group of words that prototypically denotes actions and events the

target:      of words that prototypically denotes things and concepts and group of words that prototypically denotes actions and events the first

prediction:  of words that prototypically denotes things and concepts and group of words that prototypically denotes actions and events the the

 epoch: 3460 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.8%


 23%|██▎       | 3462/15000 [06:48<18:15, 10.53it/s]


 epoch: 3461 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3462 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 3463 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%


 23%|██▎       | 3466/15000 [06:49<20:28,  9.39it/s]


 epoch: 3464 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.28, test_acc: 96.6%

 epoch: 3465 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3466 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.28, test_acc: 97.1%


 23%|██▎       | 3468/15000 [06:49<18:49, 10.21it/s]


 epoch: 3467 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.27, test_acc: 97.0%

 epoch: 3468 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 3469 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.6%


 23%|██▎       | 3470/15000 [06:49<19:11, 10.01it/s]


input:       early th centuries such as vaudeville and burlesque charles farrar browne april march was an american humor writer better

target:      early th centuries such as vaudeville and burlesque charles farrar browne april march was an american humor writer better known

prediction:  early th centuries such as vaudeville and burlesque charles farrar browne april march was an american humor writer better the

 epoch: 3470 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3471 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%


 23%|██▎       | 3474/15000 [06:49<17:40, 10.87it/s]


 epoch: 3472 | train_loss: 0.29, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3473 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3474 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.28, test_acc: 96.6%


 23%|██▎       | 3476/15000 [06:49<17:31, 10.96it/s]


 epoch: 3475 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3476 | train_loss: 0.26, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.2%

 epoch: 3477 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 23%|██▎       | 3478/15000 [06:50<17:30, 10.97it/s]


 epoch: 3478 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.29, test_acc: 96.6%


 23%|██▎       | 3480/15000 [06:50<32:09,  5.97it/s]


 epoch: 3479 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.9%

input:       and pocket edition were either merged into bedrock or discontinued and as such have not received further updates on

target:      and pocket edition were either merged into bedrock or discontinued and as such have not received further updates on april

prediction:  and pocket edition were either merged into bedrock or discontinued and as such have not received further updates on the

 epoch: 3480 | train_loss: 0.27, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 3481 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.29, test_acc: 96.8%


 23%|██▎       | 3484/15000 [06:51<23:42,  8.10it/s]


 epoch: 3482 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.29, test_acc: 96.8%

 epoch: 3483 | train_loss: 0.26, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.8%

 epoch: 3484 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 23%|██▎       | 3486/15000 [06:51<21:19,  9.00it/s]


 epoch: 3485 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.25, test_acc: 97.2%

 epoch: 3486 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3487 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%


 23%|██▎       | 3490/15000 [06:51<19:45,  9.71it/s]


 epoch: 3488 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 3489 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.0%

input:       japanese archipelago the kuril islands and continental islands off the coast of the americas such as the channel islands

target:      japanese archipelago the kuril islands and continental islands off the coast of the americas such as the channel islands the

prediction:  japanese archipelago the kuril islands and continental islands off the coast of the americas such as the channel islands the

 epoch: 3490 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.27, test_acc: 97.2%


 23%|██▎       | 3492/15000 [06:51<18:41, 10.26it/s]


 epoch: 3491 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.28, test_acc: 97.0%

 epoch: 3492 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.27, test_acc: 97.0%


 23%|██▎       | 3494/15000 [06:52<25:53,  7.41it/s]


 epoch: 3493 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3494 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 3495 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.28, test_acc: 97.1%


 23%|██▎       | 3498/15000 [06:52<20:39,  9.28it/s]


 epoch: 3496 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3497 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 3498 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 23%|██▎       | 3500/15000 [06:52<20:01,  9.57it/s]


 epoch: 3499 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.8%

input:       set said of the sun or phoenician ereb evening west which is at the origin of arabic maghreb and

target:      set said of the sun or phoenician ereb evening west which is at the origin of arabic maghreb and hebrew

prediction:  set said of the sun or phoenician ereb evening west which is at the origin of arabic maghreb and the

 epoch: 3500 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3501 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.1%


 23%|██▎       | 3504/15000 [06:53<18:00, 10.64it/s]


 epoch: 3502 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3503 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.7%

 epoch: 3504 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.6%


 23%|██▎       | 3506/15000 [06:53<17:29, 10.95it/s]


 epoch: 3505 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.3%

 epoch: 3506 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3507 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.27, test_acc: 96.9%


 23%|██▎       | 3510/15000 [06:54<26:20,  7.27it/s]


 epoch: 3508 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 3509 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%

input:       samoa the solomon islands tokelau tonga tuvalu vanuatu wallis and futuna and the united states minor outlying islands baker

target:      samoa the solomon islands tokelau tonga tuvalu vanuatu wallis and futuna and the united states minor outlying islands baker island

prediction:  samoa the solomon islands tokelau tonga tuvalu vanuatu wallis and futuna and the united states minor outlying islands baker the

 epoch: 3510 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 23%|██▎       | 3512/15000 [06:54<23:00,  8.32it/s]


 epoch: 3511 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3512 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.28, test_acc: 96.8%

 epoch: 3513 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.9%


 23%|██▎       | 3516/15000 [06:54<18:53, 10.13it/s]


 epoch: 3514 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.2%

 epoch: 3515 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3516 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%


 23%|██▎       | 3518/15000 [06:54<18:03, 10.60it/s]


 epoch: 3517 | train_loss: 0.27, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 3518 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3519 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%


 23%|██▎       | 3520/15000 [06:54<18:44, 10.21it/s]


input:       com can be accessed and managed using the standard git command line interface all standard git commands work with

target:      com can be accessed and managed using the standard git command line interface all standard git commands work with it

prediction:  com can be accessed and managed using the standard git command line interface all standard git commands work with the

 epoch: 3520 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.3%

 epoch: 3521 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%


 23%|██▎       | 3524/15000 [06:55<26:22,  7.25it/s]


 epoch: 3522 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3523 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3524 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%


 24%|██▎       | 3526/15000 [06:55<22:55,  8.34it/s]


 epoch: 3525 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.8%

 epoch: 3526 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3527 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 24%|██▎       | 3530/15000 [06:56<20:09,  9.49it/s]


 epoch: 3528 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%

 epoch: 3529 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.7%

input:       the bismarck archipelago of north west melanesia easter islanders claimed that chief hotu matu discovered the island in one

target:      the bismarck archipelago of north west melanesia easter islanders claimed that chief hotu matu discovered the island in one or

prediction:  the bismarck archipelago of north west melanesia easter islanders claimed that chief hotu matu discovered the island in one the

 epoch: 3530 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 24%|██▎       | 3532/15000 [06:56<18:59, 10.06it/s]


 epoch: 3531 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3532 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3533 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.29, test_acc: 96.8%


 24%|██▎       | 3534/15000 [06:56<18:27, 10.35it/s]


 epoch: 3534 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.27, test_acc: 96.8%

 epoch: 3535 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%


 24%|██▎       | 3538/15000 [06:57<26:57,  7.09it/s]


 epoch: 3536 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.28, test_acc: 97.0%

 epoch: 3537 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3538 | train_loss: 0.27, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 24%|██▎       | 3540/15000 [06:57<26:12,  7.29it/s]


 epoch: 3539 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.6%

input:       governing danish island and the world largest is on the same tectonic plate the north american plate and is

target:      governing danish island and the world largest is on the same tectonic plate the north american plate and is part

prediction:  governing danish island and the world largest is on the same tectonic plate the north american plate and is the

 epoch: 3540 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%


 24%|██▎       | 3542/15000 [06:57<24:01,  7.95it/s]


 epoch: 3541 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.2%

 epoch: 3542 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 3543 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%

 24%|██▎       | 3544/15000 [06:58<22:01,  8.67it/s]



 epoch: 3544 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.2%


 24%|██▎       | 3547/15000 [06:58<20:33,  9.28it/s]


 epoch: 3545 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.7%

 epoch: 3546 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 3547 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.26, test_acc: 97.0%


 24%|██▎       | 3549/15000 [06:58<21:15,  8.98it/s]


 epoch: 3548 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 3549 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

input:       charge and compulsory up to certain age the definition of education has been explored by theorists from various fields


 24%|██▎       | 3550/15000 [06:58<23:47,  8.02it/s]


target:      charge and compulsory up to certain age the definition of education has been explored by theorists from various fields many

prediction:  charge and compulsory up to certain age the definition of education has been explored by theorists from various fields the

 epoch: 3550 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 24%|██▎       | 3553/15000 [06:59<32:39,  5.84it/s]


 epoch: 3551 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.4%

 epoch: 3552 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3553 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 24%|██▎       | 3556/15000 [06:59<24:32,  7.77it/s]


 epoch: 3554 | train_loss: 0.27, train_acc: 97.1% | test_loss: 0.27, test_acc: 97.0%

 epoch: 3555 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.2%

 epoch: 3556 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.2%


 24%|██▎       | 3559/15000 [07:00<21:23,  8.92it/s]


 epoch: 3557 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3558 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.9%

 epoch: 3559 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.27, test_acc: 96.8%


 24%|██▎       | 3561/15000 [07:00<23:06,  8.25it/s]


input:       he lists various architectural memes that circulated since the and which in his view have led to contemporary architecture

target:      he lists various architectural memes that circulated since the and which in his view have led to contemporary architecture becoming

prediction:  he lists various architectural memes that circulated since the and which in his view have led to contemporary architecture the

 epoch: 3560 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%

 epoch: 3561 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%


 24%|██▍       | 3564/15000 [07:00<20:08,  9.47it/s]


 epoch: 3562 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 3563 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3564 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.8%


 24%|██▍       | 3567/15000 [07:01<29:51,  6.38it/s]


 epoch: 3565 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.28, test_acc: 96.9%

 epoch: 3566 | train_loss: 0.26, train_acc: 96.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 3567 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%


 24%|██▍       | 3570/15000 [07:01<24:19,  7.83it/s]


 epoch: 3568 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3569 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.28, test_acc: 97.1%

input:       ller published world map and placed the word america on the continent of present day south america the continent

target:      ller published world map and placed the word america on the continent of present day south america the continent north

prediction:  ller published world map and placed the word america on the continent of present day south america the continent the

 epoch: 3570 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.27, test_acc: 97.1%


 24%|██▍       | 3572/15000 [07:01<21:17,  8.94it/s]


 epoch: 3571 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3572 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3573 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 24%|██▍       | 3576/15000 [07:02<18:35, 10.25it/s]


 epoch: 3574 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 3575 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3576 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.8%


 24%|██▍       | 3578/15000 [07:02<18:15, 10.43it/s]


 epoch: 3577 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.28, test_acc: 96.8%

 epoch: 3578 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 24%|██▍       | 3580/15000 [07:02<31:58,  5.95it/s]


 epoch: 3579 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.7%

input:       allow fighting between players many servers have custom plugins that allow actions that are not normally possible in mojang

target:      allow fighting between players many servers have custom plugins that allow actions that are not normally possible in mojang announced

prediction:  allow fighting between players many servers have custom plugins that allow actions that are not normally possible in mojang the

 epoch: 3580 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3581 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.9%


 24%|██▍       | 3584/15000 [07:03<23:28,  8.10it/s]


 epoch: 3582 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.28, test_acc: 97.0%

 epoch: 3583 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3584 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 24%|██▍       | 3586/15000 [07:03<20:54,  9.10it/s]


 epoch: 3585 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3586 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3587 | train_loss: 0.27, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 24%|██▍       | 3588/15000 [07:03<19:23,  9.81it/s]


 epoch: 3588 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 3589 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.9%

input:       experts are one of the primary ways that github funds student oriented events and communities campus experts are given

target:      experts are one of the primary ways that github funds student oriented events and communities campus experts are given access

prediction:  experts are one of the primary ways that github funds student oriented events and communities campus experts are given the


 24%|██▍       | 3592/15000 [07:04<18:43, 10.15it/s]


 epoch: 3590 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3591 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3592 | train_loss: 0.27, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%


 24%|██▍       | 3594/15000 [07:04<27:51,  6.82it/s]


 epoch: 3593 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3594 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.3%

 epoch: 3595 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 24%|██▍       | 3598/15000 [07:04<21:39,  8.77it/s]


 epoch: 3596 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3597 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3598 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.3%


 24%|██▍       | 3600/15000 [07:05<20:39,  9.20it/s]


 epoch: 3599 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%

input:       as mandarin tagalog hindi italian arabic portuguese polish german spanish russian korean cantonese and greek among many others namely

target:      as mandarin tagalog hindi italian arabic portuguese polish german spanish russian korean cantonese and greek among many others namely in

prediction:  as mandarin tagalog hindi italian arabic portuguese polish german spanish russian korean cantonese and greek among many others namely the

 epoch: 3600 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.29, test_acc: 96.6%

 epoch: 3601 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 24%|██▍       | 3604/15000 [07:05<18:20, 10.35it/s]


 epoch: 3602 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.2%

 epoch: 3603 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3604 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.1%


 24%|██▍       | 3606/15000 [07:05<17:49, 10.66it/s]


 epoch: 3605 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 96.9%

 epoch: 3606 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3607 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 24%|██▍       | 3610/15000 [07:06<27:14,  6.97it/s]


 epoch: 3608 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.8%

 epoch: 3609 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

input:       primary the second unaccomplished secondary and the third accomplished secondary usually the first two degrees of education eight years

target:      primary the second unaccomplished secondary and the third accomplished secondary usually the first two degrees of education eight years are

prediction:  primary the second unaccomplished secondary and the third accomplished secondary usually the first two degrees of education eight years the

 epoch: 3610 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%


 24%|██▍       | 3612/15000 [07:06<23:37,  8.03it/s]


 epoch: 3611 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.6%

 epoch: 3612 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.2%

 epoch: 3613 | train_loss: 0.26, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 24%|██▍       | 3616/15000 [07:06<19:22,  9.80it/s]


 epoch: 3614 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 3615 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3616 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%


 24%|██▍       | 3618/15000 [07:07<18:20, 10.34it/s]


 epoch: 3617 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 3618 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.27, test_acc: 97.0%

 epoch: 3619 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%


 24%|██▍       | 3620/15000 [07:07<18:51, 10.05it/s]


input:       acclaimed series house of cards in netflix success encouraged the creation of numerous other streaming services such as hulu

target:      acclaimed series house of cards in netflix success encouraged the creation of numerous other streaming services such as hulu youtube

prediction:  acclaimed series house of cards in netflix success encouraged the creation of numerous other streaming services such as hulu the

 epoch: 3620 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3621 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%


 24%|██▍       | 3624/15000 [07:08<26:04,  7.27it/s]


 epoch: 3622 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 3623 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.25, test_acc: 96.8%

 epoch: 3624 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%


 24%|██▍       | 3626/15000 [07:08<22:43,  8.34it/s]


 epoch: 3625 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.1%

 epoch: 3626 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.7%

 epoch: 3627 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 24%|██▍       | 3630/15000 [07:08<19:55,  9.51it/s]


 epoch: 3628 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.8%

 epoch: 3629 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

input:       waves brought the ancestors of present day athabaskans aleuts and eskimos over time indigenous cultures in north america grew

target:      waves brought the ancestors of present day athabaskans aleuts and eskimos over time indigenous cultures in north america grew increasingly

prediction:  waves brought the ancestors of present day athabaskans aleuts and eskimos over time indigenous cultures in north america grew the

 epoch: 3630 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 24%|██▍       | 3632/15000 [07:08<18:48, 10.07it/s]


 epoch: 3631 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3632 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3633 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%


 24%|██▍       | 3634/15000 [07:08<18:13, 10.40it/s]


 epoch: 3634 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3635 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 24%|██▍       | 3638/15000 [07:09<24:34,  7.71it/s]


 epoch: 3636 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3637 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3638 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.26, test_acc: 96.8%


 24%|██▍       | 3640/15000 [07:09<22:43,  8.33it/s]


 epoch: 3639 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.8%

input:       others have numerous outlets around the world and pioneered the drive through format in the characteristic american dishes such

target:      others have numerous outlets around the world and pioneered the drive through format in the characteristic american dishes such as

prediction:  others have numerous outlets around the world and pioneered the drive through format in the characteristic american dishes such the

 epoch: 3640 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3641 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%


 24%|██▍       | 3644/15000 [07:10<18:45, 10.09it/s]


 epoch: 3642 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3643 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 3644 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.7%


 24%|██▍       | 3646/15000 [07:10<18:02, 10.49it/s]


 epoch: 3645 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3646 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3647 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 24%|██▍       | 3648/15000 [07:10<17:41, 10.69it/s]


 epoch: 3648 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3649 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.8%

input:       to drift apart into two divergent populations with different sets of alleles according to the neutral theory of molecular

target:      to drift apart into two divergent populations with different sets of alleles according to the neutral theory of molecular evolution


 24%|██▍       | 3650/15000 [07:10<19:31,  9.69it/s]


prediction:  to drift apart into two divergent populations with different sets of alleles according to the neutral theory of molecular the

 epoch: 3650 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 24%|██▍       | 3652/15000 [07:11<31:37,  5.98it/s]


 epoch: 3651 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3652 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.8%


 24%|██▍       | 3654/15000 [07:11<28:02,  6.74it/s]


 epoch: 3653 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3654 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.28, test_acc: 97.0%


 24%|██▍       | 3658/15000 [07:12<22:01,  8.58it/s]


 epoch: 3655 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.7%

 epoch: 3656 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3657 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%

 epoch: 3658 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.27, test_acc: 96.9%


 24%|██▍       | 3660/15000 [07:12<24:01,  7.87it/s]


 epoch: 3659 | train_loss: 0.26, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

input:       warfare in public controversy called attention to the army project camelot the manhattan project of social science an effort

target:      warfare in public controversy called attention to the army project camelot the manhattan project of social science an effort which

prediction:  warfare in public controversy called attention to the army project gaseous the manhattan project of social science an effort the

 epoch: 3660 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.8%


 24%|██▍       | 3662/15000 [07:12<23:09,  8.16it/s]


 epoch: 3661 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.28, test_acc: 97.0%

 epoch: 3662 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%


 24%|██▍       | 3664/15000 [07:12<21:38,  8.73it/s]


 epoch: 3663 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.27, test_acc: 97.1%

 epoch: 3664 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%


 24%|██▍       | 3667/15000 [07:13<31:59,  5.91it/s]


 epoch: 3665 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3666 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3667 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%


 24%|██▍       | 3670/15000 [07:13<25:04,  7.53it/s]


 epoch: 3668 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.2%

 epoch: 3669 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

input:       grain per month while foreman might earn sacks kg or lb prices were fixed across the country and recorded

target:      grain per month while foreman might earn sacks kg or lb prices were fixed across the country and recorded in

prediction:  grain per month while foreman might earn sacks kg or lb prices were fixed across the country and recorded the

 epoch: 3670 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.27, test_acc: 97.1%


 24%|██▍       | 3672/15000 [07:13<21:12,  8.90it/s]


 epoch: 3671 | train_loss: 0.26, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3672 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 3673 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.6%


 25%|██▍       | 3676/15000 [07:14<18:00, 10.48it/s]


 epoch: 3674 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.7%

 epoch: 3675 | train_loss: 0.28, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.4%

 epoch: 3676 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.8%


 25%|██▍       | 3678/15000 [07:14<17:19, 10.89it/s]


 epoch: 3677 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%

 epoch: 3678 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3679 | train_loss: 0.25, train_acc: 96.7% | test_loss: 0.26, test_acc: 97.2%


 25%|██▍       | 3680/15000 [07:14<18:26, 10.23it/s]


input:       missionaries from england the united states and other countries established missionary and boarding schools in india later as these

target:      missionaries from england the united states and other countries established missionary and boarding schools in india later as these schools

prediction:  missionaries from england the united states and other countries established missionary and boarding schools in india later as these the

 epoch: 3680 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3681 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 25%|██▍       | 3684/15000 [07:14<16:59, 11.10it/s]


 epoch: 3682 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3683 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3684 | train_loss: 0.27, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.1%


 25%|██▍       | 3686/15000 [07:15<16:28, 11.44it/s]


 epoch: 3685 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.2%

 epoch: 3686 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3687 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%


 25%|██▍       | 3688/15000 [07:15<16:29, 11.44it/s]


 epoch: 3688 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 3689 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%

input:       blocked github to prevent email leakage of hacked account belonging to the country energy minister on march large scale

target:      blocked github to prevent email leakage of hacked account belonging to the country energy minister on march large scale ddos

prediction:  blocked github to prevent email leakage of hacked account belonging to the country energy minister on march large scale the


 25%|██▍       | 3692/15000 [07:15<17:17, 10.90it/s]


 epoch: 3690 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 3691 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 3692 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 25%|██▍       | 3694/15000 [07:16<22:59,  8.20it/s]


 epoch: 3693 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.3%

 epoch: 3694 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 3695 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.26, test_acc: 97.2%


 25%|██▍       | 3698/15000 [07:16<19:06,  9.86it/s]


 epoch: 3696 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 3697 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3698 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.27, test_acc: 96.8%


 25%|██▍       | 3700/15000 [07:16<19:14,  9.79it/s]


 epoch: 3699 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.29, test_acc: 96.6%

input:       distinction between germ cells that give rise to gametes such as sperm and egg cells and the somatic cells

target:      distinction between germ cells that give rise to gametes such as sperm and egg cells and the somatic cells of

prediction:  distinction between germ cells that give rise to gametes such as sperm and egg cells and the somatic cells the

 epoch: 3700 | train_loss: 0.25, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3701 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.9%


 25%|██▍       | 3704/15000 [07:16<17:25, 10.81it/s]


 epoch: 3702 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.27, test_acc: 97.1%

 epoch: 3703 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.8%

 epoch: 3704 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.1%


 25%|██▍       | 3706/15000 [07:17<16:57, 11.10it/s]


 epoch: 3705 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.4%

 epoch: 3706 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 3707 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 25%|██▍       | 3710/15000 [07:17<25:08,  7.48it/s]


 epoch: 3708 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 3709 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

input:       aerarium under the supervision of the senate the roman legions which had reached an unprecedented in number because of

target:      aerarium under the supervision of the senate the roman legions which had reached an unprecedented in number because of the

prediction:  aerarium under the supervision of the senate the roman legions which had reached an unprecedented in number because of the

 epoch: 3710 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 25%|██▍       | 3712/15000 [07:18<22:15,  8.45it/s]


 epoch: 3711 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3712 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.7%

 epoch: 3713 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 25%|██▍       | 3716/15000 [07:18<18:58,  9.91it/s]


 epoch: 3714 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 3715 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 3716 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.9%


 25%|██▍       | 3718/15000 [07:18<17:55, 10.49it/s]


 epoch: 3717 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.3%

 epoch: 3718 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.7%

 epoch: 3719 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 25%|██▍       | 3720/15000 [07:18<18:12, 10.32it/s]


input:       to language some proponents of saussure view of language have advocated formal approach which studies language structure by identifying

target:      to language some proponents of saussure view of language have advocated formal approach which studies language structure by identifying its

prediction:  to language some proponents of saussure view of language have advocated formal approach which studies language structure by identifying the

 epoch: 3720 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 3721 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.3%


 25%|██▍       | 3724/15000 [07:19<21:37,  8.69it/s]


 epoch: 3722 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3723 | train_loss: 0.28, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.8%

 epoch: 3724 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%


 25%|██▍       | 3726/15000 [07:19<19:58,  9.41it/s]


 epoch: 3725 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3726 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 3727 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.25, test_acc: 96.9%


 25%|██▍       | 3728/15000 [07:19<19:01,  9.87it/s]


 epoch: 3728 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3729 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.26, test_acc: 97.0%

input:       possibility of incompatibility between modularity of mind and memetics citation needed in their view minds structure certain communicable aspects

target:      possibility of incompatibility between modularity of mind and memetics citation needed in their view minds structure certain communicable aspects of

prediction:  possibility of incompatibility between modularity of mind and memetics citation needed in their view minds structure certain communicable aspects the


 25%|██▍       | 3732/15000 [07:20<18:20, 10.24it/s]


 epoch: 3730 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.8%

 epoch: 3731 | train_loss: 0.27, train_acc: 97.1% | test_loss: 0.27, test_acc: 97.1%

 epoch: 3732 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.0%


 25%|██▍       | 3734/15000 [07:20<17:57, 10.45it/s]


 epoch: 3733 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 3734 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3735 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 25%|██▍       | 3738/15000 [07:21<26:13,  7.16it/s]


 epoch: 3736 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.6%

 epoch: 3737 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.22, test_acc: 96.9%

 epoch: 3738 | train_loss: 0.24, train_acc: 96.6% | test_loss: 0.24, test_acc: 96.9%


 25%|██▍       | 3740/15000 [07:21<23:46,  7.89it/s]


 epoch: 3739 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.8%

input:       thales rejected non naturalistic explanations for natural phenomena and proclaimed that every event had natural cause they proposed ideas

target:      thales rejected non naturalistic explanations for natural phenomena and proclaimed that every event had natural cause they proposed ideas verified

prediction:  thales rejected non naturalistic explanations for natural phenomena and proclaimed that every event had natural cause they proposed ideas the

 epoch: 3740 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 3741 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 25%|██▍       | 3744/15000 [07:21<19:51,  9.45it/s]


 epoch: 3742 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3743 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3744 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.27, test_acc: 97.1%


 25%|██▍       | 3746/15000 [07:21<18:44, 10.01it/s]


 epoch: 3745 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 3746 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3747 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 25%|██▍       | 3748/15000 [07:21<18:06, 10.36it/s]


 epoch: 3748 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%

 epoch: 3749 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

input:       to that directory the git directory will be overwritten due to the case insensitive trait of the windows and

target:      to that directory the git directory will be overwritten due to the case insensitive trait of the windows and mac

prediction:  to that directory the git directory will be overwritten due to the case insensitive trait of the windows and the


 25%|██▌       | 3750/15000 [07:22<18:23, 10.20it/s]


 epoch: 3750 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.9%


 25%|██▌       | 3752/15000 [07:22<30:34,  6.13it/s]


 epoch: 3751 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3752 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3753 | train_loss: 0.25, train_acc: 96.7% | test_loss: 0.25, test_acc: 97.1%


 25%|██▌       | 3756/15000 [07:23<22:47,  8.22it/s]


 epoch: 3754 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3755 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%

 epoch: 3756 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 25%|██▌       | 3758/15000 [07:23<21:31,  8.70it/s]


 epoch: 3757 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.6%

 epoch: 3758 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%

 epoch: 3759 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 25%|██▌       | 3761/15000 [07:23<21:41,  8.63it/s]


input:       next day git version released on september contained patch for security vulnerability cve that allowed arbitrary code execution the

target:      next day git version released on september contained patch for security vulnerability cve that allowed arbitrary code execution the vulnerability

prediction:  next day git version released on september contained patch for security vulnerability cve that allowed arbitrary code execution the the

 epoch: 3760 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3761 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.23, test_acc: 96.9%


 25%|██▌       | 3763/15000 [07:23<21:27,  8.72it/s]


 epoch: 3762 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3763 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%


 25%|██▌       | 3765/15000 [07:24<25:06,  7.46it/s]


 epoch: 3764 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3765 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 25%|██▌       | 3767/15000 [07:24<22:52,  8.19it/s]


 epoch: 3766 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3767 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%


 25%|██▌       | 3769/15000 [07:24<21:47,  8.59it/s]


 epoch: 3768 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 3769 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%

input:       there were anti pagan laws but they were not generally enforced through the sixth century centers of paganism existed

target:     

 25%|██▌       | 3771/15000 [07:24<22:55,  8.16it/s]

 there were anti pagan laws but they were not generally enforced through the sixth century centers of paganism existed in

prediction:  there were anti pagan laws but they were not generally enforced through the sixth century centers of paganism existed the

 epoch: 3770 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3771 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.8%


 25%|██▌       | 3773/15000 [07:25<21:40,  8.63it/s]


 epoch: 3772 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.8%

 epoch: 3773 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%


 25%|██▌       | 3775/15000 [07:25<21:36,  8.66it/s]


 epoch: 3774 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3775 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.27, test_acc: 97.0%


 25%|██▌       | 3777/15000 [07:25<21:39,  8.64it/s]


 epoch: 3776 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3777 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 25%|██▌       | 3778/15000 [07:25<21:21,  8.76it/s]


 epoch: 3778 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%


 25%|██▌       | 3780/15000 [07:26<40:46,  4.59it/s]


 epoch: 3779 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.27, test_acc: 97.1%

input:       folded even older rocks such as the acasta gneiss of the slave craton in northwestern canada the oldest known

target:      folded even older rocks such as the acasta gneiss of the slave craton in northwestern canada the oldest known rock

prediction:  folded even older rocks such as the acasta gneiss of the slave craton in northwestern canada the oldest known the

 epoch: 3780 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.2%


 25%|██▌       | 3782/15000 [07:26<29:39,  6.30it/s]


 epoch: 3781 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.2%

 epoch: 3782 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3783 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.9%


 25%|██▌       | 3786/15000 [07:26<21:18,  8.77it/s]


 epoch: 3784 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3785 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3786 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 25%|██▌       | 3788/15000 [07:27<19:25,  9.62it/s]


 epoch: 3787 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 3788 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3789 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.0%


 25%|██▌       | 3790/15000 [07:27<19:23,  9.64it/s]


input:       the ancient civilizations of egypt greece china india and persia all engaged in the philosophical study of psychology in

target:      the ancient civilizations of egypt greece china india and persia all engaged in the philosophical study of psychology in ancient

prediction:  the ancient civilizations of egypt greece china india and persia all engaged in the philosophical study of psychology in the

 epoch: 3790 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3791 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 25%|██▌       | 3792/15000 [07:27<18:57,  9.85it/s]


 epoch: 3792 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 3793 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 25%|██▌       | 3796/15000 [07:28<26:38,  7.01it/s]


 epoch: 3794 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.28, test_acc: 97.0%

 epoch: 3795 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3796 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 25%|██▌       | 3798/15000 [07:28<23:27,  7.96it/s]


 epoch: 3797 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3798 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3799 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.9%


 25%|██▌       | 3800/15000 [07:28<22:16,  8.38it/s]


input:       may at the famous golden spike event at promontory summit utah it created nationwide mechanized transportation network that revolutionized

target:      may at the famous golden spike event at promontory summit utah it created nationwide mechanized transportation network that revolutionized the

prediction:  may at the famous golden spike event at promontory summit utah it created nationwide mechanized transportation network that revolutionized the

 epoch: 3800 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.1%

 epoch: 3801 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.0%


 25%|██▌       | 3804/15000 [07:29<19:16,  9.68it/s]


 epoch: 3802 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3803 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3804 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 25%|██▌       | 3806/15000 [07:29<18:17, 10.20it/s]


 epoch: 3805 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%

 epoch: 3806 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 3807 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.3%


 25%|██▌       | 3810/15000 [07:29<20:56,  8.91it/s]


 epoch: 3808 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.8%

 epoch: 3809 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.8%

input:       ce the emperor theodosius outlawed pagan religions this is sometimes considered to mark the end of antiquity alternatively antiquity

target:      ce the emperor theodosius outlawed pagan religions this is sometimes considered to mark the end of antiquity alternatively antiquity is

prediction:  ce the emperor theodosius outlawed pagan religions this is sometimes considered to mark the end of antiquity alternatively antiquity the

 epoch: 3810 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 25%|██▌       | 3812/15000 [07:29<19:16,  9.67it/s]


 epoch: 3811 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 3812 | train_loss: 0.24, train_acc: 96.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 3813 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 25%|██▌       | 3816/15000 [07:30<17:19, 10.76it/s]


 epoch: 3814 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.27, test_acc: 97.0%

 epoch: 3815 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3816 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 25%|██▌       | 3818/15000 [07:30<16:51, 11.05it/s]


 epoch: 3817 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.1%

 epoch: 3818 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.4%

 epoch: 3819 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%


 25%|██▌       | 3820/15000 [07:30<17:37, 10.58it/s]


input:       few small farming communities into powerful civilization whose leaders were in complete control of the people and resources of

target:      few small farming communities into powerful civilization whose leaders were in complete control of the people and resources of the

prediction:  few small farming communities into powerful civilization whose leaders were in complete control of the people and resources of the

 epoch: 3820 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3821 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%


 25%|██▌       | 3822/15000 [07:30<17:43, 10.51it/s]


 epoch: 3822 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.8%


 25%|██▌       | 3824/15000 [07:31<27:07,  6.87it/s]


 epoch: 3823 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3824 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 3825 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.2%


 26%|██▌       | 3828/15000 [07:31<21:10,  8.79it/s]


 epoch: 3826 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 3827 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3828 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 26%|██▌       | 3830/15000 [07:31<20:23,  9.13it/s]


 epoch: 3829 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

input:       minorities this may affect the students self esteem and motivation as well as their access to educational opportunities for

target:      minorities this may affect the students self esteem and motivation as well as their access to educational opportunities for example

prediction:  minorities this may affect the students self esteem and motivation as well as their access to educational opportunities for the

 epoch: 3830 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.3%

 epoch: 3831 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 26%|██▌       | 3834/15000 [07:32<18:04, 10.29it/s]


 epoch: 3832 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3833 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3834 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.9%


 26%|██▌       | 3836/15000 [07:32<17:40, 10.53it/s]


 epoch: 3835 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3836 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 26%|██▌       | 3838/15000 [07:32<27:48,  6.69it/s]


 epoch: 3837 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3838 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3839 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.27, test_acc: 97.0%


 26%|██▌       | 3840/15000 [07:33<25:00,  7.44it/s]


input:       was to analyze and classify different aspects of the mind primarily through the method of introspection william james john

target:      was to analyze and classify different aspects of the mind primarily through the method of introspection william james john dewey

prediction:  was to analyze and classify different aspects of the mind primarily through the method of introspection william james john the

 epoch: 3840 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.28, test_acc: 96.7%

 epoch: 3841 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%


 26%|██▌       | 3844/15000 [07:33<19:50,  9.37it/s]


 epoch: 3842 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 3843 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 3844 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.8%


 26%|██▌       | 3846/15000 [07:33<18:26, 10.08it/s]


 epoch: 3845 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.2%

 epoch: 3846 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 3847 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%


 26%|██▌       | 3848/15000 [07:33<17:55, 10.37it/s]


 epoch: 3848 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3849 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.7%

input:       individual couple family or small group typically these approaches encourage new ways of thinking feeling or behaving four major

target:      individual couple family or small group typically these approaches encourage new ways of thinking feeling or behaving four major theoretical

prediction:  individual couple family or small group typically these approaches encourage new ways of thinking feeling or behaving four major the


 26%|██▌       | 3850/15000 [07:34<18:40,  9.95it/s]


 epoch: 3850 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%


 26%|██▌       | 3852/15000 [07:34<30:36,  6.07it/s]


 epoch: 3851 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.7%

 epoch: 3852 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3853 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%


 26%|██▌       | 3856/15000 [07:35<22:59,  8.08it/s]


 epoch: 3854 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3855 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%

 epoch: 3856 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 26%|██▌       | 3858/15000 [07:35<20:36,  9.01it/s]


 epoch: 3857 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 3858 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.8%

 epoch: 3859 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%


 26%|██▌       | 3860/15000 [07:35<20:00,  9.28it/s]


input:       of reprehension and made no reference to light and cheerful events or to the troubling beginnings and happy endings

target:      of reprehension and made no reference to light and cheerful events or to the troubling beginnings and happy endings associated

prediction:  of reprehension and made no reference to light and cheerful events or to the troubling beginnings and happy endings the

 epoch: 3860 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3861 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 26%|██▌       | 3864/15000 [07:35<18:20, 10.12it/s]


 epoch: 3862 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3863 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 3864 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.27, test_acc: 96.8%

 epoch: 3865 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 26%|██▌       | 3868/15000 [07:36<26:40,  6.95it/s]


 epoch: 3866 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.24, test_acc: 96.9%

 epoch: 3867 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%

 epoch: 3868 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.29, test_acc: 96.7%


 26%|██▌       | 3870/15000 [07:36<25:09,  7.37it/s]


 epoch: 3869 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.8%

input:       or menes in greek who was believed to have united the two kingdoms of upper and lower egypt the

target:      or menes in greek who was believed to have united the two kingdoms of upper and lower egypt the transition

prediction:  or menes in greek who was believed to have united the two kingdoms of upper and lower egypt the the

 epoch: 3870 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.25, test_acc: 97.0%


 26%|██▌       | 3872/15000 [07:36<22:56,  8.08it/s]


 epoch: 3871 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.26, test_acc: 97.1%

 epoch: 3872 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3873 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 26%|██▌       | 3876/15000 [07:37<20:23,  9.09it/s]


 epoch: 3874 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.27, test_acc: 97.1%

 epoch: 3875 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.2%

 epoch: 3876 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.7%


 26%|██▌       | 3878/15000 [07:37<20:04,  9.23it/s]


 epoch: 3877 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.26, test_acc: 97.1%

 epoch: 3878 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 26%|██▌       | 3879/15000 [07:37<19:50,  9.34it/s]


 epoch: 3879 | train_loss: 0.26, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

input:       separate but connected systems of sign and meaning goes back to the first linguistic studies of de saussure and

target:      separate but connected systems of sign and meaning goes back to the first linguistic studies of de saussure and is

prediction:  separate but connected systems of sign and meaning goes back to the first linguistic studies of de saussure and the


 26%|██▌       | 3882/15000 [07:38<33:12,  5.58it/s]


 epoch: 3880 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 3881 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3882 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 26%|██▌       | 3884/15000 [07:38<28:04,  6.60it/s]


 epoch: 3883 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3884 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 26%|██▌       | 3886/15000 [07:38<24:32,  7.55it/s]


 epoch: 3885 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 3886 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%


 26%|██▌       | 3888/15000 [07:39<22:37,  8.18it/s]


 epoch: 3887 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3888 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.28, test_acc: 96.8%

 epoch: 3889 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 26%|██▌       | 3890/15000 [07:39<21:20,  8.67it/s]


input:       large regions of egypt were covered in treed savanna and traversed by herds of grazing ungulates foliage and fauna

target:      large regions of egypt were covered in treed savanna and traversed by herds of grazing ungulates foliage and fauna were

prediction:  large regions of egypt were covered in treed savanna and traversed by herds of grazing ungulates foliage and fauna the

 epoch: 3890 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.27, test_acc: 97.2%

 epoch: 3891 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.0%


 26%|██▌       | 3892/15000 [07:39<19:50,  9.33it/s]


 epoch: 3892 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.25, test_acc: 96.9%

 epoch: 3893 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 26%|██▌       | 3896/15000 [07:40<22:34,  8.20it/s]


 epoch: 3894 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 3895 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.9%

 epoch: 3896 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.27, test_acc: 97.1%


 26%|██▌       | 3899/15000 [07:40<19:37,  9.43it/s]


 epoch: 3897 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 3898 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 3899 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%


 26%|██▌       | 3901/15000 [07:40<19:15,  9.60it/s]


input:       ireland scandinavia germany italy russia and other parts of central and eastern europe another million came from canada most

target:      ireland scandinavia germany italy russia and other parts of central and eastern europe another million came from canada most came

prediction:  ireland scandinavia germany italy russia and other parts of central and eastern europe another million came from canada most the

 epoch: 3900 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3901 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 26%|██▌       | 3903/15000 [07:40<18:38,  9.92it/s]


 epoch: 3902 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3903 | train_loss: 0.27, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.8%

 epoch: 3904 | train_loss: 0.26, train_acc: 96.0% | test_loss: 0.27, test_acc: 96.9%


 26%|██▌       | 3907/15000 [07:41<17:48, 10.39it/s]


 epoch: 3905 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 3906 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3907 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 3908 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%


 26%|██▌       | 3910/15000 [07:41<28:34,  6.47it/s]


 epoch: 3909 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

input:       system that governs how words and morphemes are combined to form phrases and utterances the scientific study of language

target:      system that governs how words and morphemes are combined to form phrases and utterances the scientific study of language is

prediction:  system that governs how words and morphemes are combined to form phrases and utterances the scientific study of language the

 epoch: 3910 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%


 26%|██▌       | 3912/15000 [07:42<24:24,  7.57it/s]


 epoch: 3911 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 3912 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%

 epoch: 3913 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.27, test_acc: 97.1%


 26%|██▌       | 3916/15000 [07:42<19:38,  9.41it/s]


 epoch: 3914 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 3915 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.27, test_acc: 96.8%

 epoch: 3916 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 26%|██▌       | 3918/15000 [07:42<18:16, 10.10it/s]


 epoch: 3917 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3918 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 3919 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%


 26%|██▌       | 3920/15000 [07:42<19:03,  9.69it/s]


input:       it acted mostly as validation of what we were thinking about attempting in message to the reddit community colbert

target:      it acted mostly as validation of what we were thinking about attempting in message to the reddit community colbert later

prediction:  it acted mostly as validation of what we were thinking about attempting in message to the reddit community colbert the

 epoch: 3920 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3921 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 26%|██▌       | 3922/15000 [07:43<18:31,  9.97it/s]


 epoch: 3922 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 26%|██▌       | 3924/15000 [07:43<30:31,  6.05it/s]


 epoch: 3923 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.28, test_acc: 96.8%

 epoch: 3924 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3925 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%


 26%|██▌       | 3928/15000 [07:43<22:47,  8.10it/s]


 epoch: 3926 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3927 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3928 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 26%|██▌       | 3930/15000 [07:44<21:22,  8.63it/s]


 epoch: 3929 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

input:       such as dates and figs wine and meat were enjoyed by all on feast days while the upper classes

target:      such as dates and figs wine and meat were enjoyed by all on feast days while the upper classes indulged

prediction:  such as dates and figs wine and meat were enjoyed by all on feast days while the upper classes the

 epoch: 3930 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3931 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 26%|██▌       | 3934/15000 [07:44<18:40,  9.88it/s]


 epoch: 3932 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%

 epoch: 3933 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.4%

 epoch: 3934 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.0%


 26%|██▌       | 3936/15000 [07:44<17:48, 10.35it/s]


 epoch: 3935 | train_loss: 0.25, train_acc: 96.7% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3936 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 26%|██▋       | 3938/15000 [07:45<21:21,  8.63it/s]


 epoch: 3937 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3938 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.25, test_acc: 96.7%

 epoch: 3939 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 26%|██▋       | 3940/15000 [07:45<20:31,  8.98it/s]


input:       to the big bang models the universe at the beginning was very hot and very compact and since then

target:      to the big bang models the universe at the beginning was very hot and very compact and since then it

prediction:  to the big bang models the universe at the beginning was very hot and very compact and since then the

 epoch: 3940 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3941 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 26%|██▋       | 3944/15000 [07:45<18:09, 10.14it/s]


 epoch: 3942 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 3943 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 3944 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.6%


 26%|██▋       | 3946/15000 [07:45<17:25, 10.57it/s]


 epoch: 3945 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.27, test_acc: 97.0%

 epoch: 3946 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 3947 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 26%|██▋       | 3950/15000 [07:46<17:22, 10.60it/s]


 epoch: 3948 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.2%

 epoch: 3949 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.2%

input:       have seriously affected the ability of the empire to defend itself by the roman empire broke up into three

target:      have seriously affected the ability of the empire to defend itself by the roman empire broke up into three competing

prediction:  have seriously affected the ability of the empire to defend itself by the roman empire broke up into three the

 epoch: 3950 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.3%


 26%|██▋       | 3952/15000 [07:46<17:15, 10.67it/s]


 epoch: 3951 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 3952 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 3953 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.1%


 26%|██▋       | 3956/15000 [07:46<16:02, 11.47it/s]


 epoch: 3954 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 3955 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%

 epoch: 3956 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.8%


 26%|██▋       | 3958/15000 [07:46<15:44, 11.69it/s]


 epoch: 3957 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 3958 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3959 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%


 26%|██▋       | 3960/15000 [07:46<16:31, 11.13it/s]


input:       players must acquire resources to build in the world and maintain health and creative mode in which players have

target:      players must acquire resources to build in the world and maintain health and creative mode in which players have unlimited

prediction:  players must acquire resources to build in the world and maintain health and creative mode in which players have the

 epoch: 3960 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3961 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.24, test_acc: 96.8%


 26%|██▋       | 3964/15000 [07:47<16:17, 11.29it/s]


 epoch: 3962 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 3963 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 3964 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 3965 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%


 26%|██▋       | 3968/15000 [07:48<24:47,  7.42it/s]


 epoch: 3966 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%

 epoch: 3967 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3968 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 26%|██▋       | 3970/15000 [07:48<22:47,  8.07it/s]


 epoch: 3969 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

input:       women the elderly and middle aged russian men the entire written works of humanity from the beginning of recorded

target:      women the elderly and middle aged russian men the entire written works of humanity from the beginning of recorded history

prediction:  women the elderly and middle aged russian men the entire written works of humanity from the beginning of recorded the

 epoch: 3970 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 3971 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 26%|██▋       | 3974/15000 [07:48<19:00,  9.67it/s]


 epoch: 3972 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3973 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.27, test_acc: 97.0%

 epoch: 3974 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%


 27%|██▋       | 3976/15000 [07:48<17:56, 10.24it/s]


 epoch: 3975 | train_loss: 0.26, train_acc: 96.5% | test_loss: 0.24, test_acc: 97.2%

 epoch: 3976 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 3977 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 27%|██▋       | 3978/15000 [07:49<17:28, 10.52it/s]


 epoch: 3978 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 3979 | train_loss: 0.25, train_acc: 96.7% | test_loss: 0.25, test_acc: 97.1%

input:       theory begins with plato who attributed to socrates as semi historical dialogue character in the philebus the view

target:      theory begins with plato who attributed to socrates as semi historical dialogue character in the philebus the view that

prediction:  theory begins with plato who attributed to socrates as semi historical dialogue character in the philebus the view and


 27%|██▋       | 3981/15000 [07:49<27:46,  6.61it/s]


 epoch: 3980 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3981 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.8%


 27%|██▋       | 3983/15000 [07:49<25:13,  7.28it/s]


 epoch: 3982 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.7%

 epoch: 3983 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 27%|██▋       | 3985/15000 [07:50<22:52,  8.03it/s]


 epoch: 3984 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3985 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.0%


 27%|██▋       | 3987/15000 [07:50<21:34,  8.51it/s]


 epoch: 3986 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%

 epoch: 3987 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.8%


 27%|██▋       | 3989/15000 [07:50<20:36,  8.90it/s]


 epoch: 3988 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%

 epoch: 3989 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.0%

input:       for general aviation and other activities of the fifty busiest container ports four are located in the united states


 27%|██▋       | 3991/15000 [07:50<23:25,  7.83it/s]


target:      for general aviation and other activities of the fifty busiest container ports four are located in the united states of

prediction:  for general aviation and other activities of the fifty busiest container ports four are located in the united states the

 epoch: 3990 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3991 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.27, test_acc: 97.1%


 27%|██▋       | 3992/15000 [07:51<22:49,  8.04it/s]


 epoch: 3992 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3993 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.2%


 27%|██▋       | 3995/15000 [07:51<34:45,  5.28it/s]


 epoch: 3994 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.0%

 epoch: 3995 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 27%|██▋       | 3998/15000 [07:52<24:20,  7.53it/s]


 epoch: 3996 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 3997 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 3998 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 27%|██▋       | 4000/15000 [07:52<22:14,  8.24it/s]


 epoch: 3999 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.8%

input:       dubbed as survival test indev and infdev were released in and the first major update dubbed alpha was released

target:      dubbed as survival test indev and infdev were released in and the first major update dubbed alpha was released on

prediction:  dubbed as survival test indev and infdev were released in and the first major update dubbed alpha was released the

 epoch: 4000 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4001 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 27%|██▋       | 4002/15000 [07:52<20:12,  9.07it/s]



 epoch: 4002 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.8%

 epoch: 4003 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 27%|██▋       | 4006/15000 [07:52<18:07, 10.11it/s]


 epoch: 4004 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4005 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4006 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.2%


 27%|██▋       | 4008/15000 [07:53<17:48, 10.29it/s]


 epoch: 4007 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 4008 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%


 27%|██▋       | 4010/15000 [07:53<31:11,  5.87it/s]


 epoch: 4009 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.8%

input:       trumpets oboes and pipes developed later and became popular in the new kingdom the egyptians played on bells cymbals

target:      trumpets oboes and pipes developed later and became popular in the new kingdom the egyptians played on bells cymbals tambourines

prediction:  trumpets oboes and pipes developed later and became popular in the new kingdom the egyptians played on bells cymbals the

 epoch: 4010 | train_loss: 0.25, train_acc: 96.7% | test_loss: 0.25, test_acc: 97.2%

 epoch: 4011 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.4%


 27%|██▋       | 4014/15000 [07:54<22:55,  7.99it/s]


 epoch: 4012 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4013 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.9%

 epoch: 4014 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 27%|██▋       | 4016/15000 [07:54<20:31,  8.92it/s]


 epoch: 4015 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4016 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.1%

 epoch: 4017 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 27%|██▋       | 4018/15000 [07:54<19:12,  9.53it/s]


 epoch: 4018 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.8%

 epoch: 4019 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%

input:       tons in the production of oil brazil was the th largest oil producer in the world in with million

target:      tons in the production of oil brazil was the th largest oil producer in the world in with million barrels

prediction:  tons in the production of oil brazil was the th largest oil producer in the world in with million the


 27%|██▋       | 4022/15000 [07:54<18:20,  9.97it/s]


 epoch: 4020 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4021 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.8%

 epoch: 4022 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%


 27%|██▋       | 4024/15000 [07:55<23:46,  7.69it/s]


 epoch: 4023 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4024 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.27, test_acc: 96.8%

 epoch: 4025 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%


 27%|██▋       | 4028/15000 [07:55<19:46,  9.25it/s]


 epoch: 4026 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4027 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4028 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 27%|██▋       | 4030/15000 [07:55<19:14,  9.50it/s]


 epoch: 4029 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.4%

input:       modern school systems organize students by age competence specialization and native language into different classes to ensure productive learning

target:      modern school systems organize students by age competence specialization and native language into different classes to ensure productive learning process

prediction:  modern school systems organize students by age competence specialization and native language into different classes to ensure productive learning the

 epoch: 4030 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4031 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%


 27%|██▋       | 4034/15000 [07:56<17:29, 10.44it/s]


 epoch: 4032 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4033 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4034 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 27%|██▋       | 4036/15000 [07:56<17:20, 10.53it/s]


 epoch: 4035 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4036 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.9%

 epoch: 4037 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.0%


 27%|██▋       | 4040/15000 [07:57<25:42,  7.10it/s]


 epoch: 4038 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4039 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

input:       over the lifespan an early example of personality assessment was the woodworth personal data sheet constructed during world war

target:      over the lifespan an early example of personality assessment was the woodworth personal data sheet constructed during world war the

prediction:  over the lifespan an early example of personality assessment was the woodworth personal data sheet constructed during world war the

 epoch: 4040 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 27%|██▋       | 4042/15000 [07:57<22:35,  8.08it/s]


 epoch: 4041 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.9%

 epoch: 4042 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4043 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.1%


 27%|██▋       | 4046/15000 [07:57<18:58,  9.62it/s]


 epoch: 4044 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.9%

 epoch: 4045 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4046 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.26, test_acc: 97.2%


 27%|██▋       | 4048/15000 [07:57<18:03, 10.10it/s]


 epoch: 4047 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.1%

 epoch: 4048 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4049 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%


 27%|██▋       | 4050/15000 [07:57<18:33,  9.84it/s]


input:       first texture pack for the xbox edition was released on september and was themed after the mass effect franchise

target:      first texture pack for the xbox edition was released on september and was themed after the mass effect franchise unlike

prediction:  first texture pack for the xbox edition was released on september and was themed after the mass effect franchise the

 epoch: 4050 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.1%

 epoch: 4051 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 27%|██▋       | 4054/15000 [07:58<25:35,  7.13it/s]


 epoch: 4052 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4053 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%

 epoch: 4054 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%


 27%|██▋       | 4056/15000 [07:58<22:37,  8.06it/s]


 epoch: 4055 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 4056 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%

 epoch: 4057 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.3%


 27%|██▋       | 4060/15000 [07:59<19:47,  9.22it/s]


 epoch: 4058 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4059 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

input:       highlands well enough established to be perceived by the egyptians as possible challenge to their hegemony but an ethnic

target:      highlands well enough established to be perceived by the egyptians as possible challenge to their hegemony but an ethnic group

prediction:  highlands well enough established to be perceived by the egyptians as possible challenge to their hegemony but an ethnic the

 epoch: 4060 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%


 27%|██▋       | 4062/15000 [07:59<18:42,  9.74it/s]


 epoch: 4061 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.0%

 epoch: 4062 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4063 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 27%|██▋       | 4064/15000 [07:59<18:10, 10.03it/s]


 epoch: 4064 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4065 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 27%|██▋       | 4068/15000 [08:00<21:59,  8.29it/s]


 epoch: 4066 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 4067 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4068 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.4%


 27%|██▋       | 4070/15000 [08:00<20:39,  8.82it/s]


 epoch: 4069 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.28, test_acc: 97.1%

input:       in march lorenzo de medici son giovanni assumed the papacy as leo leonardo went to rome that september where

target:      in march lorenzo de medici son giovanni assumed the papacy as leo leonardo went to rome that september where he

prediction:  in march lorenzo de medici son giovanni assumed the papacy as leo leonardo went to rome that september where the

 epoch: 4070 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4071 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 27%|██▋       | 4074/15000 [08:00<17:58, 10.13it/s]


 epoch: 4072 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4073 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.9%

 epoch: 4074 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 27%|██▋       | 4076/15000 [08:00<17:42, 10.28it/s]


 epoch: 4075 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4076 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.9%

 epoch: 4077 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 27%|██▋       | 4080/15000 [08:01<17:36, 10.34it/s]


 epoch: 4078 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 4079 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

input:       pericles who used the tribute paid by the members of the delian league to build the parthenon and other

target:      pericles who used the tribute paid by the members of the delian league to build the parthenon and other great

prediction:  pericles who used the tribute paid by the members of the delian league to build the parthenon and other the

 epoch: 4080 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 27%|██▋       | 4082/15000 [08:01<23:04,  7.89it/s]


 epoch: 4081 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4082 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4083 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%


 27%|██▋       | 4085/15000 [08:02<21:20,  8.53it/s]


 epoch: 4084 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%

 epoch: 4085 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%


 27%|██▋       | 4087/15000 [08:02<21:23,  8.50it/s]


 epoch: 4086 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4087 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 27%|██▋       | 4089/15000 [08:02<22:49,  7.96it/s]


 epoch: 4088 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.27, test_acc: 97.0%

 epoch: 4089 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 27%|██▋       | 4090/15000 [08:02<25:09,  7.23it/s]


input:       range of sports are played in the continent of south america with football being the most popular overall while

target:      range of sports are played in the continent of south america with football being the most popular overall while baseball

prediction:  range of sports are played in the continent of south america with football being the most popular overall while the

 epoch: 4090 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 27%|██▋       | 4093/15000 [08:03<22:57,  7.92it/s]


 epoch: 4091 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4092 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.1%

 epoch: 4093 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.27, test_acc: 96.9%


 27%|██▋       | 4094/15000 [08:03<22:34,  8.05it/s]


 epoch: 4094 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%


 27%|██▋       | 4096/15000 [08:03<39:00,  4.66it/s]


 epoch: 4095 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4096 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.27, test_acc: 97.0%


 27%|██▋       | 4098/15000 [08:04<28:49,  6.30it/s]


 epoch: 4097 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4098 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%


 27%|██▋       | 4100/15000 [08:04<26:24,  6.88it/s]


 epoch: 4099 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.26, test_acc: 97.0%

input:       it was elaborated upon by arabic writers and islamic philosophers such as abu bishr and his pupils al farabi

target:      it was elaborated upon by arabic writers and islamic philosophers such as abu bishr and his pupils al farabi avicenna

prediction:  it was elaborated upon by arabic writers and islamic philosophers such as abu bishr and his pupils al farabi the

 epoch: 4100 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 27%|██▋       | 4102/15000 [08:04<23:09,  7.84it/s]


 epoch: 4101 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4102 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 27%|██▋       | 4104/15000 [08:04<23:02,  7.88it/s]


 epoch: 4103 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4104 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%


 27%|██▋       | 4106/15000 [08:05<22:02,  8.24it/s]


 epoch: 4105 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4106 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.8%

 epoch: 4107 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 27%|██▋       | 4109/15000 [08:05<22:29,  8.07it/s]


 epoch: 4108 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 4109 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%


 27%|██▋       | 4110/15000 [08:05<22:59,  7.90it/s]


input:       the early days of release evan lahti from pc gamer noted that the majority of new official maps in

target:      the early days of release evan lahti from pc gamer noted that the majority of new official maps in global

prediction:  the early days of release evan lahti from pc gamer noted that the majority of new official maps in the

 epoch: 4110 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.3%

 epoch: 4111 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 27%|██▋       | 4114/15000 [08:05<18:33,  9.78it/s]


 epoch: 4112 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%

 epoch: 4113 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4114 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 27%|██▋       | 4116/15000 [08:06<17:44, 10.23it/s]


 epoch: 4115 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%

 epoch: 4116 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4117 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%


 27%|██▋       | 4118/15000 [08:06<17:02, 10.64it/s]


 epoch: 4118 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.9%

 epoch: 4119 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.8%

input:       developments in greek culture and society leading to the classical period from the persian invasion of greece in bc

target:      developments in greek culture and society leading to the classical period from the persian invasion of greece in bc until

prediction:  developments in greek culture and society leading to the classical period from the persian invasion of greece in bc the


 27%|██▋       | 4122/15000 [08:06<17:41, 10.25it/s]


 epoch: 4120 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4121 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.1%

 epoch: 4122 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4123 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 28%|██▊       | 4126/15000 [08:07<25:28,  7.11it/s]


 epoch: 4124 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4125 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4126 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 28%|██▊       | 4128/15000 [08:07<22:33,  8.03it/s]


 epoch: 4127 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4128 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.0%

 epoch: 4129 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 28%|██▊       | 4130/15000 [08:07<21:48,  8.30it/s]


input:       research on perception cognition attention emotion intelligence subjective experiences motivation brain functioning and personality psychologists interests extend to interpersonal

target:      research on perception cognition attention emotion intelligence subjective experiences motivation brain functioning and personality psychologists interests extend to interpersonal relationships

prediction:  research on perception cognition attention emotion intelligence subjective experiences motivation brain functioning and personality psychologists interests extend to interpersonal the

 epoch: 4130 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4131 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 28%|██▊       | 4134/15000 [08:08<19:14,  9.41it/s]


 epoch: 4132 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4133 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.7%

 epoch: 4134 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.0%


 28%|██▊       | 4136/15000 [08:08<18:30,  9.78it/s]


 epoch: 4135 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 4136 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.3%

 epoch: 4137 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 28%|██▊       | 4140/15000 [08:09<26:29,  6.83it/s]


 epoch: 4138 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 4139 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

input:       not yet sure of the potential success of minecraft at this point and backed off from acquisition after microsoft

target:      not yet sure of the potential success of minecraft at this point and backed off from acquisition after microsoft brought

prediction:  not yet sure of the potential success of minecraft at this point and backed off from acquisition after microsoft the

 epoch: 4140 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 28%|██▊       | 4142/15000 [08:09<23:10,  7.81it/s]


 epoch: 4141 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4142 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4143 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 28%|██▊       | 4146/15000 [08:09<19:37,  9.22it/s]


 epoch: 4144 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4145 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4146 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 28%|██▊       | 4148/15000 [08:09<18:44,  9.65it/s]


 epoch: 4147 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4148 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%

 epoch: 4149 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%


 28%|██▊       | 4150/15000 [08:10<18:56,  9.55it/s]


input:       in his book darwin dangerous idea daniel dennett points to the existence of self regulating correction mechanisms vaguely resembling

target:      in his book darwin dangerous idea daniel dennett points to the existence of self regulating correction mechanisms vaguely resembling those

prediction:  in his book darwin dangerous idea daniel dennett points to the existence of self regulating correction mechanisms vaguely resembling the

 epoch: 4150 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4151 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 28%|██▊       | 4154/15000 [08:10<19:47,  9.13it/s]


 epoch: 4152 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.4%

 epoch: 4153 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4154 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 28%|██▊       | 4156/15000 [08:10<18:34,  9.73it/s]


 epoch: 4155 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4156 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 4157 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 28%|██▊       | 4160/15000 [08:11<17:48, 10.14it/s]


 epoch: 4158 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%

 epoch: 4159 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.0%

input:       released adding gameplay and matchmaking changes new maps and new cosmetic items in january an update adding flick stick

target:      released adding gameplay and matchmaking changes new maps and new cosmetic items in january an update adding flick stick support

prediction:  released adding gameplay and matchmaking changes new maps and new cosmetic items in january an update adding flick stick the

 epoch: 4160 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 28%|██▊       | 4162/15000 [08:11<17:23, 10.39it/s]


 epoch: 4161 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4162 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.27, test_acc: 97.0%

 epoch: 4163 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.8%


 28%|██▊       | 4164/15000 [08:11<17:22, 10.40it/s]


 epoch: 4164 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.27, test_acc: 97.2%

 epoch: 4165 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 28%|██▊       | 4168/15000 [08:12<25:52,  6.98it/s]


 epoch: 4166 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4167 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 4168 | train_loss: 0.26, train_acc: 95.9% | test_loss: 0.26, test_acc: 97.1%


 28%|██▊       | 4170/15000 [08:12<23:37,  7.64it/s]


 epoch: 4169 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%

input:       edition supported downloadable content which was available to purchase via the xbox games store these content packs usually contained

target:      edition supported downloadable content which was available to purchase via the xbox games store these content packs usually contained additional

prediction:  edition supported downloadable content which was available to purchase via the xbox games store these content packs usually contained the

 epoch: 4170 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4171 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 28%|██▊       | 4174/15000 [08:12<19:22,  9.31it/s]


 epoch: 4172 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 4173 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4174 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 28%|██▊       | 4176/15000 [08:13<18:29,  9.76it/s]


 epoch: 4175 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.3%

 epoch: 4176 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4177 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 28%|██▊       | 4178/15000 [08:13<17:53, 10.08it/s]


 epoch: 4178 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.7%

 epoch: 4179 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

input:       object oriented programming method call executed locally on proxy object invokes the corresponding method on the remote object using

target:      object oriented programming method call executed locally on proxy object invokes the corresponding method on the remote object using the

prediction:  object oriented programming method call executed locally on proxy object invokes the corresponding method on the remote object using the


 28%|██▊       | 4180/15000 [08:13<18:45,  9.61it/s]


 epoch: 4180 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 28%|██▊       | 4182/15000 [08:14<27:05,  6.66it/s]


 epoch: 4181 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4182 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4183 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 28%|██▊       | 4186/15000 [08:14<21:24,  8.42it/s]


 epoch: 4184 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.1%

 epoch: 4185 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4186 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 28%|██▊       | 4188/15000 [08:14<19:37,  9.18it/s]


 epoch: 4187 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 4188 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4189 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 28%|██▊       | 4190/15000 [08:14<19:40,  9.16it/s]


input:       either description can be accurate the expansion of space implied by the flrw metric is only mathematical convention corresponding

target:      either description can be accurate the expansion of space implied by the flrw metric is only mathematical convention corresponding to

prediction:  either description can be accurate the expansion of space implied by the flrw metric is only mathematical convention corresponding the

 epoch: 4190 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4191 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.27, test_acc: 96.8%


 28%|██▊       | 4194/15000 [08:15<17:45, 10.14it/s]


 epoch: 4192 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.2%

 epoch: 4193 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4194 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 28%|██▊       | 4196/15000 [08:15<20:09,  8.93it/s]


 epoch: 4195 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.8%

 epoch: 4196 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 28%|██▊       | 4199/15000 [08:15<20:27,  8.80it/s]


 epoch: 4197 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4198 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.2%

 epoch: 4199 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

input:       fossils to fossilised multicellular organisms existing patterns of biodiversity have been shaped by repeated formations of new species speciation


 28%|██▊       | 4201/15000 [08:16<22:44,  7.91it/s]


target:      fossils to fossilised multicellular organisms existing patterns of biodiversity have been shaped by repeated formations of new species speciation changes

prediction:  fossils to fossilised multicellular organisms existing patterns of biodiversity have been shaped by repeated formations of new species speciation the

 epoch: 4200 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4201 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.0%


 28%|██▊       | 4203/15000 [08:16<21:15,  8.47it/s]


 epoch: 4202 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4203 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%


 28%|██▊       | 4205/15000 [08:16<20:43,  8.68it/s]


 epoch: 4204 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4205 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 28%|██▊       | 4207/15000 [08:16<20:36,  8.73it/s]


 epoch: 4206 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 4207 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%


 28%|██▊       | 4208/15000 [08:16<20:52,  8.62it/s]


 epoch: 4208 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 28%|██▊       | 4210/15000 [08:17<33:30,  5.37it/s]


 epoch: 4209 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

input:       larger be dominated by one city particularly athens sparta and thebes and often poleis would be compelled to join

target:      larger be dominated by one city particularly athens sparta and thebes and often poleis would be compelled to join under

prediction:  larger be dominated by one city particularly athens sparta and thebes and often poleis would be compelled to join the

 epoch: 4210 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 28%|██▊       | 4212/15000 [08:17<26:24,  6.81it/s]


 epoch: 4211 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4212 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 28%|██▊       | 4215/15000 [08:18<20:59,  8.56it/s]


 epoch: 4213 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4214 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4215 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%


 28%|██▊       | 4218/15000 [08:18<19:10,  9.37it/s]


 epoch: 4216 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.3%

 epoch: 4217 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.2%

 epoch: 4218 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 28%|██▊       | 4220/15000 [08:18<20:22,  8.82it/s]


 epoch: 4219 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

input:       attributes was discussed in the time of charles darwin huxley claimed that the struggle for existence holds as

target:      attributes was discussed in the time of charles darwin huxley claimed that the struggle for existence holds as much

prediction:  attributes was discussed in the time of charles darwin huxley claimed that the struggle for existence holds as the

 epoch: 4220 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%


 28%|██▊       | 4223/15000 [08:18<18:30,  9.71it/s]


 epoch: 4221 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4222 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 4223 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%


 28%|██▊       | 4226/15000 [08:19<27:58,  6.42it/s]


 epoch: 4224 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4225 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4226 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 28%|██▊       | 4228/15000 [08:19<23:36,  7.60it/s]


 epoch: 4227 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4228 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 4229 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.7%


 28%|██▊       | 4230/15000 [08:19<21:52,  8.21it/s]


input:       alexandria and antioch was almost twice the size of any european city at the beginning of the th century

target:      alexandria and antioch was almost twice the size of any european city at the beginning of the th century as

prediction:  alexandria and antioch was almost twice the size of any european city at the beginning of the th century the

 epoch: 4230 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 4231 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%


 28%|██▊       | 4234/15000 [08:20<18:30,  9.69it/s]


 epoch: 4232 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4233 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4234 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 28%|██▊       | 4236/15000 [08:20<17:33, 10.22it/s]


 epoch: 4235 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4236 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

 epoch: 4237 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 28%|██▊       | 4238/15000 [08:20<24:30,  7.32it/s]


 epoch: 4238 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4239 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

input:       definition consists of discrete political entities and so excludes the bonin islands hawaii clipperton island and the juan fern

target:      definition consists of discrete political entities and so excludes the bonin islands hawaii clipperton island and the juan fern ndez

prediction:  definition consists of discrete political entities and so excludes the bonin islands hawaii clipperton island and the juan fern the

 epoch: 4240 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 28%|██▊       | 4243/15000 [08:21<19:25,  9.23it/s]


 epoch: 4241 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4242 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.27, test_acc: 97.1%

 epoch: 4243 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 28%|██▊       | 4245/15000 [08:21<17:57,  9.98it/s]


 epoch: 4244 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4245 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4246 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 28%|██▊       | 4249/15000 [08:21<16:38, 10.76it/s]


 epoch: 4247 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.1%

 epoch: 4248 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4249 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 28%|██▊       | 4251/15000 [08:22<17:25, 10.28it/s]


input:       rome founding became the most familiar version in modern era literature among imperial historians who wrote in greek are

target:      rome founding became the most familiar version in modern era literature among imperial historians who wrote in greek are dionysius

prediction:  rome founding became the most familiar version in modern era literature among imperial historians who wrote in greek are the

 epoch: 4250 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 4251 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.27, test_acc: 97.2%


 28%|██▊       | 4253/15000 [08:22<29:09,  6.14it/s]


 epoch: 4252 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%

 epoch: 4253 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4254 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 28%|██▊       | 4257/15000 [08:23<21:45,  8.23it/s]


 epoch: 4255 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4256 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4257 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 28%|██▊       | 4259/15000 [08:23<19:49,  9.03it/s]


 epoch: 4258 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4259 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

input:       areas involved in detecting and resolving incongruity mismatch between expected and presented stimuli and the mesocorticolimbic dopaminergic system and

target:      areas involved in detecting and resolving incongruity mismatch between expected and presented stimuli and the mesocorticolimbic dopaminergic system and the

prediction:  areas involved in detecting and resolving incongruity mismatch between expected and presented stimuli and the mesocorticolimbic dopaminergic system and the

 epoch: 4260 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%


 28%|██▊       | 4263/15000 [08:23<18:15,  9.80it/s]


 epoch: 4261 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.2%

 epoch: 4262 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 4263 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.2%


 28%|██▊       | 4265/15000 [08:23<17:42, 10.10it/s]


 epoch: 4264 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 4265 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4266 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 28%|██▊       | 4269/15000 [08:24<24:42,  7.24it/s]


 epoch: 4267 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 4268 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4269 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%


 28%|██▊       | 4270/15000 [08:24<24:26,  7.32it/s]


input:       imperial office on may and became the first roman emperor to voluntarily abdicate the position john vi retired to

target:      imperial office on may and became the first roman emperor to voluntarily abdicate the position john vi retired to monastery

prediction:  imperial office on may and became the first roman emperor to voluntarily abdicate the position john vi retired to the

 epoch: 4270 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4271 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%


 28%|██▊       | 4274/15000 [08:25<19:32,  9.15it/s]


 epoch: 4272 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4273 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4274 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 29%|██▊       | 4276/15000 [08:25<18:01,  9.92it/s]


 epoch: 4275 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4276 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4277 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%


 29%|██▊       | 4280/15000 [08:25<17:14, 10.36it/s]


 epoch: 4278 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.1%

 epoch: 4279 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.0%

input:       delta region as well as the lucrative and critical trade routes to the levant the increasing power and wealth

target:      delta region as well as the lucrative and critical trade routes to the levant the increasing power and wealth of

prediction:  delta region as well as the lucrative and critical trade routes to the levant the increasing power and wealth the

 epoch: 4280 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%


 29%|██▊       | 4282/15000 [08:26<24:08,  7.40it/s]


 epoch: 4281 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4282 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4283 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.28, test_acc: 97.1%


 29%|██▊       | 4286/15000 [08:26<19:42,  9.06it/s]


 epoch: 4284 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.8%

 epoch: 4285 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 4286 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 29%|██▊       | 4288/15000 [08:26<18:32,  9.63it/s]


 epoch: 4287 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 4288 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4289 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.9%


 29%|██▊       | 4290/15000 [08:26<18:31,  9.63it/s]


input:       lebanese and chinese have also developed in the larger coastal cities of west and east africa respectively source maddison

target:      lebanese and chinese have also developed in the larger coastal cities of west and east africa respectively source maddison and

prediction:  lebanese and chinese have also developed in the larger coastal cities of west and east africa respectively source maddison the

 epoch: 4290 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4291 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 29%|██▊       | 4294/15000 [08:27<17:34, 10.15it/s]


 epoch: 4292 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4293 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4294 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.2%


 29%|██▊       | 4296/15000 [08:27<28:45,  6.20it/s]


 epoch: 4295 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.0%

 epoch: 4296 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4297 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.6%


 29%|██▊       | 4300/15000 [08:28<22:34,  7.90it/s]


 epoch: 4298 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.1%

 epoch: 4299 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.21, test_acc: 97.2%

input:       the fire spawning the legend of him fiddling while rome burned which is almost certainly untrue the domus aurea

target:      the fire spawning the legend of him fiddling while rome burned which is almost certainly untrue the domus aurea was

prediction:  the fire spawning the legend of him fiddling while rome burned which is almost certainly untrue the domus aurea the

 epoch: 4300 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 29%|██▊       | 4302/15000 [08:28<20:22,  8.75it/s]


 epoch: 4301 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4302 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4303 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.2%


 29%|██▊       | 4305/15000 [08:28<20:00,  8.91it/s]


 epoch: 4304 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4305 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 29%|██▊       | 4307/15000 [08:28<20:30,  8.69it/s]


 epoch: 4306 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4307 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.25, test_acc: 96.8%


 29%|██▊       | 4308/15000 [08:28<20:53,  8.53it/s]


 epoch: 4308 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.3%


 29%|██▊       | 4310/15000 [08:29<38:48,  4.59it/s]


 epoch: 4309 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

input:       likely existed as community centers the peoples of this area grew beans cotton peanuts and sweet potatoes fished in

target:      likely existed as community centers the peoples of this area grew beans cotton peanuts and sweet potatoes fished in the

prediction:  likely existed as community centers the peoples of this area grew beans cotton peanuts and sweet potatoes fished in the

 epoch: 4310 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 29%|██▊       | 4312/15000 [08:29<30:21,  5.87it/s]


 epoch: 4311 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4312 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 29%|██▉       | 4314/15000 [08:30<24:52,  7.16it/s]


 epoch: 4313 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.8%

 epoch: 4314 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 29%|██▉       | 4316/15000 [08:30<24:24,  7.30it/s]


 epoch: 4315 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 4316 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.2%


 29%|██▉       | 4318/15000 [08:30<22:32,  7.90it/s]


 epoch: 4317 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.8%

 epoch: 4318 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 29%|██▉       | 4320/15000 [08:30<25:19,  7.03it/s]


 epoch: 4319 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       grant access to operation objectives which are spread over different game modes such as arms race and deathmatch or

target:      grant access to operation objectives which are spread over different game modes such as arms race and deathmatch or in

prediction:  grant access to operation objectives which are spread over different game modes such as arms race and deathmatch or the

 epoch: 4320 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 29%|██▉       | 4322/15000 [08:31<23:16,  7.65it/s]


 epoch: 4321 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4322 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 29%|██▉       | 4324/15000 [08:31<24:14,  7.34it/s]


 epoch: 4323 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4324 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4325 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.4%


 29%|██▉       | 4328/15000 [08:31<18:14,  9.75it/s]


 epoch: 4326 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4327 | train_loss: 0.26, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4328 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 29%|██▉       | 4330/15000 [08:32<18:16,  9.73it/s]


 epoch: 4329 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

input:       no universal agreement on the definition of life scientists generally accept that the biological manifestation of life is characterized

target:      no universal agreement on the definition of life scientists generally accept that the biological manifestation of life is characterized by

prediction:  no universal agreement on the definition of life scientists generally accept that the biological manifestation of life is characterized the

 epoch: 4330 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 29%|██▉       | 4332/15000 [08:32<17:37, 10.09it/s]


 epoch: 4331 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4332 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 4333 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 29%|██▉       | 4336/15000 [08:32<17:09, 10.36it/s]


 epoch: 4334 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.4%

 epoch: 4335 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4336 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 4337 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.27, test_acc: 97.1%


 29%|██▉       | 4340/15000 [08:33<25:34,  6.95it/s]


 epoch: 4338 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.28, test_acc: 96.6%

 epoch: 4339 | train_loss: 0.27, train_acc: 96.7% | test_loss: 0.22, test_acc: 97.1%

input:       and family may be limited forcing the individual to look elsewhere for these social interactions humour has been shown

target:      and family may be limited forcing the individual to look elsewhere for these social interactions humour has been shown to

prediction:  and family may be limited forcing the individual to look elsewhere for these social interactions humour has been shown the

 epoch: 4340 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.2%


 29%|██▉       | 4342/15000 [08:33<22:32,  7.88it/s]


 epoch: 4341 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4342 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.9%

 epoch: 4343 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.8%


 29%|██▉       | 4346/15000 [08:33<18:40,  9.51it/s]


 epoch: 4344 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4345 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4346 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 29%|██▉       | 4348/15000 [08:34<17:51,  9.95it/s]


 epoch: 4347 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4348 | train_loss: 0.27, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4349 | train_loss: 0.25, train_acc: 96.7% | test_loss: 0.22, test_acc: 97.2%


 29%|██▉       | 4350/15000 [08:34<18:00,  9.86it/s]


input:       will of their province this was mostly caused by the placement of native auxiliary units in the areas they

target:      will of their province this was mostly caused by the placement of native auxiliary units in the areas they were

prediction:  will of their province this was mostly caused by the placement of native auxiliary units in the areas they the

 epoch: 4350 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4351 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 29%|██▉       | 4354/15000 [08:35<25:00,  7.10it/s]


 epoch: 4352 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4353 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4354 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 29%|██▉       | 4356/15000 [08:35<21:57,  8.08it/s]


 epoch: 4355 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.3%

 epoch: 4356 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4357 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 29%|██▉       | 4360/15000 [08:35<19:19,  9.17it/s]


 epoch: 4358 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 4359 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

input:       quran attending qur anic school is how children become recognized members of the islamic faith children often attend state

target:      quran attending qur anic school is how children become recognized members of the islamic faith children often attend state schools

prediction:  quran attending qur anic school is how children become recognized members of the islamic faith children often attend state the

 epoch: 4360 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 29%|██▉       | 4362/15000 [08:35<17:49,  9.95it/s]


 epoch: 4361 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 4362 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4363 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 29%|██▉       | 4364/15000 [08:36<17:14, 10.28it/s]


 epoch: 4364 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4365 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 29%|██▉       | 4368/15000 [08:36<23:48,  7.44it/s]


 epoch: 4366 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4367 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4368 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.9%


 29%|██▉       | 4370/15000 [08:37<21:49,  8.12it/s]


 epoch: 4369 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       having increased by factor of at least reheating occurred until the universe obtained the temperatures required for the production

target:      having increased by factor of at least reheating occurred until the universe obtained the temperatures required for the production of

prediction:  having increased by factor of at least reheating occurred until the universe obtained the temperatures required for the production the

 epoch: 4370 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4371 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.0%


 29%|██▉       | 4374/15000 [08:37<18:06,  9.78it/s]


 epoch: 4372 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 4373 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 4374 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 29%|██▉       | 4376/15000 [08:37<17:13, 10.28it/s]


 epoch: 4375 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4376 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4377 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 29%|██▉       | 4378/15000 [08:37<16:46, 10.55it/s]


 epoch: 4378 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4379 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

input:       to imagined or hypothetical events as well as events that took place in the past or may happen in

target:      to imagined or hypothetical events as well as events that took place in the past or may happen in the

prediction:  to imagined or hypothetical events as well as events that took place in the past or may happen in the


 29%|██▉       | 4380/15000 [08:37<17:16, 10.24it/s]


 epoch: 4380 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.3%


 29%|██▉       | 4382/15000 [08:38<27:13,  6.50it/s]


 epoch: 4381 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 4382 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.3%

 epoch: 4383 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 29%|██▉       | 4386/15000 [08:38<20:52,  8.48it/s]


 epoch: 4384 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4385 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.7%

 epoch: 4386 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 29%|██▉       | 4388/15000 [08:38<18:54,  9.35it/s]


 epoch: 4387 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4388 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4389 | train_loss: 0.26, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 29%|██▉       | 4390/15000 [08:39<18:53,  9.36it/s]


input:       of geological time previously geologists could only use fossils and stratigraphic correlation to date sections of rock relative to

target:      of geological time previously geologists could only use fossils and stratigraphic correlation to date sections of rock relative to one

prediction:  of geological time previously geologists could only use fossils and stratigraphic correlation to date sections of rock relative to the

 epoch: 4390 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4391 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.2%


 29%|██▉       | 4394/15000 [08:39<17:32, 10.08it/s]


 epoch: 4392 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 4393 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.1%

 epoch: 4394 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 29%|██▉       | 4396/15000 [08:40<28:45,  6.15it/s]


 epoch: 4395 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.0%

 epoch: 4396 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4397 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 29%|██▉       | 4400/15000 [08:40<22:47,  7.75it/s]


 epoch: 4398 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4399 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%

input:       scholastic aptitude test to standardize college admissions the results of intelligence tests were used to argue for segregated schools

target:      scholastic aptitude test to standardize college admissions the results of intelligence tests were used to argue for segregated schools and

prediction:  scholastic aptitude test to standardize college admissions the results of intelligence tests were used to argue for segregated schools the

 epoch: 4400 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 29%|██▉       | 4402/15000 [08:40<20:32,  8.60it/s]


 epoch: 4401 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4402 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 4403 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 29%|██▉       | 4406/15000 [08:41<18:08,  9.73it/s]


 epoch: 4404 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4405 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 4406 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 29%|██▉       | 4408/15000 [08:41<17:41,  9.98it/s]


 epoch: 4407 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 4408 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%


 29%|██▉       | 4410/15000 [08:42<31:46,  5.55it/s]


 epoch: 4409 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

input:       in meaning but which were supplemented by gesture for greater precision the single word for fish was

target:      in meaning but which were supplemented by gesture for greater precision the single word for fish was accompanied

prediction:  in meaning but which were supplemented by gesture for greater precision the single word for fish was the

 epoch: 4410 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 29%|██▉       | 4412/15000 [08:42<26:58,  6.54it/s]


 epoch: 4411 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4412 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 29%|██▉       | 4414/15000 [08:42<23:51,  7.39it/s]


 epoch: 4413 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.3%

 epoch: 4414 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 29%|██▉       | 4416/15000 [08:42<22:02,  8.01it/s]


 epoch: 4415 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 4416 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 29%|██▉       | 4418/15000 [08:42<21:12,  8.31it/s]


 epoch: 4417 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.8%

 epoch: 4418 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 29%|██▉       | 4420/15000 [08:43<24:28,  7.20it/s]


 epoch: 4419 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

input:       home by one million people or more include chinese million tagalog million vietnamese million french million korean million and

target:      home by one million people or more include chinese million tagalog million vietnamese million french million korean million and german

prediction:  home by one million people or more include chinese million tagalog million vietnamese million french million korean million and the

 epoch: 4420 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 29%|██▉       | 4422/15000 [08:43<22:57,  7.68it/s]


 epoch: 4421 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 4422 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 29%|██▉       | 4423/15000 [08:43<22:28,  7.84it/s]


 epoch: 4423 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.8%


 30%|██▉       | 4425/15000 [08:44<35:26,  4.97it/s]


 epoch: 4424 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4425 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 30%|██▉       | 4427/15000 [08:44<26:39,  6.61it/s]


 epoch: 4426 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4427 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4428 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.0%


 30%|██▉       | 4430/15000 [08:44<22:31,  7.82it/s]


 epoch: 4429 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       arabs who arrived in the th century ce introduced the arabic language and islam to north africa the semitic

target:      arabs who arrived in the th century ce introduced the arabic language and islam to north africa the semitic phoenicians

prediction:  arabs who arrived in the th century ce introduced the arabic language and islam to north africa the semitic the

 epoch: 4430 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 30%|██▉       | 4432/15000 [08:44<20:01,  8.80it/s]


 epoch: 4431 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%

 epoch: 4432 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 30%|██▉       | 4434/15000 [08:45<20:22,  8.64it/s]


 epoch: 4433 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%

 epoch: 4434 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 30%|██▉       | 4436/15000 [08:45<20:41,  8.51it/s]


 epoch: 4435 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.0%

 epoch: 4436 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 30%|██▉       | 4438/15000 [08:45<21:08,  8.33it/s]


 epoch: 4437 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.4%

 epoch: 4438 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 30%|██▉       | 4440/15000 [08:45<19:58,  8.81it/s]


 epoch: 4439 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.3%

input:       to that era terman concluded that intellectual disability represents the level of intelligence which is very very common among

target:      to that era terman concluded that intellectual disability represents the level of intelligence which is very very common among spanish

prediction:  to that era terman concluded that intellectual disability represents the level of intelligence which is very very common among the

 epoch: 4440 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4441 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%


 30%|██▉       | 4444/15000 [08:46<17:18, 10.16it/s]


 epoch: 4442 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4443 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.2%

 epoch: 4444 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.4%


 30%|██▉       | 4446/15000 [08:46<16:51, 10.44it/s]


 epoch: 4445 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4446 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4447 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.8%


 30%|██▉       | 4450/15000 [08:46<16:39, 10.55it/s]


 epoch: 4448 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4449 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       george iii acknowledged britain defeat in the war leading to the signing of the treaty of paris on september

target:      george iii acknowledged britain defeat in the war leading to the signing of the treaty of paris on september which

prediction:  george iii acknowledged britain defeat in the war leading to the signing of the treaty of paris on september the

 epoch: 4450 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4451 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 30%|██▉       | 4454/15000 [08:47<21:19,  8.24it/s]


 epoch: 4452 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.4%

 epoch: 4453 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.3%

 epoch: 4454 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.27, test_acc: 97.1%


 30%|██▉       | 4456/15000 [08:47<19:30,  9.01it/s]


 epoch: 4455 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.9%

 epoch: 4456 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4457 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%


 30%|██▉       | 4458/15000 [08:47<18:17,  9.60it/s]


 epoch: 4458 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.27, test_acc: 97.0%

 epoch: 4459 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%

input:       times the united states has operated under an uncodified informal two party system for most of its history although

target:      times the united states has operated under an uncodified informal two party system for most of its history although other

prediction:  times the united states has operated under an uncodified informal two party system for most of its history although the


 30%|██▉       | 4462/15000 [08:48<17:34, 10.00it/s]


 epoch: 4460 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%

 epoch: 4461 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4462 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4463 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%


 30%|██▉       | 4464/15000 [08:48<17:12, 10.20it/s]


 epoch: 4464 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 4465 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 30%|██▉       | 4468/15000 [08:49<24:53,  7.05it/s]


 epoch: 4466 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4467 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 4468 | train_loss: 0.25, train_acc: 96.0% | test_loss: 0.25, test_acc: 96.9%


 30%|██▉       | 4470/15000 [08:49<22:44,  7.72it/s]


 epoch: 4469 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

input:       meaning and context and thus tend to appeal to more mature audience many theories exist about what humour is

target:      meaning and context and thus tend to appeal to more mature audience many theories exist about what humour is and

prediction:  meaning and context and thus tend to appeal to more mature audience many theories exist about what humour is the

 epoch: 4470 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.23, test_acc: 96.9%

 epoch: 4471 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.1%


 30%|██▉       | 4474/15000 [08:49<18:40,  9.40it/s]


 epoch: 4472 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4473 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%

 epoch: 4474 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 30%|██▉       | 4476/15000 [08:49<17:45,  9.87it/s]


 epoch: 4475 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4476 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4477 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.0%


 30%|██▉       | 4478/15000 [08:49<17:25, 10.07it/s]


 epoch: 4478 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.7%

 epoch: 4479 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

input:       hull suggested that while memes might exist as dawkins conceives of them he finds it important to suggest that

target:      hull suggested that while memes might exist as dawkins conceives of them he finds it important to suggest that instead

prediction:  hull suggested that while memes might exist as dawkins conceives of them he finds it important to suggest that of


 30%|██▉       | 4480/15000 [08:50<17:58,  9.75it/s]


 epoch: 4480 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 30%|██▉       | 4482/15000 [08:50<24:21,  7.20it/s]


 epoch: 4481 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.8%

 epoch: 4482 | train_loss: 0.27, train_acc: 96.6% | test_loss: 0.24, test_acc: 96.9%

 epoch: 4483 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 96.9%


 30%|██▉       | 4486/15000 [08:50<19:33,  8.96it/s]


 epoch: 4484 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 4485 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.4%

 epoch: 4486 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%


 30%|██▉       | 4488/15000 [08:51<18:23,  9.53it/s]


 epoch: 4487 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4488 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.8%

 epoch: 4489 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%


 30%|██▉       | 4490/15000 [08:51<18:19,  9.56it/s]


input:       nile river reaching its greatest extent during the nd millennium bc which is referred to as the new kingdom

target:      nile river reaching its greatest extent during the nd millennium bc which is referred to as the new kingdom period

prediction:  nile river reaching its greatest extent during the nd millennium bc which is referred to as the new kingdom the

 epoch: 4490 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4491 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 30%|██▉       | 4494/15000 [08:51<17:15, 10.14it/s]


 epoch: 4492 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 4493 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 4494 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.1%


 30%|██▉       | 4496/15000 [08:52<28:18,  6.18it/s]


 epoch: 4495 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4496 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 4497 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 30%|███       | 4500/15000 [08:52<22:08,  7.90it/s]


 epoch: 4498 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4499 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

input:       cirrhosis he created models of the cerebral ventricles with the use of melted wax and constructed glass aorta to

target:      cirrhosis he created models of the cerebral ventricles with the use of melted wax and constructed glass aorta to observe

prediction:  cirrhosis he created models of the cerebral ventricles with the use of melted wax and constructed glass aorta to the

 epoch: 4500 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 30%|███       | 4502/15000 [08:52<20:04,  8.71it/s]


 epoch: 4501 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4502 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4503 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 30%|███       | 4506/15000 [08:53<17:41,  9.89it/s]


 epoch: 4504 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4505 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4506 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 30%|███       | 4508/15000 [08:53<17:18, 10.10it/s]


 epoch: 4507 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 4508 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 30%|███       | 4510/15000 [08:54<30:28,  5.74it/s]


 epoch: 4509 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.8%

input:       been described as pacific rim incorporating the native ori cuisine and diverse culinary traditions introduced by settlers and immigrants

target:      been described as pacific rim incorporating the native ori cuisine and diverse culinary traditions introduced by settlers and immigrants from

prediction:  been described as pacific rim incorporating the native ori cuisine and diverse culinary traditions introduced by settlers and immigrants of

 epoch: 4510 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 30%|███       | 4512/15000 [08:54<26:54,  6.50it/s]


 epoch: 4511 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4512 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 30%|███       | 4514/15000 [08:54<24:08,  7.24it/s]


 epoch: 4513 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.3%

 epoch: 4514 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.3%


 30%|███       | 4516/15000 [08:54<22:27,  7.78it/s]


 epoch: 4515 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4516 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 30%|███       | 4518/15000 [08:55<22:29,  7.77it/s]


 epoch: 4517 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4518 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%


 30%|███       | 4520/15000 [08:55<23:18,  7.49it/s]


 epoch: 4519 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

input:       itself on psychology with translations from american authors like william james the establishment of university psychology departments and

target:      itself on psychology with translations from american authors like william james the establishment of university psychology departments and journals

prediction:  itself on psychology with translations from american authors like william james the establishment of university psychology departments and the

 epoch: 4520 | train_loss: 0.26, train_acc: 96.6% | test_loss: 0.24, test_acc: 97.0%


 30%|███       | 4522/15000 [08:55<21:15,  8.21it/s]


 epoch: 4521 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.1%

 epoch: 4522 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 30%|███       | 4524/15000 [08:55<22:52,  7.63it/s]


 epoch: 4523 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4524 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 30%|███       | 4526/15000 [08:56<21:06,  8.27it/s]


 epoch: 4525 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4526 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 30%|███       | 4528/15000 [08:56<20:12,  8.63it/s]


 epoch: 4527 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4528 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.9%


 30%|███       | 4530/15000 [08:56<22:13,  7.85it/s]


 epoch: 4529 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       repeated activities of organisms in their environment this generates legacy of effects that modify and feed back into the

target:      repeated activities of organisms in their environment this generates legacy of effects that modify and feed back into the selection

prediction:  repeated activities of organisms in their environment this generates legacy of effects that modify and feed back into the the

 epoch: 4530 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 30%|███       | 4532/15000 [08:56<21:35,  8.08it/s]


 epoch: 4531 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.8%

 epoch: 4532 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 30%|███       | 4534/15000 [08:57<21:23,  8.15it/s]


 epoch: 4533 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%

 epoch: 4534 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%


 30%|███       | 4536/15000 [08:57<20:56,  8.33it/s]


 epoch: 4535 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4536 | train_loss: 0.26, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 30%|███       | 4537/15000 [08:57<20:54,  8.34it/s]


 epoch: 4537 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 30%|███       | 4538/15000 [08:57<44:45,  3.90it/s]


 epoch: 4538 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4539 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.8%

input:       in american culture says that it has been used by western cartographers since the mid th century to give

target:      in american culture says that it has been used by western cartographers since the mid th century to give order

prediction:  in american culture says that it has been used by western cartographers since the mid th century to give the


 30%|███       | 4542/15000 [08:58<25:43,  6.77it/s]


 epoch: 4540 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4541 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4542 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 30%|███       | 4544/15000 [08:58<21:49,  7.98it/s]


 epoch: 4543 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4544 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4545 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 30%|███       | 4548/15000 [08:58<18:26,  9.44it/s]


 epoch: 4546 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4547 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4548 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.1%


 30%|███       | 4550/15000 [08:59<18:23,  9.47it/s]


 epoch: 4549 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.0%

input:       of natural philosophy but during the scientific revolution in the th century these natural sciences emerged as unique research

target:      of natural philosophy but during the scientific revolution in the th century these natural sciences emerged as unique research endeavors

prediction:  of natural philosophy but during the scientific revolution in the th century these natural sciences emerged as unique research the

 epoch: 4550 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4551 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.4%


 30%|███       | 4554/15000 [08:59<25:43,  6.77it/s]


 epoch: 4552 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 4553 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 95.9%

 epoch: 4554 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%


 30%|███       | 4556/15000 [09:00<22:48,  7.63it/s]


 epoch: 4555 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.9%

 epoch: 4556 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 4557 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 30%|███       | 4560/15000 [09:00<19:31,  8.91it/s]


 epoch: 4558 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%

 epoch: 4559 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

input:       supposedly worked after number of times and cleomenes led spartan force to overthrow hippias which succeeded and instated an

target:      supposedly worked after number of times and cleomenes led spartan force to overthrow hippias which succeeded and instated an oligarchy

prediction:  supposedly worked after number of times and cleomenes led spartan force to overthrow hippias which succeeded and instated an the

 epoch: 4560 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 30%|███       | 4562/15000 [09:00<18:08,  9.59it/s]


 epoch: 4561 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 4562 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4563 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 30%|███       | 4566/15000 [09:01<16:29, 10.54it/s]


 epoch: 4564 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 4565 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 4566 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%


 30%|███       | 4568/15000 [09:01<22:57,  7.57it/s]


 epoch: 4567 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4568 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4569 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 30%|███       | 4570/15000 [09:01<21:35,  8.05it/s]


input:       rocks seen through the brown mountain stream and much of jesus figure indicating leonardo hand additionally leonardo may have

target:      rocks seen through the brown mountain stream and much of jesus figure indicating leonardo hand additionally leonardo may have been

prediction:  rocks seen through the brown mountain stream and much of jesus figure indicating leonardo hand additionally leonardo may have the

 epoch: 4570 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 4571 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 30%|███       | 4574/15000 [09:02<18:10,  9.56it/s]


 epoch: 4572 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4573 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4574 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.1%


 31%|███       | 4576/15000 [09:02<17:11, 10.11it/s]


 epoch: 4575 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.2%

 epoch: 4576 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4577 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 31%|███       | 4578/15000 [09:02<16:51, 10.30it/s]


 epoch: 4578 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 4579 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.8%

input:       quarter division of the empire after the empire had been plagued by bloody disputes about the supreme authority this

target:      quarter division of the empire after the empire had been plagued by bloody disputes about the supreme authority this finally

prediction:  quarter division of the empire after the empire had been plagued by bloody disputes about the supreme authority this the


 31%|███       | 4580/15000 [09:02<17:12, 10.09it/s]


 epoch: 4580 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 31%|███       | 4582/15000 [09:03<28:27,  6.10it/s]


 epoch: 4581 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4582 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 4583 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 31%|███       | 4586/15000 [09:03<21:24,  8.11it/s]


 epoch: 4584 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4585 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4586 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 31%|███       | 4588/15000 [09:03<19:31,  8.89it/s]


 epoch: 4587 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4588 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 4589 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 31%|███       | 4590/15000 [09:03<18:56,  9.16it/s]


input:       of the term continent the current division of eurasia into two continents now reflects east west cultural linguistic and

target:      of the term continent the current division of eurasia into two continents now reflects east west cultural linguistic and ethnic

prediction:  of the term continent the current division of eurasia into two continents now reflects east west cultural linguistic and the

 epoch: 4590 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4591 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.1%


 31%|███       | 4594/15000 [09:04<17:33,  9.88it/s]


 epoch: 4592 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4593 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4594 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 31%|███       | 4596/15000 [09:04<28:30,  6.08it/s]


 epoch: 4595 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4596 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4597 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 31%|███       | 4598/15000 [09:05<24:43,  7.01it/s]


 epoch: 4598 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.3%

 epoch: 4599 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

input:       arcade next the game differs from the home computer versions in number of ways including newly designed crafting system

target:      arcade next the game differs from the home computer versions in number of ways including newly designed crafting system the

prediction:  arcade next the game differs from the home computer versions in number of ways including newly designed crafting system the


 31%|███       | 4602/15000 [09:05<20:12,  8.58it/s]


 epoch: 4600 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4601 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 4602 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.1%


 31%|███       | 4604/15000 [09:05<18:43,  9.25it/s]


 epoch: 4603 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 4604 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4605 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.8%


 31%|███       | 4608/15000 [09:06<16:47, 10.31it/s]


 epoch: 4606 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4607 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.0%

 epoch: 4608 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 31%|███       | 4610/15000 [09:06<26:17,  6.59it/s]


 epoch: 4609 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

input:       by priests the instruction of amenemope is considered masterpiece of near eastern literature towards the end of the new

target:      by priests the instruction of amenemope is considered masterpiece of near eastern literature towards the end of the new kingdom

prediction:  by priests the instruction of amenemope is considered masterpiece of near eastern literature towards the end of the new the

 epoch: 4610 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4611 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 31%|███       | 4612/15000 [09:06<23:06,  7.49it/s]



 epoch: 4612 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 4613 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.0%


 31%|███       | 4616/15000 [09:07<18:48,  9.20it/s]


 epoch: 4614 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4615 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4616 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%


 31%|███       | 4618/15000 [09:07<17:37,  9.81it/s]


 epoch: 4617 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 4618 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 4619 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.3%


 31%|███       | 4620/15000 [09:07<18:03,  9.58it/s]


input:       horvath alleged that founder and ceo tom preston werner and his wife theresa engaged in pattern of harassment against

target:      horvath alleged that founder and ceo tom preston werner and his wife theresa engaged in pattern of harassment against her

prediction:  horvath alleged that founder and ceo tom preston werner and his wife theresa engaged in pattern of harassment against the

 epoch: 4620 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4621 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.2%


 31%|███       | 4622/15000 [09:07<17:22,  9.96it/s]


 epoch: 4622 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4623 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 31%|███       | 4625/15000 [09:08<27:43,  6.24it/s]


 epoch: 4624 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4625 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%


 31%|███       | 4627/15000 [09:08<25:14,  6.85it/s]


 epoch: 4626 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4627 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%


 31%|███       | 4629/15000 [09:08<23:40,  7.30it/s]


 epoch: 4628 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4629 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 31%|███       | 4631/15000 [09:09<23:02,  7.50it/s]


input:       these areas are descendants of the ancient pangaea supercontinent along with landmasses such as the americas and afro eurasia

target:      these areas are descendants of the ancient pangaea supercontinent along with landmasses such as the americas and afro eurasia volcanic

prediction:  these areas are descendants of the ancient pangaea supercontinent along with landmasses such as the americas and afro eurasia the

 epoch: 4630 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.27, test_acc: 96.7%

 epoch: 4631 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 31%|███       | 4634/15000 [09:09<19:52,  8.69it/s]


 epoch: 4632 | train_loss: 0.25, train_acc: 96.7% | test_loss: 0.23, test_acc: 97.0%

 epoch: 4633 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4634 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 31%|███       | 4636/15000 [09:09<20:15,  8.53it/s]


 epoch: 4635 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4636 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 31%|███       | 4637/15000 [09:09<20:34,  8.39it/s]


 epoch: 4637 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 31%|███       | 4640/15000 [09:10<30:18,  5.70it/s]


 epoch: 4638 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.3%

 epoch: 4639 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

input:       to the office no more than twice the president is not elected by direct vote but by an indirect

target:      to the office no more than twice the president is not elected by direct vote but by an indirect electoral

prediction:  to the office no more than twice the president is not elected by direct vote but by an indirect the

 epoch: 4640 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 31%|███       | 4642/15000 [09:10<24:31,  7.04it/s]


 epoch: 4641 | train_loss: 0.22, train_acc: 97.5% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4642 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4643 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 31%|███       | 4646/15000 [09:11<18:56,  9.11it/s]


 epoch: 4644 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 4645 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 4646 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 31%|███       | 4648/15000 [09:11<17:51,  9.66it/s]


 epoch: 4647 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%

 epoch: 4648 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.2%

 epoch: 4649 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%


 31%|███       | 4650/15000 [09:11<18:17,  9.43it/s]


input:       considered the pallium an appropriate garment both for christians in contrast to the toga and for educated people roman

target:      considered the pallium an appropriate garment both for christians in contrast to the toga and for educated people roman clothing

prediction:  considered the pallium an appropriate garment both for christians in contrast to the toga and for educated people roman the

 epoch: 4650 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 4651 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 31%|███       | 4654/15000 [09:12<25:46,  6.69it/s]


 epoch: 4652 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.27, test_acc: 97.0%

 epoch: 4653 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4654 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 31%|███       | 4656/15000 [09:12<22:14,  7.75it/s]


 epoch: 4655 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4656 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 4657 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 31%|███       | 4660/15000 [09:12<18:53,  9.12it/s]


 epoch: 4658 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4659 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

input:       or nearly percent of the total area of the united states was given away free to million homesteaders most

target:      or nearly percent of the total area of the united states was given away free to million homesteaders most of

prediction:  or nearly percent of the total area of the united states was given away free to million homesteaders most the

 epoch: 4660 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 31%|███       | 4662/15000 [09:13<17:35,  9.80it/s]


 epoch: 4661 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4662 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 4663 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 31%|███       | 4666/15000 [09:13<16:31, 10.42it/s]


 epoch: 4664 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 4665 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4666 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 31%|███       | 4668/15000 [09:14<26:12,  6.57it/s]


 epoch: 4667 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.3%

 epoch: 4668 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4669 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%


 31%|███       | 4670/15000 [09:14<23:48,  7.23it/s]


input:       complex and barents sea the northern plain contains the old geological continent of baltica and so may be regarded

target:      complex and barents sea the northern plain contains the old geological continent of baltica and so may be regarded geologically

prediction:  complex and barents sea the northern plain contains the old geological continent of baltica and so may be regarded the

 epoch: 4670 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.27, test_acc: 96.8%

 epoch: 4671 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 31%|███       | 4674/15000 [09:14<19:22,  8.89it/s]


 epoch: 4672 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 4673 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4674 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 31%|███       | 4676/15000 [09:14<17:55,  9.60it/s]


 epoch: 4675 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4676 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4677 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 31%|███       | 4680/15000 [09:15<17:18,  9.94it/s]


 epoch: 4678 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4679 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

input:       also introduced in the proceeding centuries via the slave trade european colonists were heavily dependent on indigenous labor during

target:      also introduced in the proceeding centuries via the slave trade european colonists were heavily dependent on indigenous labor during the

prediction:  also introduced in the proceeding centuries via the slave trade european colonists were heavily dependent on indigenous labor during the

 epoch: 4680 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%


 31%|███       | 4682/15000 [09:15<25:44,  6.68it/s]


 epoch: 4681 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4682 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4683 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.4%


 31%|███       | 4686/15000 [09:16<20:17,  8.47it/s]


 epoch: 4684 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%

 epoch: 4685 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4686 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 31%|███▏      | 4689/15000 [09:16<17:56,  9.58it/s]


 epoch: 4687 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4688 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4689 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 31%|███▏      | 4691/15000 [09:16<17:53,  9.61it/s]


input:       in typical fashion continued to fight macedon until it was completely absorbed into the roman republic by bc in

target:      in typical fashion continued to fight macedon until it was completely absorbed into the roman republic by bc in the

prediction:  in typical fashion continued to fight macedon until it was completely absorbed into the roman republic by bc in the

 epoch: 4690 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 4691 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 31%|███▏      | 4693/15000 [09:16<17:40,  9.72it/s]


 epoch: 4692 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.3%

 epoch: 4693 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.2%

 epoch: 4694 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 31%|███▏      | 4697/15000 [09:17<21:47,  7.88it/s]


 epoch: 4695 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 4696 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4697 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%


 31%|███▏      | 4699/15000 [09:17<19:47,  8.68it/s]


 epoch: 4698 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 4699 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

input:       the mouth such as the sounds called laterals because the air flows along both sides of the tongue and

target:      the mouth such as the sounds called laterals because the air flows along both sides of the tongue and the

prediction:  the mouth such as the sounds called laterals because the air flows along both sides of the tongue and the


 31%|███▏      | 4702/15000 [09:17<18:20,  9.36it/s]


 epoch: 4700 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.27, test_acc: 96.9%

 epoch: 4701 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.3%

 epoch: 4702 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 31%|███▏      | 4704/15000 [09:17<17:10, 10.00it/s]


 epoch: 4703 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4704 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4705 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 31%|███▏      | 4708/15000 [09:18<16:40, 10.29it/s]


 epoch: 4706 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4707 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4708 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%


 31%|███▏      | 4710/15000 [09:19<28:48,  5.95it/s]


 epoch: 4709 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

input:       the areas of sculpture and architecture in the west the art of the roman empire was largely derived from

target:      the areas of sculpture and architecture in the west the art of the roman empire was largely derived from greek

prediction:  the areas of sculpture and architecture in the west the art of the roman empire was largely derived from the

 epoch: 4710 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 4711 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 31%|███▏      | 4714/15000 [09:19<21:27,  7.99it/s]


 epoch: 4712 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.21, test_acc: 96.9%

 epoch: 4713 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4714 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.2%


 31%|███▏      | 4716/15000 [09:19<19:24,  8.83it/s]


 epoch: 4715 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.4%

 epoch: 4716 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4717 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%


 31%|███▏      | 4718/15000 [09:19<18:14,  9.39it/s]


 epoch: 4718 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4719 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.27, test_acc: 97.0%

input:       to south america arguably occurred approximately to mya and the great lakes as well as many other northern freshwater

target:      to south america arguably occurred approximately to mya and the great lakes as well as many other northern freshwater lakes

prediction:  to south america arguably occurred approximately to mya and the great lakes as well as many other northern freshwater the


 31%|███▏      | 4722/15000 [09:20<17:39,  9.70it/s]


 epoch: 4720 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 4721 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.2%

 epoch: 4722 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.27, test_acc: 97.1%

 epoch: 4723 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.0%


 32%|███▏      | 4725/15000 [09:20<24:08,  7.09it/s]


 epoch: 4724 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.4%

 epoch: 4725 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 32%|███▏      | 4727/15000 [09:20<21:21,  8.02it/s]


 epoch: 4726 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4727 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 32%|███▏      | 4729/15000 [09:21<20:53,  8.19it/s]


 epoch: 4728 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 4729 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 32%|███▏      | 4730/15000 [09:21<23:15,  7.36it/s]


input:       occupied by japan until the country liberation by the led forces in during the war the united states

target:      occupied by japan until the country liberation by the led forces in during the war the united states was

prediction:  occupied by japan until the country liberation by the led forces in during the war the united states the

 epoch: 4730 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 32%|███▏      | 4732/15000 [09:21<22:19,  7.67it/s]


 epoch: 4731 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4732 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 32%|███▏      | 4734/15000 [09:21<21:51,  7.83it/s]


 epoch: 4733 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4734 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 32%|███▏      | 4736/15000 [09:22<21:23,  7.99it/s]


 epoch: 4735 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4736 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 32%|███▏      | 4737/15000 [09:22<21:45,  7.86it/s]


 epoch: 4737 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 32%|███▏      | 4739/15000 [09:22<37:10,  4.60it/s]


 epoch: 4738 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.1%

 epoch: 4739 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       and the officers demanded valentinian choose co ruler on march valentinian chose his own younger brother valens and the


 32%|███▏      | 4741/15000 [09:23<29:41,  5.76it/s]


target:      and the officers demanded valentinian choose co ruler on march valentinian chose his own younger brother valens and the two

prediction:  and the officers demanded valentinian choose co ruler on march valentinian chose his own younger brother valens and the the

 epoch: 4740 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.1%

 epoch: 4741 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.0%


 32%|███▏      | 4743/15000 [09:23<24:14,  7.05it/s]


 epoch: 4742 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4743 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 32%|███▏      | 4745/15000 [09:23<20:17,  8.43it/s]


 epoch: 4744 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.6%

 epoch: 4745 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4746 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 32%|███▏      | 4748/15000 [09:23<18:01,  9.48it/s]


 epoch: 4747 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 4748 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 4749 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 32%|███▏      | 4751/15000 [09:24<18:03,  9.46it/s]


input:       not nearly as complex as the painting ordered by the monks of san donato having only four figures rather

target:      not nearly as complex as the painting ordered by the monks of san donato having only four figures rather than

prediction:  not nearly as complex as the painting ordered by the monks of san donato having only four figures rather the

 epoch: 4750 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%

 epoch: 4751 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4752 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.19, test_acc: 97.4%


 32%|███▏      | 4755/15000 [09:24<25:43,  6.64it/s]


 epoch: 4753 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 4754 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 4755 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 32%|███▏      | 4757/15000 [09:25<22:05,  7.73it/s]


 epoch: 4756 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 4757 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 4758 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%


 32%|███▏      | 4761/15000 [09:25<18:45,  9.10it/s]


 epoch: 4759 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       with each other this seems not to have restricted trade which went on between the cities priestly elite kept

target:      with each other this seems not to have restricted trade which went on between the cities priestly elite kept astronomical

prediction:  with each other this seems not to have restricted trade which went on between the cities priestly elite kept the

 epoch: 4760 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4761 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 32%|███▏      | 4763/15000 [09:25<17:30,  9.74it/s]


 epoch: 4762 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%

 epoch: 4763 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4764 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%


 32%|███▏      | 4765/15000 [09:25<16:45, 10.18it/s]


 epoch: 4765 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4766 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.2%


 32%|███▏      | 4769/15000 [09:26<17:34,  9.70it/s]


 epoch: 4767 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 4768 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.9%

 epoch: 4769 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 32%|███▏      | 4771/15000 [09:26<17:29,  9.75it/s]


input:       of terror napoleon bonaparte rose to power in the aftermath of the french revolution and established the first french

target:      of terror napoleon bonaparte rose to power in the aftermath of the french revolution and established the first french empire

prediction:  of terror napoleon bonaparte rose to power in the aftermath of the french revolution and established the first french the

 epoch: 4770 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4771 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 32%|███▏      | 4773/15000 [09:26<17:00, 10.02it/s]


 epoch: 4772 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 4773 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%

 epoch: 4774 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 32%|███▏      | 4777/15000 [09:27<16:03, 10.61it/s]


 epoch: 4775 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 4776 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 4777 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 32%|███▏      | 4779/15000 [09:27<15:53, 10.72it/s]


 epoch: 4778 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4779 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

input:       is the great vowel shift in english which is the reason that the spelling of english vowels do not

target:      is the great vowel shift in english which is the reason that the spelling of english vowels do not correspond

prediction:  is the great vowel shift in english which is the reason that the spelling of english vowels do not the

 epoch: 4780 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.9%


 32%|███▏      | 4783/15000 [09:28<24:22,  6.98it/s]


 epoch: 4781 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 4782 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%

 epoch: 4783 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 32%|███▏      | 4785/15000 [09:28<21:21,  7.97it/s]


 epoch: 4784 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4785 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 4786 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 32%|███▏      | 4789/15000 [09:28<17:51,  9.53it/s]


 epoch: 4787 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.2%

 epoch: 4788 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.8%

 epoch: 4789 | train_loss: 0.23, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.0%


 32%|███▏      | 4791/15000 [09:28<18:03,  9.42it/s]


input:       include wide variety of learning modalities the learner personality may also affect educational achievement for example the features of

target:      include wide variety of learning modalities the learner personality may also affect educational achievement for example the features of conscientiousness

prediction:  include wide variety of learning modalities the learner personality may also affect educational achievement for example the features of the

 epoch: 4790 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4791 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 32%|███▏      | 4793/15000 [09:29<17:30,  9.72it/s]


 epoch: 4792 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4793 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%

 epoch: 4794 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%


 32%|███▏      | 4797/15000 [09:29<23:53,  7.12it/s]


 epoch: 4795 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 4796 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 4797 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 32%|███▏      | 4799/15000 [09:29<21:07,  8.05it/s]


 epoch: 4798 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4799 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.0%

input:       worship of the traditional gods continued the art of mummy portraiture flourished and some roman emperors had themselves depicted

target:      worship of the traditional gods continued the art of mummy portraiture flourished and some roman emperors had themselves depicted as

prediction:  worship of the traditional gods continued the art of mummy portraiture flourished and some roman emperors had themselves depicted the

 epoch: 4800 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.0%


 32%|███▏      | 4802/15000 [09:30<19:43,  8.62it/s]


 epoch: 4801 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 4802 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 4803 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 32%|███▏      | 4806/15000 [09:30<17:04,  9.95it/s]


 epoch: 4804 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%

 epoch: 4805 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4806 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 32%|███▏      | 4808/15000 [09:30<16:24, 10.35it/s]


 epoch: 4807 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4808 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.8%

 epoch: 4809 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.0%


 32%|███▏      | 4810/15000 [09:31<19:50,  8.56it/s]


input:       of the julio claudian dynasty with vespasian one of the first emperors outside the dynasty caesar evolved from family

target:      of the julio claudian dynasty with vespasian one of the first emperors outside the dynasty caesar evolved from family name

prediction:  of the julio claudian dynasty with vespasian one of the first emperors outside the dynasty caesar evolved from family the

 epoch: 4810 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 32%|███▏      | 4812/15000 [09:31<18:07,  9.37it/s]


 epoch: 4811 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 4812 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4813 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.9%


 32%|███▏      | 4816/15000 [09:31<16:20, 10.39it/s]


 epoch: 4814 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 4815 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.7%

 epoch: 4816 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%


 32%|███▏      | 4818/15000 [09:31<15:45, 10.77it/s]


 epoch: 4817 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4818 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 4819 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 32%|███▏      | 4820/15000 [09:32<16:53, 10.05it/s]


input:       figure of amboise wax model survives and if genuine is the only extant example of leonardo sculpture leonardo was

target:      figure of amboise wax model survives and if genuine is the only extant example of leonardo sculpture leonardo was otherwise

prediction:  figure of amboise wax model survives and if genuine is the only extant example of leonardo sculpture leonardo was the

 epoch: 4820 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4821 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.0%


 32%|███▏      | 4822/15000 [09:32<16:18, 10.40it/s]


 epoch: 4822 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4823 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 32%|███▏      | 4826/15000 [09:33<23:35,  7.19it/s]


 epoch: 4824 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4825 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 4826 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%


 32%|███▏      | 4829/15000 [09:33<19:31,  8.68it/s]


 epoch: 4827 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4828 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 4829 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 32%|███▏      | 4831/15000 [09:33<19:37,  8.63it/s]


input:       psychology is subfield of psychology personnel psychologists apply the methods and principles of psychology in selecting and evaluating

target:      psychology is subfield of psychology personnel psychologists apply the methods and principles of psychology in selecting and evaluating workers

prediction:  psychology is subfield of psychology personnel psychologists apply the methods and principles of psychology in selecting and evaluating the

 epoch: 4830 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 4831 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 32%|███▏      | 4833/15000 [09:33<19:24,  8.73it/s]


 epoch: 4832 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4833 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.2%


 32%|███▏      | 4835/15000 [09:33<19:15,  8.80it/s]


 epoch: 4834 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 4835 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%


 32%|███▏      | 4837/15000 [09:34<20:05,  8.43it/s]


 epoch: 4836 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4837 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 32%|███▏      | 4839/15000 [09:34<34:35,  4.89it/s]


 epoch: 4838 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.0%

 epoch: 4839 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

input:       of southeast asian nations asean brunei cambodia indonesia laos malaysia myanmar the philippines singapore thailand and vietnam are among

target:      

 32%|███▏      | 4841/15000 [09:35<28:54,  5.86it/s]

of southeast asian nations asean brunei cambodia indonesia laos malaysia myanmar the philippines singapore thailand and vietnam are among the

prediction:  of southeast asian nations asean brunei cambodia indonesia laos malaysia myanmar the philippines singapore thailand and vietnam are among the

 epoch: 4840 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4841 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 32%|███▏      | 4843/15000 [09:35<25:12,  6.72it/s]


 epoch: 4842 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4843 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 32%|███▏      | 4845/15000 [09:35<22:54,  7.39it/s]


 epoch: 4844 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4845 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 32%|███▏      | 4848/15000 [09:36<19:35,  8.64it/s]


 epoch: 4846 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 4847 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.2%

 epoch: 4848 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 32%|███▏      | 4850/15000 [09:36<22:10,  7.63it/s]


 epoch: 4849 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

input:       stalin himself were celebrated as heroes of soviet psychology soviet academics experienced degree of liberalization during the khrushchev thaw

target:      stalin himself were celebrated as heroes of soviet psychology soviet academics experienced degree of liberalization during the khrushchev thaw the

prediction:  stalin himself were celebrated as heroes of soviet psychology soviet academics experienced degree of liberalization during the khrushchev thaw the

 epoch: 4850 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 32%|███▏      | 4852/15000 [09:36<21:33,  7.85it/s]


 epoch: 4851 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4852 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%


 32%|███▏      | 4855/15000 [09:37<25:07,  6.73it/s]


 epoch: 4853 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%

 epoch: 4854 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 4855 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 32%|███▏      | 4857/15000 [09:37<21:03,  8.03it/s]


 epoch: 4856 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4857 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 4858 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%


 32%|███▏      | 4860/15000 [09:37<18:48,  8.98it/s]


 epoch: 4859 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

input:       with foreign leaders while these functions were clearly defined during the principate the emperor powers over time became less

target:      with foreign leaders while these functions were clearly defined during the principate the emperor powers over time became less constitutional

prediction:  with foreign leaders while these functions were clearly defined during the principate the emperor powers over time became less the

 epoch: 4860 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4861 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 32%|███▏      | 4864/15000 [09:37<16:30, 10.23it/s]


 epoch: 4862 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4863 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4864 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.3%


 32%|███▏      | 4866/15000 [09:38<16:09, 10.46it/s]


 epoch: 4865 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4866 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.1%


 32%|███▏      | 4868/15000 [09:38<27:58,  6.04it/s]


 epoch: 4867 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%

 epoch: 4868 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 4869 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 32%|███▏      | 4870/15000 [09:38<24:29,  6.89it/s]


input:       economies the british empire became dominant in south asia with large parts of the region first being conquered by

target:      economies the british empire became dominant in south asia with large parts of the region first being conquered by british

prediction:  economies the british empire became dominant in south asia with large parts of the region first being conquered by the

 epoch: 4870 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%

 epoch: 4871 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 32%|███▏      | 4874/15000 [09:39<18:56,  8.91it/s]


 epoch: 4872 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4873 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 4874 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 33%|███▎      | 4876/15000 [09:39<17:48,  9.47it/s]


 epoch: 4875 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.27, test_acc: 96.9%

 epoch: 4876 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4877 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 33%|███▎      | 4878/15000 [09:39<16:57,  9.95it/s]


 epoch: 4878 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.2%

 epoch: 4879 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

input:       the midwest and south overall the united states receives more high impact extreme weather incidents than any other country

target:      the midwest and south overall the united states receives more high impact extreme weather incidents than any other country in

prediction:  the midwest and south overall the united states receives more high impact extreme weather incidents than any other country the


 33%|███▎      | 4880/15000 [09:39<17:10,  9.82it/s]


 epoch: 4880 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 33%|███▎      | 4882/15000 [09:40<28:15,  5.97it/s]


 epoch: 4881 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4882 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 4883 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.4%


 33%|███▎      | 4886/15000 [09:40<21:18,  7.91it/s]


 epoch: 4884 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 4885 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.3%

 epoch: 4886 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 33%|███▎      | 4888/15000 [09:40<19:08,  8.80it/s]


 epoch: 4887 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 4888 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4889 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 33%|███▎      | 4890/15000 [09:41<18:52,  8.93it/s]


input:       has been termed the snowball earth and it is of particular interest as it precedes the cambrian explosion in

target:      has been termed the snowball earth and it is of particular interest as it precedes the cambrian explosion in which

prediction:  has been termed the snowball earth and it is of particular interest as it precedes the cambrian explosion in the

 epoch: 4890 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 4891 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 33%|███▎      | 4894/15000 [09:41<17:00,  9.91it/s]


 epoch: 4892 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 4893 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 4894 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.2%

 epoch: 4895 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 33%|███▎      | 4898/15000 [09:42<23:49,  7.07it/s]


 epoch: 4896 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4897 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4898 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.0%


 33%|███▎      | 4900/15000 [09:42<21:46,  7.73it/s]


 epoch: 4899 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%

input:       believe in higher power and consider themselves spiritual most americans pray daily attend religious services and consider religion important

target:      believe in higher power and consider themselves spiritual most americans pray daily attend religious services and consider religion important in

prediction:  believe in higher power and consider themselves spiritual most americans pray daily attend religious services and consider religion important the

 epoch: 4900 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4901 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 33%|███▎      | 4902/15000 [09:42<19:50,  8.48it/s]



 epoch: 4902 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 4903 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.2%


 33%|███▎      | 4906/15000 [09:43<17:06,  9.83it/s]


 epoch: 4904 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%

 epoch: 4905 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 4906 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 33%|███▎      | 4908/15000 [09:43<16:33, 10.16it/s]


 epoch: 4907 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4908 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4909 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

input:       extending as far as najran in present day saudi arabia rejection of the state religion became tantamount to treason

target:      extending as far as najran in present day saudi arabia rejection of the state religion became tantamount to treason this

prediction:  extending as far as najran in present day saudi arabia rejection of the state religion became tantamount to treason the


 33%|███▎      | 4912/15000 [09:44<23:28,  7.16it/s]


 epoch: 4910 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4911 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 4912 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 33%|███▎      | 4914/15000 [09:44<20:44,  8.11it/s]


 epoch: 4913 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4914 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 4915 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 33%|███▎      | 4918/15000 [09:44<17:34,  9.56it/s]


 epoch: 4916 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4917 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4918 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 33%|███▎      | 4920/15000 [09:44<17:55,  9.37it/s]


 epoch: 4919 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

input:       explorer rge ousland became the first person to cross antarctica alone from coast to coast helped by kite on

target:      explorer rge ousland became the first person to cross antarctica alone from coast to coast helped by kite on parts

prediction:  explorer rge ousland became the first person to cross antarctica alone from coast to coast helped by kite on the

 epoch: 4920 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.2%


 33%|███▎      | 4922/15000 [09:45<17:33,  9.57it/s]


 epoch: 4921 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 4922 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4923 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.4%


 33%|███▎      | 4926/15000 [09:45<23:46,  7.06it/s]


 epoch: 4924 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4925 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.4%

 epoch: 4926 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.8%


 33%|███▎      | 4928/15000 [09:45<20:50,  8.05it/s]


 epoch: 4927 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 4928 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4929 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%


 33%|███▎      | 4930/15000 [09:46<19:57,  8.41it/s]


input:       the debugging process normally begins with identifying the steps to reproduce the problem this can be non trivial task

target:      the debugging process normally begins with identifying the steps to reproduce the problem this can be non trivial task particularly

prediction:  the debugging process normally begins with identifying the steps to reproduce the problem this can be non trivial task the

 epoch: 4930 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 4931 | train_loss: 0.26, train_acc: 96.2% | test_loss: 0.21, test_acc: 97.2%


 33%|███▎      | 4934/15000 [09:46<16:58,  9.88it/s]


 epoch: 4932 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4933 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.8%

 epoch: 4934 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%


 33%|███▎      | 4936/15000 [09:46<16:29, 10.17it/s]


 epoch: 4935 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4936 | train_loss: 0.23, train_acc: 96.7% | test_loss: 0.23, test_acc: 97.0%


 33%|███▎      | 4938/15000 [09:46<17:04,  9.82it/s]


 epoch: 4937 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4938 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%


 33%|███▎      | 4940/15000 [09:47<24:17,  6.90it/s]


 epoch: 4939 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.3%

input:       any roman perceived to have some official capacity roman courts held original jurisdiction over cases involving roman citizens throughout

target:      any roman perceived to have some official capacity roman courts held original jurisdiction over cases involving roman citizens throughout the

prediction:  any roman perceived to have some official capacity roman courts held original jurisdiction over cases involving roman citizens throughout the

 epoch: 4940 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 33%|███▎      | 4942/15000 [09:47<22:24,  7.48it/s]


 epoch: 4941 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

 epoch: 4942 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 33%|███▎      | 4944/15000 [09:47<21:20,  7.85it/s]


 epoch: 4943 | train_loss: 0.27, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%

 epoch: 4944 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.8%


 33%|███▎      | 4946/15000 [09:48<20:42,  8.09it/s]


 epoch: 4945 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4946 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 33%|███▎      | 4948/15000 [09:48<19:46,  8.47it/s]


 epoch: 4947 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4948 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 33%|███▎      | 4950/15000 [09:48<22:16,  7.52it/s]


 epoch: 4949 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       no longer be commented or voted on the most popular posts from the site numerous subreddits are visible on

target:      no longer be commented or voted on the most popular posts from the site numerous subreddits are visible on the

prediction:  no longer be commented or voted on the most popular posts from the site numerous subreddits are visible on the

 epoch: 4950 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 33%|███▎      | 4952/15000 [09:48<21:04,  7.95it/s]


 epoch: 4951 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 4952 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 33%|███▎      | 4955/15000 [09:49<29:14,  5.73it/s]


 epoch: 4953 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 96.9%

 epoch: 4954 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.9%

 epoch: 4955 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 33%|███▎      | 4958/15000 [09:49<21:21,  7.84it/s]


 epoch: 4956 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4957 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4958 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%


 33%|███▎      | 4960/15000 [09:50<19:40,  8.50it/s]


 epoch: 4959 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%

input:       peninsula sometime in the early st millennium bc spread from there slowly west and eastward the hindu arabic numeral

target:      peninsula sometime in the early st millennium bc spread from there slowly west and eastward the hindu arabic numeral system

prediction:  peninsula sometime in the early st millennium bc spread from there slowly west and eastward the hindu arabic numeral the

 epoch: 4960 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 4961 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%


 33%|███▎      | 4964/15000 [09:50<16:45,  9.98it/s]


 epoch: 4962 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4963 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%

 epoch: 4964 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.3%


 33%|███▎      | 4966/15000 [09:50<16:30, 10.13it/s]


 epoch: 4965 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 4966 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 33%|███▎      | 4968/15000 [09:51<27:40,  6.04it/s]


 epoch: 4967 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 4968 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 4969 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 33%|███▎      | 4970/15000 [09:51<24:23,  6.85it/s]


input:       repositories on the site multiple desktop clients and git plugins are also available in addition the site provides social

target:      repositories on the site multiple desktop clients and git plugins are also available in addition the site provides social networking

prediction:  repositories on the site multiple desktop clients and git plugins are also available in addition the site provides social the

 epoch: 4970 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4971 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 33%|███▎      | 4974/15000 [09:51<19:12,  8.70it/s]


 epoch: 4972 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 4973 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 4974 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 33%|███▎      | 4976/15000 [09:51<17:38,  9.47it/s]


 epoch: 4975 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.8%

 epoch: 4976 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 4977 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.8%


 33%|███▎      | 4978/15000 [09:52<16:57,  9.85it/s]


 epoch: 4978 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.9%

 epoch: 4979 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

input:       having increased by factor of at least reheating occurred until the universe obtained the temperatures required for the production

target:      having increased by factor of at least reheating occurred until the universe obtained the temperatures required for the production of

prediction:  having increased by factor of at least reheating occurred until the universe obtained the temperatures required for the production the

 epoch: 4980 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%

 33%|███▎      | 4980/15000 [09:52<16:55,  9.86it/s]




 33%|███▎      | 4982/15000 [09:52<28:43,  5.81it/s]


 epoch: 4981 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4982 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4983 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 33%|███▎      | 4986/15000 [09:53<21:09,  7.89it/s]


 epoch: 4984 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 4985 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4986 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.4%


 33%|███▎      | 4988/15000 [09:53<19:00,  8.78it/s]


 epoch: 4987 | train_loss: 0.21, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%

 epoch: 4988 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.27, test_acc: 97.0%

 epoch: 4989 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 33%|███▎      | 4990/15000 [09:53<18:42,  8.91it/s]


input:       supersymmetry is an active area of research areas of mathematics in general are important to this field such as

target:      supersymmetry is an active area of research areas of mathematics in general are important to this field such as the

prediction:  supersymmetry is an active area of research areas of mathematics in general are important to this field such as the

 epoch: 4990 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 4991 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 33%|███▎      | 4994/15000 [09:54<16:29, 10.11it/s]


 epoch: 4992 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4993 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 4994 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4995 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 33%|███▎      | 4998/15000 [09:54<23:20,  7.14it/s]


 epoch: 4996 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 4997 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 4998 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 33%|███▎      | 5000/15000 [09:55<21:04,  7.91it/s]


 epoch: 4999 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

input:       atomic nuclei the most commonly known applications of nuclear physics are nuclear power generation and nuclear weapons technology but

target:      atomic nuclei the most commonly known applications of nuclear physics are nuclear power generation and nuclear weapons technology but the

prediction:  atomic nuclei the most commonly known applications of nuclear physics are nuclear power generation and nuclear weapons technology but the

 epoch: 5000 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5001 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 33%|███▎      | 5004/15000 [09:55<17:12,  9.69it/s]


 epoch: 5002 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%

 epoch: 5003 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.2%

 epoch: 5004 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 33%|███▎      | 5006/15000 [09:55<16:33, 10.06it/s]


 epoch: 5005 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5006 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5007 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 33%|███▎      | 5008/15000 [09:55<16:15, 10.24it/s]


 epoch: 5008 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.2%

 epoch: 5009 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

input:       those who see language as socially learned tool of communication such as psychologist michael tomasello see it as having

target:      those who see language as socially learned tool of communication such as psychologist michael tomasello see it as having developed

prediction:  those who see language as socially learned tool of communication such as psychologist michael tomasello see it as having the


 33%|███▎      | 5012/15000 [09:56<19:02,  8.74it/s]


 epoch: 5010 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

 epoch: 5011 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5012 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.2%


 33%|███▎      | 5014/15000 [09:56<17:33,  9.48it/s]


 epoch: 5013 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5014 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5015 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 33%|███▎      | 5018/15000 [09:56<15:25, 10.78it/s]


 epoch: 5016 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5017 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5018 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 33%|███▎      | 5020/15000 [09:57<16:00, 10.39it/s]


 epoch: 5019 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

input:       hawaii by npr in in its great voices series new zealand as culture is western culture which is influenced

target:      hawaii by npr in in its great voices series new zealand as culture is western culture which is influenced by

prediction:  hawaii by npr in in its great voices series new zealand as culture is western culture which is influenced the

 epoch: 5020 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.4%


 33%|███▎      | 5022/15000 [09:57<15:41, 10.59it/s]


 epoch: 5021 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 5022 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5023 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 34%|███▎      | 5026/15000 [09:57<22:59,  7.23it/s]


 epoch: 5024 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5025 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5026 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 34%|███▎      | 5028/15000 [09:58<20:22,  8.16it/s]


 epoch: 5027 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5028 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5029 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 96.9%


 34%|███▎      | 5030/15000 [09:58<19:27,  8.54it/s]


input:       bc the nok culture developed on the jos plateau it was highly centralized community the nok people produced lifelike

target:      bc the nok culture developed on the jos plateau it was highly centralized community the nok people produced lifelike representations

prediction:  bc the nok culture developed on the jos plateau it was highly centralized community the nok people produced lifelike the

 epoch: 5030 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5031 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 34%|███▎      | 5034/15000 [09:58<16:23, 10.13it/s]


 epoch: 5032 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5033 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5034 | train_loss: 0.25, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.2%


 34%|███▎      | 5036/15000 [09:58<15:45, 10.54it/s]


 epoch: 5035 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5036 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5037 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 34%|███▎      | 5038/15000 [09:59<15:41, 10.58it/s]


 epoch: 5038 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%


 34%|███▎      | 5040/15000 [09:59<24:12,  6.86it/s]


 epoch: 5039 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.27, test_acc: 97.0%

input:       war that was won largely through the support in men and materiel from the colonies the british began to

target:      war that was won largely through the support in men and materiel from the colonies the british began to assert

prediction:  war that was won largely through the support in men and materiel from the colonies the british began to the

 epoch: 5040 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 5041 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 34%|███▎      | 5043/15000 [09:59<21:01,  7.89it/s]


 epoch: 5042 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5043 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 34%|███▎      | 5046/15000 [10:00<19:09,  8.66it/s]


 epoch: 5044 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5045 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5046 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%


 34%|███▎      | 5049/15000 [10:00<17:40,  9.38it/s]


 epoch: 5047 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5048 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.2%

 epoch: 5049 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 34%|███▎      | 5050/15000 [10:00<21:32,  7.70it/s]


input:       attention to the origin both of laughter and of smiling as well as the development of the play instinct

target:      attention to the origin both of laughter and of smiling as well as the development of the play instinct and

prediction:  attention to the origin both of laughter and of smiling as well as the development of the play instinct of

 epoch: 5050 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.23, test_acc: 96.9%


 34%|███▎      | 5052/15000 [10:00<20:01,  8.28it/s]


 epoch: 5051 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5052 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.9%


 34%|███▎      | 5054/15000 [10:01<22:43,  7.29it/s]


 epoch: 5053 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5054 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.0%


 34%|███▎      | 5056/15000 [10:01<21:10,  7.83it/s]


 epoch: 5055 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5056 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.3%


 34%|███▎      | 5058/15000 [10:01<19:24,  8.54it/s]


 epoch: 5057 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.26, test_acc: 97.0%

 epoch: 5058 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 34%|███▎      | 5060/15000 [10:01<21:58,  7.54it/s]


 epoch: 5059 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

input:       boundaries was thus gradual process the bering strait and bering sea separate the landmasses of asia and north america

target:      boundaries was thus gradual process the bering strait and bering sea separate the landmasses of asia and north america as

prediction:  boundaries was thus gradual process the bering strait and bering sea separate the landmasses of asia and north america the

 epoch: 5060 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 34%|███▎      | 5062/15000 [10:02<21:12,  7.81it/s]


 epoch: 5061 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 5062 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 34%|███▍      | 5064/15000 [10:02<20:36,  8.04it/s]


 epoch: 5063 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5064 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 34%|███▍      | 5066/15000 [10:02<21:05,  7.85it/s]


 epoch: 5065 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.0%

 epoch: 5066 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 34%|███▍      | 5069/15000 [10:03<30:24,  5.44it/s]


 epoch: 5067 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 5068 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5069 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 34%|███▍      | 5070/15000 [10:03<27:56,  5.92it/s]


input:       around bce aristotle in his work poetics stated that comedy originated in phallic processions and the light treatment of

target:      around bce aristotle in his work poetics stated that comedy originated in phallic processions and the light treatment of the

prediction:  around bce aristotle in his work poetics stated that comedy originated in phallic processions and the light treatment of the

 epoch: 5070 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5071 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 34%|███▍      | 5074/15000 [10:03<19:20,  8.55it/s]


 epoch: 5072 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5073 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.7%

 epoch: 5074 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 34%|███▍      | 5076/15000 [10:04<17:35,  9.40it/s]


 epoch: 5075 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 5076 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5077 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%


 34%|███▍      | 5078/15000 [10:04<16:41,  9.91it/s]


 epoch: 5078 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5079 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.8%

input:       yu changsen wrote that some stress narrow vision of the pacific as those pacific islands excluding australia and even

target:      yu changsen wrote that some stress narrow vision of the pacific as those pacific islands excluding australia and even sometimes

prediction:  yu changsen wrote that some stress narrow vision of the pacific as those pacific islands excluding australia and even the


 34%|███▍      | 5080/15000 [10:04<16:49,  9.83it/s]


 epoch: 5080 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5081 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%


 34%|███▍      | 5084/15000 [10:05<19:44,  8.37it/s]


 epoch: 5082 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5083 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5084 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 34%|███▍      | 5087/15000 [10:05<17:07,  9.65it/s]


 epoch: 5085 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5086 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5087 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 34%|███▍      | 5089/15000 [10:05<16:11, 10.21it/s]


 epoch: 5088 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5089 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.9%

input:       and was the first european to suggest that the americas represented landmass not then known to europeans in waldseem

target:      and was the first european to suggest that the americas represented landmass not then known to europeans in waldseem ller

prediction:  and was the first european to suggest that the americas represented landmass not then known to europeans in waldseem the


 34%|███▍      | 5091/15000 [10:05<16:43,  9.87it/s]


 epoch: 5090 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 5091 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5092 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%


 34%|███▍      | 5095/15000 [10:06<15:33, 10.62it/s]


 epoch: 5093 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 5094 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.4%

 epoch: 5095 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 34%|███▍      | 5097/15000 [10:06<16:26, 10.03it/s]


 epoch: 5096 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5097 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5098 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 34%|███▍      | 5101/15000 [10:06<16:01, 10.29it/s]


 epoch: 5099 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

input:       archimedes are still used in mathematical teaching today the greeks developed astronomy which they treated as branch of mathematics

target:      archimedes are still used in mathematical teaching today the greeks developed astronomy which they treated as branch of mathematics to

prediction:  archimedes are still used in mathematical teaching today the greeks developed astronomy which they treated as branch of mathematics the

 epoch: 5100 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5101 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 34%|███▍      | 5103/15000 [10:06<15:21, 10.74it/s]


 epoch: 5102 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.8%

 epoch: 5103 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5104 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 34%|███▍      | 5107/15000 [10:07<15:04, 10.94it/s]


 epoch: 5105 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5106 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5107 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 34%|███▍      | 5109/15000 [10:07<15:02, 10.96it/s]


 epoch: 5108 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

 epoch: 5109 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

input:       binet death stanford professor lewis terman modified the binet simon scale renamed the stanford binet scale and introduced the

target:      binet death stanford professor lewis terman modified the binet simon scale renamed the stanford binet scale and introduced the intelligence

prediction:  binet death stanford professor lewis terman modified the binet simon scale renamed the stanford binet scale and introduced the the


 34%|███▍      | 5111/15000 [10:08<28:07,  5.86it/s]


 epoch: 5110 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5111 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5112 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 34%|███▍      | 5115/15000 [10:08<21:01,  7.83it/s]


 epoch: 5113 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5114 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5115 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%


 34%|███▍      | 5117/15000 [10:08<19:06,  8.62it/s]


 epoch: 5116 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5117 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5118 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.8%


 34%|███▍      | 5119/15000 [10:08<17:38,  9.34it/s]


 epoch: 5119 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

input:       administered as dependency of saint helena ascension and tristan da cunha the island is geopolitically part of africa an

target:      administered as dependency of saint helena ascension and tristan da cunha the island is geopolitically part of africa an uninhabited

prediction:  administered as dependency of saint helena ascension and tristan da cunha the island is geopolitically part of africa an the

 epoch: 5120 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.4%


 34%|███▍      | 5123/15000 [10:09<16:53,  9.75it/s]


 epoch: 5121 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5122 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%

 epoch: 5123 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5124 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%


 34%|███▍      | 5127/15000 [10:10<23:18,  7.06it/s]


 epoch: 5125 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5126 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5127 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.0%


 34%|███▍      | 5129/15000 [10:10<20:38,  7.97it/s]


 epoch: 5128 | train_loss: 0.27, train_acc: 96.8% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5129 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

input:       the th century have had several vastly different geographic meanings since their inception the chief factor in determining which

target:      the th century have had several vastly different geographic meanings since their inception the chief factor in determining which islands

prediction:  the th century have had several vastly different geographic meanings since their inception the chief factor in determining which the

 epoch: 5130 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 34%|███▍      | 5133/15000 [10:10<17:43,  9.27it/s]


 epoch: 5131 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5132 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.8%

 epoch: 5133 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.8%


 34%|███▍      | 5135/15000 [10:10<16:38,  9.88it/s]


 epoch: 5134 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5135 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 5136 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.3%


 34%|███▍      | 5137/15000 [10:10<15:52, 10.35it/s]


 epoch: 5137 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5138 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 34%|███▍      | 5139/15000 [10:11<16:48,  9.78it/s]


 epoch: 5139 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.23, test_acc: 97.2%

input:       hattusa was sacked ending the hittite empire israel and judah were related iron age kingdoms of the ancient levant

target:      hattusa was sacked ending the hittite empire israel and judah were related iron age kingdoms of the ancient levant and

prediction:  hattusa was sacked ending the hittite empire israel and judah were related iron age kingdoms of the ancient levant the

 epoch: 5140 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 96.9%


 34%|███▍      | 5143/15000 [10:11<15:56, 10.31it/s]


 epoch: 5141 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5142 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5143 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 34%|███▍      | 5145/15000 [10:11<15:24, 10.66it/s]


 epoch: 5144 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.0%

 epoch: 5145 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5146 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 34%|███▍      | 5149/15000 [10:12<14:47, 11.10it/s]


 epoch: 5147 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5148 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5149 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 34%|███▍      | 5151/15000 [10:12<15:31, 10.58it/s]


input:       at the end of the cold war and fall of the ussr resulted in severe economic and political turmoil

target:      at the end of the cold war and fall of the ussr resulted in severe economic and political turmoil in

prediction:  at the end of the cold war and fall of the ussr resulted in severe economic and political turmoil the

 epoch: 5150 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 5151 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 34%|███▍      | 5153/15000 [10:12<15:15, 10.76it/s]


 epoch: 5152 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%

 epoch: 5153 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5154 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 34%|███▍      | 5157/15000 [10:12<14:35, 11.24it/s]


 epoch: 5155 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 5156 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%

 epoch: 5157 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 34%|███▍      | 5159/15000 [10:12<14:24, 11.38it/s]


 epoch: 5158 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 5159 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

input:       mainly in the computing field individual software projects commonly use five programming languages or more programming languages differ from

target:      mainly in the computing field individual software projects commonly use five programming languages or more programming languages differ from most

prediction:  mainly in the computing field individual software projects commonly use five programming languages or more programming languages differ from the

 epoch: 5160 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 34%|███▍      | 5161/15000 [10:13<15:21, 10.67it/s]


 epoch: 5161 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5162 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5163 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 34%|███▍      | 5163/15000 [10:13<15:43, 10.43it/s]



 epoch: 5164 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5165 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 34%|███▍      | 5165/15000 [10:13<16:00, 10.24it/s]



 epoch: 5166 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 34%|███▍      | 5167/15000 [10:13<17:00,  9.64it/s]


 epoch: 5167 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 34%|███▍      | 5169/15000 [10:14<20:22,  8.04it/s]


 epoch: 5168 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 5169 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

input:       the east as reinforcements for his own currently unsuccessful campaign against shapur ii of persia this order led the


 34%|███▍      | 5171/15000 [10:14<21:11,  7.73it/s]


target:      the east as reinforcements for his own currently unsuccessful campaign against shapur ii of persia this order led the gallic

prediction:  the east as reinforcements for his own currently unsuccessful campaign against shapur ii of persia this order led the the

 epoch: 5170 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5171 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 34%|███▍      | 5173/15000 [10:14<20:23,  8.03it/s]


 epoch: 5172 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5173 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 34%|███▍      | 5175/15000 [10:14<19:39,  8.33it/s]


 epoch: 5174 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5175 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 35%|███▍      | 5177/15000 [10:15<18:58,  8.63it/s]


 epoch: 5176 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5177 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 35%|███▍      | 5179/15000 [10:15<19:52,  8.24it/s]


 epoch: 5178 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5179 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 35%|███▍      | 5180/15000 [10:15<22:51,  7.16it/s]


input:       held at different locations in europe and with wide international participation the sixth congress held in geneva in included

target:      held at different locations in europe and with wide international participation the sixth congress held in geneva in included presentations

prediction:  held at different locations in europe and with wide international participation the sixth congress held in geneva in included the

 epoch: 5180 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 35%|███▍      | 5182/15000 [10:16<32:18,  5.07it/s]


 epoch: 5181 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 5182 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 35%|███▍      | 5184/15000 [10:16<25:59,  6.29it/s]


 epoch: 5183 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5184 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%


 35%|███▍      | 5186/15000 [10:16<22:22,  7.31it/s]


 epoch: 5185 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

 epoch: 5186 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%


 35%|███▍      | 5189/15000 [10:16<17:09,  9.53it/s]


 epoch: 5187 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5188 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5189 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 35%|███▍      | 5190/15000 [10:16<18:10,  8.99it/s]


input:       by bombarding fort sumter federal garrison in charleston harbor in south carolina the american civil war the deadliest military

target:      by bombarding fort sumter federal garrison in charleston harbor in south carolina the american civil war the deadliest military conflict

prediction:  by bombarding fort sumter federal garrison in charleston harbor in south carolina the american civil war the deadliest military the

 epoch: 5190 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 5191 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 35%|███▍      | 5194/15000 [10:17<15:46, 10.36it/s]


 epoch: 5192 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5193 | train_loss: 0.25, train_acc: 96.6% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5194 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.9%


 35%|███▍      | 5196/15000 [10:17<15:47, 10.35it/s]


 epoch: 5195 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

 epoch: 5196 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.2%


 35%|███▍      | 5198/15000 [10:18<28:21,  5.76it/s]


 epoch: 5197 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.1%

 epoch: 5198 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5199 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 35%|███▍      | 5200/15000 [10:18<24:31,  6.66it/s]


input:       of minecraft was released on november port was made available for windows phones shortly after microsoft acquired mojang the

target:      of minecraft was released on november port was made available for windows phones shortly after microsoft acquired mojang the port

prediction:  of minecraft was released on november port was made available for windows phones shortly after microsoft acquired mojang the the

 epoch: 5200 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5201 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 35%|███▍      | 5204/15000 [10:18<19:06,  8.55it/s]


 epoch: 5202 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5203 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 5204 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%


 35%|███▍      | 5206/15000 [10:18<17:29,  9.33it/s]


 epoch: 5205 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5206 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5207 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.2%


 35%|███▍      | 5210/15000 [10:19<16:15, 10.03it/s]


 epoch: 5208 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5209 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

input:       predominate in north asia and some parts of northern kazakhstan europe has about indigenous languages mostly falling within three

target:      predominate in north asia and some parts of northern kazakhstan europe has about indigenous languages mostly falling within three indo

prediction:  predominate in north asia and some parts of northern kazakhstan europe has about indigenous languages mostly falling within three the

 epoch: 5210 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 35%|███▍      | 5212/15000 [10:19<23:56,  6.81it/s]


 epoch: 5211 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5212 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5213 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.9%


 35%|███▍      | 5216/15000 [10:20<18:45,  8.69it/s]


 epoch: 5214 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5215 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5216 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%


 35%|███▍      | 5218/15000 [10:20<17:15,  9.45it/s]


 epoch: 5217 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5218 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 5219 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%


 35%|███▍      | 5220/15000 [10:20<17:33,  9.28it/s]


input:       the humour derived gets its appeal from the ridiculousness and unlikeliness of the situation the genre has roots in

target:      the humour derived gets its appeal from the ridiculousness and unlikeliness of the situation the genre has roots in surrealism

prediction:  the humour derived gets its appeal from the ridiculousness and unlikeliness of the situation the genre has roots in the

 epoch: 5220 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5221 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%


 35%|███▍      | 5224/15000 [10:20<15:48, 10.31it/s]


 epoch: 5222 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5223 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5224 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%

 epoch: 5225 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 35%|███▍      | 5228/15000 [10:21<17:01,  9.57it/s]


 epoch: 5226 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5227 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%

 epoch: 5228 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.7%


 35%|███▍      | 5230/15000 [10:21<17:05,  9.53it/s]


 epoch: 5229 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

input:       of all levels including post secondary institutions community organizations and learning centers government or private research firms and independent

target:      of all levels including post secondary institutions community organizations and learning centers government or private research firms and independent or

prediction:  of all levels including post secondary institutions community organizations and learning centers government or private research firms and independent the

 epoch: 5230 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.7%

 epoch: 5231 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 35%|███▍      | 5234/15000 [10:21<15:08, 10.74it/s]


 epoch: 5232 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5233 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5234 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 35%|███▍      | 5236/15000 [10:22<15:07, 10.76it/s]


 epoch: 5235 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 5236 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.4%

 epoch: 5237 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 35%|███▍      | 5238/15000 [10:22<14:55, 10.90it/s]


 epoch: 5238 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5239 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%

input:       work that was of ground breaking importance in terms of composition two of the three were never finished and

target:      work that was of ground breaking importance in terms of composition two of the three were never finished and the

prediction:  work that was of ground breaking importance in terms of composition two of the three were never finished and the


 35%|███▍      | 5242/15000 [10:23<22:22,  7.27it/s]


 epoch: 5240 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5241 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5242 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 35%|███▍      | 5244/15000 [10:23<19:46,  8.23it/s]


 epoch: 5243 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5244 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5245 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%


 35%|███▍      | 5248/15000 [10:23<16:39,  9.76it/s]


 epoch: 5246 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 5247 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 5248 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 35%|███▌      | 5250/15000 [10:23<16:35,  9.80it/s]


 epoch: 5249 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.0%

input:       this century it was during this period that the european colonization of the americas began in earnest including the

target:      this century it was during this period that the european colonization of the americas began in earnest including the exploitation

prediction:  this century it was during this period that the european colonization of the americas began in earnest including the the

 epoch: 5250 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5251 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 35%|███▌      | 5254/15000 [10:24<15:49, 10.26it/s]


 epoch: 5252 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5253 | train_loss: 0.25, train_acc: 96.7% | test_loss: 0.21, test_acc: 97.2%

 epoch: 5254 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%


 35%|███▌      | 5256/15000 [10:24<15:06, 10.75it/s]


 epoch: 5255 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5256 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5257 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 35%|███▌      | 5260/15000 [10:24<15:03, 10.78it/s]


 epoch: 5258 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5259 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%

input:       commonwealth games three times auckland christchurch and auckland the pacific games formerly known as the south pacific games is

target:      commonwealth games three times auckland christchurch and auckland the pacific games formerly known as the south pacific games is multi

prediction:  commonwealth games three times auckland christchurch and auckland the pacific games formerly known as the south pacific games is the

 epoch: 5260 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 35%|███▌      | 5262/15000 [10:24<14:45, 11.00it/s]


 epoch: 5261 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5262 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5263 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 35%|███▌      | 5266/15000 [10:25<14:35, 11.12it/s]


 epoch: 5264 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5265 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.4%

 epoch: 5266 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5267 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 35%|███▌      | 5270/15000 [10:26<22:42,  7.14it/s]


 epoch: 5268 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 5269 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

input:       temperature collision dynamics and the effects of electron correlation on structure and dynamics atomic physics is influenced by the

target:      temperature collision dynamics and the effects of electron correlation on structure and dynamics atomic physics is influenced by the nucleus

prediction:  temperature collision dynamics and the effects of electron correlation on structure and dynamics atomic physics is influenced by the the

 epoch: 5270 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 35%|███▌      | 5273/15000 [10:26<19:09,  8.46it/s]


 epoch: 5271 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5272 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 5273 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.2%


 35%|███▌      | 5275/15000 [10:26<17:22,  9.33it/s]


 epoch: 5274 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5275 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 5276 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 35%|███▌      | 5277/15000 [10:26<16:23,  9.89it/s]


 epoch: 5277 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5278 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 35%|███▌      | 5279/15000 [10:26<16:49,  9.63it/s]


 epoch: 5279 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

input:       addition there is new impetus from the chinese belt and road initiative across the suez canal towards africa and

target:      addition there is new impetus from the chinese belt and road initiative across the suez canal towards africa and asia

prediction:  addition there is new impetus from the chinese belt and road initiative across the suez canal towards africa and the

 epoch: 5280 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%


 35%|███▌      | 5281/15000 [10:27<18:21,  8.82it/s]


 epoch: 5281 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 35%|███▌      | 5283/15000 [10:27<29:03,  5.57it/s]


 epoch: 5282 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.21, test_acc: 96.8%

 epoch: 5283 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 35%|███▌      | 5286/15000 [10:28<22:08,  7.31it/s]


 epoch: 5284 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5285 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5286 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%


 35%|███▌      | 5288/15000 [10:28<21:26,  7.55it/s]


 epoch: 5287 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5288 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%


 35%|███▌      | 5290/15000 [10:28<22:46,  7.11it/s]


 epoch: 5289 | train_loss: 0.23, train_acc: 96.4% | test_loss: 0.23, test_acc: 97.4%

input:       that are important for asia individual main routes have emerged from this the main route leads from the chinese

target:      that are important for asia individual main routes have emerged from this the main route leads from the chinese coast

prediction:  that are important for asia individual main routes have emerged from this the main route leads from the chinese the

 epoch: 5290 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.3%


 35%|███▌      | 5292/15000 [10:28<21:28,  7.53it/s]


 epoch: 5291 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5292 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 35%|███▌      | 5294/15000 [10:29<20:02,  8.07it/s]


 epoch: 5293 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5294 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 35%|███▌      | 5296/15000 [10:29<19:23,  8.34it/s]


 epoch: 5295 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5296 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.2%


 35%|███▌      | 5299/15000 [10:29<24:08,  6.70it/s]


 epoch: 5297 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 5298 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.8%

 epoch: 5299 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 35%|███▌      | 5300/15000 [10:30<23:30,  6.87it/s]


input:       murder in orchestrated by his enemies in the senate stephanus the steward of the deceased julia flavia members of

target:      murder in orchestrated by his enemies in the senate stephanus the steward of the deceased julia flavia members of the

prediction:  murder in orchestrated by his enemies in the senate stephanus the steward of the deceased julia flavia members of the

 epoch: 5300 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.8%

 epoch: 5301 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 35%|███▌      | 5304/15000 [10:30<17:20,  9.32it/s]


 epoch: 5302 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5303 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5304 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 35%|███▌      | 5306/15000 [10:30<16:06, 10.03it/s]


 epoch: 5305 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5306 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5307 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 35%|███▌      | 5310/15000 [10:30<15:28, 10.44it/s]


 epoch: 5308 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5309 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%

input:       forces which drove the french from milan leonardo stayed in the city spending several months in at the medici

target:      forces which drove the french from milan leonardo stayed in the city spending several months in at the medici vaprio

prediction:  forces which drove the french from milan leonardo stayed in the city spending several months in at the medici the

 epoch: 5310 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 35%|███▌      | 5312/15000 [10:31<21:04,  7.66it/s]


 epoch: 5311 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5312 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5313 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 35%|███▌      | 5316/15000 [10:31<16:57,  9.52it/s]


 epoch: 5314 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.9%

 epoch: 5315 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5316 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.4%


 35%|███▌      | 5318/15000 [10:31<15:58, 10.10it/s]


 epoch: 5317 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5318 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5319 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 35%|███▌      | 5320/15000 [10:32<16:05, 10.02it/s]


input:       netflix success encouraged the creation of numerous other streaming services such as hulu youtube premium amazon prime video and

target:      netflix success encouraged the creation of numerous other streaming services such as hulu youtube premium amazon prime video and disney

prediction:  netflix success encouraged the creation of numerous other streaming services such as hulu youtube premium amazon prime video and the

 epoch: 5320 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.8%

 epoch: 5321 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 35%|███▌      | 5324/15000 [10:32<15:04, 10.70it/s]


 epoch: 5322 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5323 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5324 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 36%|███▌      | 5326/15000 [10:33<26:41,  6.04it/s]


 epoch: 5325 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 5326 | train_loss: 0.26, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5327 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%


 36%|███▌      | 5330/15000 [10:33<20:27,  7.88it/s]


 epoch: 5328 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 5329 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

input:       first domesticated by about bc small tribes living in the nile valley had developed into series of cultures demonstrating

target:      first domesticated by about bc small tribes living in the nile valley had developed into series of cultures demonstrating firm

prediction:  first domesticated by about bc small tribes living in the nile valley had developed into series of cultures demonstrating the

 epoch: 5330 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.2%


 36%|███▌      | 5332/15000 [10:33<18:19,  8.80it/s]


 epoch: 5331 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 5332 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5333 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.3%


 36%|███▌      | 5336/15000 [10:33<16:00, 10.06it/s]


 epoch: 5334 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 5335 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 5336 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 36%|███▌      | 5338/15000 [10:34<15:43, 10.24it/s]


 epoch: 5337 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.24, test_acc: 96.9%

 epoch: 5338 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5339 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

input:       ideologies and they often overlap in various ways for example teacher centered ideologies place the main emphasis on the

target:      ideologies and they often overlap in various ways for example teacher centered ideologies place the main emphasis on the teacher

prediction:  ideologies and they often overlap in various ways for example teacher centered ideologies place the main emphasis on the the


 36%|███▌      | 5342/15000 [10:35<23:18,  6.91it/s]


 epoch: 5340 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5341 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5342 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.2%


 36%|███▌      | 5344/15000 [10:35<20:22,  7.90it/s]


 epoch: 5343 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

 epoch: 5344 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5345 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 36%|███▌      | 5348/15000 [10:35<16:50,  9.55it/s]


 epoch: 5346 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5347 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5348 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 36%|███▌      | 5350/15000 [10:35<16:43,  9.61it/s]


 epoch: 5349 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

input:       nightclubs colleges or theatres however it is best suited to the controlled environment of purpose built comedy club citation

target:      nightclubs colleges or theatres however it is best suited to the controlled environment of purpose built comedy club citation needed

prediction:  nightclubs colleges or theatres however it is best suited to the controlled environment of purpose built comedy club citation the

 epoch: 5350 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 36%|███▌      | 5352/15000 [10:35<16:05,  9.99it/s]


 epoch: 5351 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5352 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5353 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 36%|███▌      | 5356/15000 [10:36<18:43,  8.58it/s]


 epoch: 5354 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5355 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5356 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 36%|███▌      | 5358/15000 [10:36<17:11,  9.34it/s]


 epoch: 5357 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 5358 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5359 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 36%|███▌      | 5360/15000 [10:36<17:05,  9.40it/s]


input:       achievement but being ordinary or average is promoted by some as noble condition as well the united states is

target:      achievement but being ordinary or average is promoted by some as noble condition as well the united states is considered

prediction:  achievement but being ordinary or average is promoted by some as noble condition as well the united states is the

 epoch: 5360 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5361 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 36%|███▌      | 5364/15000 [10:37<15:32, 10.33it/s]


 epoch: 5362 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5363 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5364 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.3%


 36%|███▌      | 5366/15000 [10:37<15:17, 10.50it/s]


 epoch: 5365 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 5366 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5367 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%


 36%|███▌      | 5370/15000 [10:38<23:15,  6.90it/s]


 epoch: 5368 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5369 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

input:       of photosynthesis the sun energy could be harvested to create conditions which allowed for more complex life forms the

target:      of photosynthesis the sun energy could be harvested to create conditions which allowed for more complex life forms the resultant

prediction:  of photosynthesis the sun energy could be harvested to create conditions which allowed for more complex life forms the the

 epoch: 5370 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.4%


 36%|███▌      | 5372/15000 [10:38<20:14,  7.93it/s]


 epoch: 5371 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5372 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5373 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 36%|███▌      | 5376/15000 [10:38<16:36,  9.66it/s]


 epoch: 5374 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5375 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5376 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 36%|███▌      | 5378/15000 [10:38<15:53, 10.09it/s]


 epoch: 5377 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.2%

 epoch: 5378 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5379 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 36%|███▌      | 5380/15000 [10:39<16:12,  9.89it/s]


input:       note that psychologists train in university psychology departments and ob specialists in business schools one role for psychologists

target:      note that psychologists train in university psychology departments and ob specialists in business schools one role for psychologists in

prediction:  note that psychologists train in university psychology departments and ob specialists in business schools one role for psychologists the

 epoch: 5380 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5381 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 36%|███▌      | 5382/15000 [10:39<15:51, 10.11it/s]


 epoch: 5382 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 36%|███▌      | 5384/15000 [10:39<26:53,  5.96it/s]


 epoch: 5383 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5384 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5385 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 36%|███▌      | 5388/15000 [10:40<20:53,  7.67it/s]


 epoch: 5386 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5387 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 5388 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 36%|███▌      | 5390/15000 [10:40<21:32,  7.44it/s]


 epoch: 5389 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

input:       to develop of their own accord the concept of nature as whole the physical universe is one of several

target:      to develop of their own accord the concept of nature as whole the physical universe is one of several expansions

prediction:  to develop of their own accord the concept of nature as whole the physical universe is one of several the

 epoch: 5390 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 36%|███▌      | 5392/15000 [10:40<19:32,  8.19it/s]


 epoch: 5391 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5392 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 36%|███▌      | 5394/15000 [10:41<19:04,  8.40it/s]


 epoch: 5393 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5394 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 36%|███▌      | 5396/15000 [10:41<19:15,  8.31it/s]


 epoch: 5395 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 5396 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 36%|███▌      | 5398/15000 [10:41<18:52,  8.48it/s]


 epoch: 5397 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 5398 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 36%|███▌      | 5400/15000 [10:41<19:49,  8.07it/s]


 epoch: 5399 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

input:       addition of new rock units both depositionally and intrusively often occurs during deformation faulting and other deformational processes result

target:      addition of new rock units both depositionally and intrusively often occurs during deformation faulting and other deformational processes result in

prediction:  addition of new rock units both depositionally and intrusively often occurs during deformation faulting and other deformational processes result the

 epoch: 5400 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 36%|███▌      | 5402/15000 [10:42<18:43,  8.54it/s]


 epoch: 5401 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.8%

 epoch: 5402 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 36%|███▌      | 5404/15000 [10:42<18:18,  8.74it/s]


 epoch: 5403 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 5404 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 36%|███▌      | 5406/15000 [10:42<17:46,  9.00it/s]


 epoch: 5405 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5406 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 36%|███▌      | 5408/15000 [10:42<18:18,  8.73it/s]


 epoch: 5407 | train_loss: 0.23, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5408 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 36%|███▌      | 5410/15000 [10:43<21:38,  7.39it/s]


 epoch: 5409 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%

input:       was announced on may and the project accepts waitlist registrations the verge said that github sponsors works exactly like

target:      was announced on may and the project accepts waitlist registrations the verge said that github sponsors works exactly like patreon

prediction:  was announced on may and the project accepts waitlist registrations the verge said that github sponsors works exactly like the

 epoch: 5410 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.3%


 36%|███▌      | 5413/15000 [10:43<28:22,  5.63it/s]


 epoch: 5411 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.8%

 epoch: 5412 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5413 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 36%|███▌      | 5415/15000 [10:43<22:22,  7.14it/s]


 epoch: 5414 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 5415 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.3%

 epoch: 5416 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 36%|███▌      | 5419/15000 [10:44<17:05,  9.34it/s]


 epoch: 5417 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5418 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5419 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%


 36%|███▌      | 5421/15000 [10:44<16:57,  9.41it/s]


input:       halves ruled by multiple emperors constantine the great began the process of christianizing the empire and established new capital

target:      halves ruled by multiple emperors constantine the great began the process of christianizing the empire and established new capital at

prediction:  halves ruled by multiple emperors constantine the great began the process of christianizing the empire and established new capital the

 epoch: 5420 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5421 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 36%|███▌      | 5423/15000 [10:44<16:17,  9.80it/s]


 epoch: 5422 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5423 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5424 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 36%|███▌      | 5427/15000 [10:45<23:24,  6.82it/s]


 epoch: 5425 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5426 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.3%

 epoch: 5427 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 36%|███▌      | 5430/15000 [10:45<20:16,  7.87it/s]


 epoch: 5428 | train_loss: 0.21, train_acc: 97.5% | test_loss: 0.25, test_acc: 96.8%

 epoch: 5429 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

input:       api is way for two or more computer programs to communicate with each other it is type of software

target:      api is way for two or more computer programs to communicate with each other it is type of software interface

prediction:  api is way for two or more computer programs to communicate with each other it is type of software the

 epoch: 5430 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 36%|███▌      | 5432/15000 [10:45<18:22,  8.68it/s]


 epoch: 5431 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 5432 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5433 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 36%|███▌      | 5436/15000 [10:46<16:07,  9.88it/s]


 epoch: 5434 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5435 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5436 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 36%|███▋      | 5438/15000 [10:46<15:28, 10.29it/s]


 epoch: 5437 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 5438 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5439 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

input:       receives instead of industrialization some attribute the first use of this term to daniel bell book the coming of

target:      receives instead of industrialization some attribute the first use of this term to daniel bell book the coming of post

prediction:  receives instead of industrialization some attribute the first use of this term to daniel bell book the coming of the


 36%|███▋      | 5442/15000 [10:47<18:32,  8.59it/s]


 epoch: 5440 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5441 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5442 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.1%


 36%|███▋      | 5445/15000 [10:47<16:23,  9.71it/s]


 epoch: 5443 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5444 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5445 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 36%|███▋      | 5447/15000 [10:47<15:28, 10.29it/s]


 epoch: 5446 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5447 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5448 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 36%|███▋      | 5449/15000 [10:47<14:41, 10.83it/s]


 epoch: 5449 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

input:       century adding member states but the united kingdom withdrew most eu member states introduced common currency the euro the

target:      century adding member states but the united kingdom withdrew most eu member states introduced common currency the euro the north

prediction:  century adding member states but the united kingdom withdrew most eu member states introduced common currency the euro the the

 epoch: 5450 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 36%|███▋      | 5453/15000 [10:48<14:50, 10.72it/s]


 epoch: 5451 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.1%

 epoch: 5452 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5453 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 36%|███▋      | 5455/15000 [10:48<25:19,  6.28it/s]


 epoch: 5454 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5455 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5456 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 36%|███▋      | 5459/15000 [10:48<18:51,  8.43it/s]


 epoch: 5457 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5458 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5459 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.2%


 36%|███▋      | 5461/15000 [10:49<17:57,  8.85it/s]


input:       legislative power is vested in both the government and the parliament of fiji fiji head of state is the

target:      legislative power is vested in both the government and the parliament of fiji fiji head of state is the president

prediction:  legislative power is vested in both the government and the parliament of fiji fiji head of state is the the

 epoch: 5460 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5461 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 36%|███▋      | 5463/15000 [10:49<16:50,  9.43it/s]


 epoch: 5462 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.5%

 epoch: 5463 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5464 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%


 36%|███▋      | 5467/15000 [10:49<15:28, 10.27it/s]


 epoch: 5465 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5466 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5467 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5468 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 36%|███▋      | 5470/15000 [10:50<24:17,  6.54it/s]


 epoch: 5469 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.4%

input:       honours bachelor or higher degree in physics or closely related discipline must be provided also the physicist must have

target:      honours bachelor or higher degree in physics or closely related discipline must be provided also the physicist must have completed

prediction:  honours bachelor or higher degree in physics or closely related discipline must be provided also the physicist must have the

 epoch: 5470 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 5471 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%


 36%|███▋      | 5474/15000 [10:50<17:59,  8.82it/s]


 epoch: 5472 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5473 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.3%

 epoch: 5474 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%


 37%|███▋      | 5476/15000 [10:50<16:41,  9.51it/s]


 epoch: 5475 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5476 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5477 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 37%|███▋      | 5480/15000 [10:51<15:41, 10.11it/s]


 epoch: 5478 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 96.9%

 epoch: 5479 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

input:       peace and prosperity he was so loved that he came to hold the power of monarch de facto if

target:      peace and prosperity he was so loved that he came to hold the power of monarch de facto if not

prediction:  peace and prosperity he was so loved that he came to hold the power of monarch de facto if the

 epoch: 5480 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%


 37%|███▋      | 5482/15000 [10:51<15:11, 10.44it/s]


 epoch: 5481 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5482 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 37%|███▋      | 5484/15000 [10:51<17:41,  8.97it/s]


 epoch: 5483 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5484 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 5485 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%


 37%|███▋      | 5488/15000 [10:52<15:20, 10.33it/s]


 epoch: 5486 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.4%

 epoch: 5487 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5488 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 37%|███▋      | 5490/15000 [10:52<15:30, 10.22it/s]


 epoch: 5489 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

input:       or the senate preside over elections and it gave him the right to speak first at any meeting also

target:      or the senate preside over elections and it gave him the right to speak first at any meeting also included

prediction:  or the senate preside over elections and it gave him the right to speak first at any meeting also the

 epoch: 5490 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5491 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 37%|███▋      | 5494/15000 [10:52<14:22, 11.02it/s]


 epoch: 5492 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5493 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 5494 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 37%|███▋      | 5496/15000 [10:52<14:26, 10.96it/s]


 epoch: 5495 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.9%

 epoch: 5496 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 37%|███▋      | 5498/15000 [10:53<26:04,  6.08it/s]


 epoch: 5497 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5498 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 96.9%


 37%|███▋      | 5500/15000 [10:53<24:28,  6.47it/s]


 epoch: 5499 | train_loss: 0.24, train_acc: 96.2% | test_loss: 0.25, test_acc: 97.2%

input:       capital and other venture capital funds as of update github was estimated to be generating billion in annual recurring

target:      capital and other venture capital funds as of update github was estimated to be generating billion in annual recurring revenue

prediction:  capital and other venture capital funds as of update github was estimated to be generating billion in annual recurring the

 epoch: 5500 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%


 37%|███▋      | 5502/15000 [10:54<21:36,  7.33it/s]


 epoch: 5501 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5502 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 37%|███▋      | 5504/15000 [10:54<20:10,  7.84it/s]


 epoch: 5503 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5504 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 37%|███▋      | 5506/15000 [10:54<19:29,  8.12it/s]


 epoch: 5505 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5506 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%


 37%|███▋      | 5508/15000 [10:54<18:47,  8.42it/s]


 epoch: 5507 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5508 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.8%


 37%|███▋      | 5510/15000 [10:54<20:59,  7.54it/s]


 epoch: 5509 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       urging of the influential and powerful julia maesa who was grandmother of both cousins and who had arranged for

target:      urging of the influential and powerful julia maesa who was grandmother of both cousins and who had arranged for the

prediction:  urging of the influential and powerful julia maesa who was grandmother of both cousins and who had arranged for the

 epoch: 5510 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.1%


 37%|███▋      | 5512/15000 [10:55<36:05,  4.38it/s]


 epoch: 5511 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5512 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 37%|███▋      | 5514/15000 [10:55<26:24,  5.99it/s]


 epoch: 5513 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 5514 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%


 37%|███▋      | 5516/15000 [10:56<23:19,  6.78it/s]


 epoch: 5515 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5516 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 37%|███▋      | 5519/15000 [10:56<17:59,  8.78it/s]


 epoch: 5517 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5518 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5519 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 37%|███▋      | 5520/15000 [10:56<18:31,  8.53it/s]


input:       will identify with major religions that came from outside the continent mainly through colonisation there are several reasons for

target:      will identify with major religions that came from outside the continent mainly through colonisation there are several reasons for this

prediction:  will identify with major religions that came from outside the continent mainly through colonisation there are several reasons for the

 epoch: 5520 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5521 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 37%|███▋      | 5524/15000 [10:56<15:49,  9.98it/s]


 epoch: 5522 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.0%

 epoch: 5523 | train_loss: 0.22, train_acc: 96.8% | test_loss: 0.27, test_acc: 96.8%

 epoch: 5524 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5525 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 37%|███▋      | 5528/15000 [10:57<21:19,  7.40it/s]


 epoch: 5526 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5527 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5528 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%


 37%|███▋      | 5530/15000 [10:57<19:44,  8.00it/s]


 epoch: 5529 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

input:       founded june the spanish explorer alonso de salazar landed in the marshall islands in they were named by krusenstern

target:      founded june the spanish explorer alonso de salazar landed in the marshall islands in they were named by krusenstern after

prediction:  founded june the spanish explorer alonso de salazar landed in the marshall islands in they were named by krusenstern the

 epoch: 5530 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5531 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 37%|███▋      | 5534/15000 [10:58<16:04,  9.81it/s]


 epoch: 5532 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5533 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5534 | train_loss: 0.25, train_acc: 96.7% | test_loss: 0.25, test_acc: 97.2%


 37%|███▋      | 5536/15000 [10:58<15:31, 10.16it/s]


 epoch: 5535 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5536 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5537 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%


 37%|███▋      | 5538/15000 [10:58<14:58, 10.54it/s]


 epoch: 5538 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5539 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

input:       the leader of the entire roman army the senate re classified the provinces at the frontiers where the vast

target:      the leader of the entire roman army the senate re classified the provinces at the frontiers where the vast majority

prediction:  the leader of the entire roman army the senate re classified the provinces at the frontiers where the vast the


 37%|███▋      | 5542/15000 [10:58<14:37, 10.78it/s]


 epoch: 5540 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5541 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5542 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 37%|███▋      | 5544/15000 [10:59<14:10, 11.12it/s]


 epoch: 5543 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5544 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5545 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 37%|███▋      | 5548/15000 [10:59<13:26, 11.72it/s]


 epoch: 5546 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5547 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5548 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 37%|███▋      | 5550/15000 [10:59<14:37, 10.77it/s]


 epoch: 5549 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.8%

input:       produced great number of globally renowned comedy artists from laurel and hardy the three stooges abbott and costello dean

target:      produced great number of globally renowned comedy artists from laurel and hardy the three stooges abbott and costello dean martin

prediction:  produced great number of globally renowned comedy artists from laurel and hardy the three stooges abbott and costello dean the

 epoch: 5550 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 37%|███▋      | 5552/15000 [10:59<14:34, 10.81it/s]


 epoch: 5551 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 5552 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 5553 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%


 37%|███▋      | 5556/15000 [11:00<22:05,  7.13it/s]


 epoch: 5554 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5555 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5556 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 37%|███▋      | 5558/15000 [11:00<19:19,  8.15it/s]


 epoch: 5557 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.1%

 epoch: 5558 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5559 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 37%|███▋      | 5560/15000 [11:01<18:30,  8.50it/s]


input:       on when the common ancestor of these species existed prokaryotes inhabited the earth from approximately billion years ago no

target:      on when the common ancestor of these species existed prokaryotes inhabited the earth from approximately billion years ago no obvious

prediction:  on when the common ancestor of these species existed prokaryotes inhabited the earth from approximately billion years ago no the

 epoch: 5560 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 5561 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 37%|███▋      | 5564/15000 [11:01<15:50,  9.93it/s]


 epoch: 5562 | train_loss: 0.19, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5563 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.1%

 epoch: 5564 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%


 37%|███▋      | 5566/15000 [11:01<15:17, 10.29it/s]


 epoch: 5565 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.21, test_acc: 97.2%

 epoch: 5566 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5567 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 37%|███▋      | 5568/15000 [11:01<14:44, 10.67it/s]


 epoch: 5568 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 37%|███▋      | 5570/15000 [11:02<19:09,  8.20it/s]


 epoch: 5569 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

input:       reconstruction this period also marked the beginning of the cold war with geopolitical tensions between the and the

target:      reconstruction this period also marked the beginning of the cold war with geopolitical tensions between the and the soviet

prediction:  reconstruction this period also marked the beginning of the cold war with geopolitical tensions between the and the in

 epoch: 5570 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5571 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 37%|███▋      | 5574/15000 [11:02<15:58,  9.83it/s]


 epoch: 5572 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 5573 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5574 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 37%|███▋      | 5576/15000 [11:02<15:08, 10.37it/s]


 epoch: 5575 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5576 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 96.9%

 epoch: 5577 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 37%|███▋      | 5578/15000 [11:02<14:18, 10.97it/s]


 epoch: 5578 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 5579 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

input:       considered oceania to include all islands in the pacific and associated the term with the malay archipelago the islands

target:      considered oceania to include all islands in the pacific and associated the term with the malay archipelago the islands of

prediction:  considered oceania to include all islands in the pacific and associated the term with the malay archipelago the islands the


 37%|███▋      | 5582/15000 [11:03<14:35, 10.75it/s]


 epoch: 5580 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5581 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5582 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 37%|███▋      | 5584/15000 [11:03<24:41,  6.36it/s]


 epoch: 5583 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5584 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5585 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 37%|███▋      | 5588/15000 [11:04<18:45,  8.37it/s]


 epoch: 5586 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.24, test_acc: 96.9%

 epoch: 5587 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5588 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 37%|███▋      | 5590/15000 [11:04<17:48,  8.81it/s]


 epoch: 5589 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

input:       in full progress while in bc the world population stood at million it rose to million by bc by

target:      in full progress while in bc the world population stood at million it rose to million by bc by the

prediction:  in full progress while in bc the world population stood at million it rose to million by bc by the

 epoch: 5590 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5591 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%


 37%|███▋      | 5594/15000 [11:04<15:24, 10.18it/s]


 epoch: 5592 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5593 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5594 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 37%|███▋      | 5596/15000 [11:04<14:56, 10.49it/s]


 epoch: 5595 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.8%

 epoch: 5596 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 37%|███▋      | 5598/15000 [11:05<23:34,  6.65it/s]


 epoch: 5597 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5598 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 5599 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 37%|███▋      | 5600/15000 [11:05<21:15,  7.37it/s]


input:       to its liquid form or state but the substance also has solid state ice and gaseous state water vapor

target:      to its liquid form or state but the substance also has solid state ice and gaseous state water vapor or

prediction:  to its liquid form or state but the substance also has solid state ice and gaseous state water vapor the

 epoch: 5600 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 5601 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 37%|███▋      | 5604/15000 [11:05<17:05,  9.17it/s]


 epoch: 5602 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5603 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.0%

 epoch: 5604 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 37%|███▋      | 5606/15000 [11:06<15:45,  9.94it/s]


 epoch: 5605 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%

 epoch: 5606 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5607 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 37%|███▋      | 5608/15000 [11:06<15:19, 10.22it/s]


 epoch: 5608 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5609 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

input:       informal education in vernacular writing reading and mathematics possibly because his artistic talents were recognised early so his family

target:      informal education in vernacular writing reading and mathematics possibly because his artistic talents were recognised early so his family decided


 37%|███▋      | 5610/15000 [11:06<16:58,  9.22it/s]


prediction:  informal education in vernacular writing reading and mathematics possibly because his artistic talents were recognised early so his family the

 epoch: 5610 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5611 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 37%|███▋      | 5613/15000 [11:06<17:58,  8.70it/s]


 epoch: 5612 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5613 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 37%|███▋      | 5615/15000 [11:07<18:34,  8.42it/s]


 epoch: 5614 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5615 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 37%|███▋      | 5617/15000 [11:07<18:07,  8.63it/s]


 epoch: 5616 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5617 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.4%


 37%|███▋      | 5619/15000 [11:07<17:48,  8.78it/s]


 epoch: 5618 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5619 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%

input:       modalities the learner personality may also affect educational achievement for example the features of conscientiousness and openness to experience


 37%|███▋      | 5621/15000 [11:07<20:26,  7.65it/s]


target:      modalities the learner personality may also affect educational achievement for example the features of conscientiousness and openness to experience from

prediction:  modalities the learner personality may also affect educational achievement for example the features of conscientiousness and openness to experience the

 epoch: 5620 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5621 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 37%|███▋      | 5623/15000 [11:08<20:22,  7.67it/s]


 epoch: 5622 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5623 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 38%|███▊      | 5625/15000 [11:08<19:56,  7.84it/s]


 epoch: 5624 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5625 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%


 38%|███▊      | 5627/15000 [11:08<28:46,  5.43it/s]


 epoch: 5626 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 5627 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 38%|███▊      | 5629/15000 [11:09<23:21,  6.69it/s]


 epoch: 5628 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5629 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

input:       the century europeans were masters of logarithms electricity the telescope and microscope calculus universal gravitation newton laws of motion


 38%|███▊      | 5631/15000 [11:09<22:12,  7.03it/s]


target:      the century europeans were masters of logarithms electricity the telescope and microscope calculus universal gravitation newton laws of motion air

prediction:  the century europeans were masters of logarithms electricity the telescope and microscope calculus universal gravitation newton laws of motion the

 epoch: 5630 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5631 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 38%|███▊      | 5633/15000 [11:09<19:59,  7.81it/s]


 epoch: 5632 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5633 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 38%|███▊      | 5636/15000 [11:10<17:09,  9.09it/s]


 epoch: 5634 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5635 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%

 epoch: 5636 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 38%|███▊      | 5639/15000 [11:10<15:24, 10.13it/s]


 epoch: 5637 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5638 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5639 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 38%|███▊      | 5641/15000 [11:10<17:07,  9.10it/s]


input:       when it was occupied by the french the region was initially partitioned between the dutch french and british before

target:      when it was occupied by the french the region was initially partitioned between the dutch french and british before fully

prediction:  when it was occupied by the french the region was initially partitioned between the dutch french and british before the

 epoch: 5640 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 5641 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 38%|███▊      | 5643/15000 [11:10<15:55,  9.79it/s]


 epoch: 5642 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5643 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5644 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 38%|███▊      | 5647/15000 [11:11<14:32, 10.72it/s]


 epoch: 5645 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5646 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5647 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 38%|███▊      | 5649/15000 [11:11<14:25, 10.80it/s]


 epoch: 5648 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5649 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

input:       minutes of streaming per person and slots on july rpan studio was released an application that allows users to

target:      minutes of streaming per person and slots on july rpan studio was released an application that allows users to broadcast

prediction:  minutes of streaming per person and slots on july rpan studio was released an application that allows users to the

 epoch: 5650 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%


 38%|███▊      | 5653/15000 [11:11<14:11, 10.98it/s]


 epoch: 5651 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5652 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5653 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5654 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%


 38%|███▊      | 5657/15000 [11:12<16:32,  9.42it/s]


 epoch: 5655 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5656 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 96.9%

 epoch: 5657 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%


 38%|███▊      | 5659/15000 [11:12<15:32, 10.02it/s]


 epoch: 5658 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5659 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

input:       by sea from egyptian ports the main commodity was grain also traded were olive oil foodstuffs garum fish sauce

target:      by sea from egyptian ports the main commodity was grain also traded were olive oil foodstuffs garum fish sauce slaves

prediction:  by sea from egyptian ports the main commodity was grain also traded were olive oil foodstuffs garum fish sauce the

 epoch: 5660 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%


 38%|███▊      | 5663/15000 [11:12<14:52, 10.46it/s]


 epoch: 5661 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5662 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 5663 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 38%|███▊      | 5665/15000 [11:12<14:39, 10.61it/s]


 epoch: 5664 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5665 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5666 | train_loss: 0.24, train_acc: 97.5% | test_loss: 0.23, test_acc: 96.8%


 38%|███▊      | 5667/15000 [11:13<14:32, 10.70it/s]


 epoch: 5667 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5668 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 38%|███▊      | 5670/15000 [11:13<23:30,  6.61it/s]


 epoch: 5669 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

input:       in this synthesis the basis for heredity is in dna molecules that pass information from generation to generation the

target:      in this synthesis the basis for heredity is in dna molecules that pass information from generation to generation the processes

prediction:  in this synthesis the basis for heredity is in dna molecules that pass information from generation to generation the the

 epoch: 5670 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 38%|███▊      | 5672/15000 [11:13<20:14,  7.68it/s]


 epoch: 5671 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5672 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5673 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 38%|███▊      | 5676/15000 [11:14<16:06,  9.65it/s]


 epoch: 5674 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5675 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5676 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 38%|███▊      | 5678/15000 [11:14<15:06, 10.29it/s]


 epoch: 5677 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.3%

 epoch: 5678 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5679 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%


 38%|███▊      | 5680/15000 [11:14<15:39,  9.92it/s]


input:       as microsoft linq fourth generation programming languages gl are computer programming languages that aim to provide higher level of

target:      as microsoft linq fourth generation programming languages gl are computer programming languages that aim to provide higher level of abstraction

prediction:  as microsoft linq fourth generation programming languages gl are computer programming languages that aim to provide higher level of the

 epoch: 5680 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 5681 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%


 38%|███▊      | 5682/15000 [11:14<15:22, 10.10it/s]


 epoch: 5682 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.27, test_acc: 96.8%


 38%|███▊      | 5684/15000 [11:15<25:30,  6.09it/s]


 epoch: 5683 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.3%

 epoch: 5684 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.6%

 epoch: 5685 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%


 38%|███▊      | 5688/15000 [11:15<19:10,  8.09it/s]


 epoch: 5686 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5687 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5688 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 38%|███▊      | 5690/15000 [11:16<18:02,  8.60it/s]


 epoch: 5689 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.21, test_acc: 97.0%

input:       against the stronger cultural influence of greek over time latin usage was used to project power and higher social

target:      against the stronger cultural influence of greek over time latin usage was used to project power and higher social class

prediction:  against the stronger cultural influence of greek over time latin usage was used to project power and higher social of

 epoch: 5690 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.8%


 38%|███▊      | 5692/15000 [11:16<17:54,  8.66it/s]


 epoch: 5691 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 5692 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.0%

 epoch: 5693 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 38%|███▊      | 5696/15000 [11:16<15:18, 10.13it/s]


 epoch: 5694 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5695 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5696 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5697 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 38%|███▊      | 5700/15000 [11:17<17:25,  8.90it/s]


 epoch: 5698 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5699 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

input:       legions were united fact hinted by the title gemina twin augustus also created nine special cohorts to maintain peace

target:      legions were united fact hinted by the title gemina twin augustus also created nine special cohorts to maintain peace in

prediction:  legions were united fact hinted by the title gemina twin augustus also created nine special cohorts to maintain peace the

 epoch: 5700 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.0%


 38%|███▊      | 5702/15000 [11:17<16:02,  9.66it/s]


 epoch: 5701 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5702 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5703 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.25, test_acc: 96.9%


 38%|███▊      | 5706/15000 [11:17<14:15, 10.86it/s]


 epoch: 5704 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5705 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5706 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 38%|███▊      | 5708/15000 [11:17<14:06, 10.98it/s]


 epoch: 5707 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 5708 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5709 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 38%|███▊      | 5710/15000 [11:17<14:59, 10.33it/s]


input:       in the last part of the th century and spread throughout europe the invention and implementation of new technologies

target:      in the last part of the th century and spread throughout europe the invention and implementation of new technologies resulted

prediction:  in the last part of the th century and spread throughout europe the invention and implementation of new technologies the

 epoch: 5710 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5711 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%


 38%|███▊      | 5714/15000 [11:18<21:25,  7.22it/s]


 epoch: 5712 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 5713 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5714 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 38%|███▊      | 5716/15000 [11:18<18:56,  8.17it/s]


 epoch: 5715 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 5716 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5717 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 38%|███▊      | 5720/15000 [11:19<16:33,  9.34it/s]


 epoch: 5718 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5719 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

input:       is what do and actually have absolutely zero interest in creating traditional scm system from this initial design approach

target:      is what do and actually have absolutely zero interest in creating traditional scm system from this initial design approach git

prediction:  is what do and actually have absolutely zero interest in creating traditional scm system from this initial design approach the

 epoch: 5720 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 38%|███▊      | 5722/15000 [11:19<15:48,  9.78it/s]


 epoch: 5721 | train_loss: 0.21, train_acc: 97.5% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5722 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5723 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 38%|███▊      | 5724/15000 [11:19<15:20, 10.08it/s]


 epoch: 5724 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5725 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 38%|███▊      | 5727/15000 [11:20<24:21,  6.34it/s]


 epoch: 5726 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 5727 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%


 38%|███▊      | 5729/15000 [11:20<21:14,  7.27it/s]


 epoch: 5728 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5729 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.7%

input:       or everyone should have equal opportunity only the subjects with autism who lack the degree of inferential capacity normally

target:      or everyone should have equal opportunity only the subjects with autism who lack the degree of inferential capacity normally associated


 38%|███▊      | 5730/15000 [11:20<21:32,  7.17it/s]


prediction:  or everyone should have equal opportunity only the subjects with autism who lack the degree of inferential capacity normally the

 epoch: 5730 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5731 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 38%|███▊      | 5733/15000 [11:21<19:05,  8.09it/s]


 epoch: 5732 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 5733 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 38%|███▊      | 5735/15000 [11:21<18:44,  8.24it/s]


 epoch: 5734 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

 epoch: 5735 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%


 38%|███▊      | 5738/15000 [11:21<17:26,  8.85it/s]


 epoch: 5736 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5737 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5738 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%


 38%|███▊      | 5740/15000 [11:21<18:56,  8.15it/s]


 epoch: 5739 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

input:       greek word psyche for spirit or soul the latter part of the word psychology derives from logia which means

target:      greek word psyche for spirit or soul the latter part of the word psychology derives from logia which means study

prediction:  greek word psyche for spirit or soul the latter part of the word psychology derives from logia which means the

 epoch: 5740 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 38%|███▊      | 5742/15000 [11:22<21:44,  7.10it/s]


 epoch: 5741 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5742 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 38%|███▊      | 5744/15000 [11:22<18:38,  8.27it/s]


 epoch: 5743 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5744 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 5745 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.0%


 38%|███▊      | 5747/15000 [11:22<17:35,  8.77it/s]


 epoch: 5746 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5747 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.21, test_acc: 97.2%


 38%|███▊      | 5749/15000 [11:22<16:59,  9.07it/s]


 epoch: 5748 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5749 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.1%

input:       considered from their proximity not to belong to the continents of asia or america he defined oceania as including


 38%|███▊      | 5750/15000 [11:23<19:25,  7.94it/s]


target:      considered from their proximity not to belong to the continents of asia or america he defined oceania as including the

prediction:  considered from their proximity not to belong to the continents of asia or america he defined oceania as including the

 epoch: 5750 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 5751 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 38%|███▊      | 5754/15000 [11:23<15:57,  9.66it/s]


 epoch: 5752 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5753 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5754 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 38%|███▊      | 5757/15000 [11:24<24:01,  6.41it/s]


 epoch: 5755 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 96.9%

 epoch: 5756 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5757 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 38%|███▊      | 5760/15000 [11:24<19:44,  7.80it/s]


 epoch: 5758 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5759 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

input:       took leading diplomatic role at the paris peace conference and advocated strongly for the to join the league

target:      took leading diplomatic role at the paris peace conference and advocated strongly for the to join the league of

prediction:  took leading diplomatic role at the paris peace conference and advocated strongly for the to join the league the

 epoch: 5760 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 38%|███▊      | 5762/15000 [11:24<17:18,  8.90it/s]


 epoch: 5761 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5762 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.4%

 epoch: 5763 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%


 38%|███▊      | 5766/15000 [11:25<15:03, 10.22it/s]


 epoch: 5764 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5765 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5766 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 38%|███▊      | 5768/15000 [11:25<14:51, 10.35it/s]


 epoch: 5767 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.4%

 epoch: 5768 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 38%|███▊      | 5770/15000 [11:25<27:05,  5.68it/s]


 epoch: 5769 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

input:       to the point where no organized form of energy could be extracted from it scenario known as heat death

target:      to the point where no organized form of energy could be extracted from it scenario known as heat death modern

prediction:  to the point where no organized form of energy could be extracted from it scenario known as heat death the

 epoch: 5770 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5771 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 38%|███▊      | 5774/15000 [11:26<19:18,  7.96it/s]


 epoch: 5772 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5773 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.0%

 epoch: 5774 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 39%|███▊      | 5776/15000 [11:26<17:59,  8.55it/s]


 epoch: 5775 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5776 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5777 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.7%


 39%|███▊      | 5780/15000 [11:26<16:00,  9.60it/s]


 epoch: 5778 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 5779 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

input:       classical physics includes the traditional branches and topics that were recognized and well developed before the beginning of the

target:      classical physics includes the traditional branches and topics that were recognized and well developed before the beginning of the th

prediction:  classical physics includes the traditional branches and topics that were recognized and well developed before the beginning of the the

 epoch: 5780 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 39%|███▊      | 5782/15000 [11:26<15:18, 10.04it/s]


 epoch: 5781 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5782 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5783 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 39%|███▊      | 5786/15000 [11:27<19:20,  7.94it/s]


 epoch: 5784 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.3%

 epoch: 5785 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5786 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 39%|███▊      | 5788/15000 [11:27<17:21,  8.85it/s]


 epoch: 5787 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 5788 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 5789 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 39%|███▊      | 5790/15000 [11:28<17:01,  9.02it/s]


input:       selection has become vital part of genetic engineering with selectable markers such as antibiotic resistance genes being used to

target:      selection has become vital part of genetic engineering with selectable markers such as antibiotic resistance genes being used to manipulate

prediction:  selection has become vital part of genetic engineering with selectable markers such as antibiotic resistance genes being used to the

 epoch: 5790 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.4%

 epoch: 5791 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.8%


 39%|███▊      | 5794/15000 [11:28<15:05, 10.16it/s]


 epoch: 5792 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 5793 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5794 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 39%|███▊      | 5796/15000 [11:28<14:43, 10.42it/s]


 epoch: 5795 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5796 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5797 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 39%|███▊      | 5800/15000 [11:29<21:35,  7.10it/s]


 epoch: 5798 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.4%

 epoch: 5799 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

input:       began to take steps to prevent these sites from using steamworks for gambling purposes and several of these sites

target:      began to take steps to prevent these sites from using steamworks for gambling purposes and several of these sites ceased

prediction:  began to take steps to prevent these sites from using steamworks for gambling purposes and several of these sites the

 epoch: 5800 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 39%|███▊      | 5802/15000 [11:29<18:59,  8.07it/s]


 epoch: 5801 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5802 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5803 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 39%|███▊      | 5806/15000 [11:29<15:42,  9.75it/s]


 epoch: 5804 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5805 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5806 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 39%|███▊      | 5808/15000 [11:30<14:51, 10.31it/s]


 epoch: 5807 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5808 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5809 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 39%|███▊      | 5810/15000 [11:30<15:19, 10.00it/s]


input:       and south of brazil vast road complex aims to link bras lia the federal capital to the south southeast

target:      and south of brazil vast road complex aims to link bras lia the federal capital to the south southeast northeast

prediction:  and south of brazil vast road complex aims to link bras lia the federal capital to the south southeast the

 epoch: 5810 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5811 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.2%


 39%|███▊      | 5812/15000 [11:30<14:59, 10.21it/s]


 epoch: 5812 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 39%|███▉      | 5814/15000 [11:31<24:26,  6.26it/s]


 epoch: 5813 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5814 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5815 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 39%|███▉      | 5818/15000 [11:31<18:23,  8.32it/s]


 epoch: 5816 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 5817 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5818 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%


 39%|███▉      | 5820/15000 [11:31<17:39,  8.67it/s]


 epoch: 5819 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

input:       either athenian or pro athenian which is why far more is known about the history and politics of athens

target:      either athenian or pro athenian which is why far more is known about the history and politics of athens than

prediction:  either athenian or pro athenian which is why far more is known about the history and politics of athens the

 epoch: 5820 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 5821 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 39%|███▉      | 5824/15000 [11:31<14:59, 10.20it/s]


 epoch: 5822 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5823 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5824 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 39%|███▉      | 5826/15000 [11:32<14:40, 10.42it/s]


 epoch: 5825 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5826 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 39%|███▉      | 5828/15000 [11:32<22:50,  6.69it/s]


 epoch: 5827 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5828 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 5829 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%


 39%|███▉      | 5830/15000 [11:32<20:44,  7.37it/s]


input:       phenomena and the development and analysis of experiments and theoretical physicists who specialize in mathematical modeling of physical systems

target:      phenomena and the development and analysis of experiments and theoretical physicists who specialize in mathematical modeling of physical systems to

prediction:  phenomena and the development and analysis of experiments and theoretical physicists who specialize in mathematical modeling of physical systems the

 epoch: 5830 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5831 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 39%|███▉      | 5834/15000 [11:33<17:11,  8.88it/s]


 epoch: 5832 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5833 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 5834 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.27, test_acc: 96.8%


 39%|███▉      | 5836/15000 [11:33<17:16,  8.85it/s]


 epoch: 5835 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5836 | train_loss: 0.20, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 39%|███▉      | 5838/15000 [11:33<17:36,  8.67it/s]


 epoch: 5837 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5838 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.25, test_acc: 97.1%


 39%|███▉      | 5840/15000 [11:33<19:31,  7.82it/s]


 epoch: 5839 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.3%

input:       expansionism and the oregon treaty with britain led to control of the present day american northwest victory in

target:      expansionism and the oregon treaty with britain led to control of the present day american northwest victory in the

prediction:  expansionism and the oregon treaty with britain led to control of the present day american northwest victory in the

 epoch: 5840 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%


 39%|███▉      | 5842/15000 [11:34<32:02,  4.76it/s]


 epoch: 5841 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%

 epoch: 5842 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 39%|███▉      | 5844/15000 [11:34<24:55,  6.12it/s]


 epoch: 5843 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5844 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 39%|███▉      | 5846/15000 [11:35<22:00,  6.93it/s]


 epoch: 5845 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 5846 | train_loss: 0.22, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.0%


 39%|███▉      | 5849/15000 [11:35<18:26,  8.27it/s]


 epoch: 5847 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 5848 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 5849 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 39%|███▉      | 5850/15000 [11:35<19:54,  7.66it/s]


input:       of the roman province of macedonia in roman greece and later the province of achaea during the roman empire

target:      of the roman province of macedonia in roman greece and later the province of achaea during the roman empire classical

prediction:  of the roman province of macedonia in roman greece and later the province of achaea during the roman empire the

 epoch: 5850 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.3%

 epoch: 5851 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 39%|███▉      | 5853/15000 [11:35<17:45,  8.58it/s]


 epoch: 5852 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5853 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 39%|███▉      | 5855/15000 [11:36<18:15,  8.35it/s]


 epoch: 5854 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5855 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.22, test_acc: 96.9%


 39%|███▉      | 5857/15000 [11:36<27:19,  5.58it/s]


 epoch: 5856 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5857 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.0%

 epoch: 5858 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 39%|███▉      | 5860/15000 [11:36<20:17,  7.50it/s]


 epoch: 5859 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.3%

input:       ancient greek philosophy from thales first attempt to characterize matter to democritus deduction that matter ought to reduce to

target:      ancient greek philosophy from thales first attempt to characterize matter to democritus deduction that matter ought to reduce to an

prediction:  ancient greek philosophy from thales first attempt to characterize matter to democritus deduction that matter ought to reduce to the

 epoch: 5860 | train_loss: 0.25, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.1%


 39%|███▉      | 5862/15000 [11:37<17:19,  8.79it/s]


 epoch: 5861 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5862 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 5863 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 39%|███▉      | 5866/15000 [11:37<14:39, 10.38it/s]


 epoch: 5864 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 5865 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5866 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 39%|███▉      | 5868/15000 [11:37<14:28, 10.52it/s]


 epoch: 5867 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5868 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 5869 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

input:       and berber is triliteral or biliteral root of consonants and semiconsonants suffixes are added to form words the verb

target:      and berber is triliteral or biliteral root of consonants and semiconsonants suffixes are added to form words the verb conjugation

prediction:  and berber is triliteral or biliteral root of consonants and semiconsonants suffixes are added to form words the verb the


 39%|███▉      | 5872/15000 [11:38<22:01,  6.91it/s]


 epoch: 5870 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5871 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.8%

 epoch: 5872 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 39%|███▉      | 5874/15000 [11:38<19:26,  7.83it/s]


 epoch: 5873 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 5874 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5875 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 39%|███▉      | 5878/15000 [11:39<15:48,  9.62it/s]


 epoch: 5876 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5877 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5878 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%


 39%|███▉      | 5880/15000 [11:39<15:48,  9.61it/s]


 epoch: 5879 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

input:       by such an ethics committee the ethics code of the american psychological association originated in as ethical standards of

target:      by such an ethics committee the ethics code of the american psychological association originated in as ethical standards of psychologists

prediction:  by such an ethics committee the ethics code of the american psychological association originated in as ethical standards of the

 epoch: 5880 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 39%|███▉      | 5882/15000 [11:39<15:22,  9.88it/s]


 epoch: 5881 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 5882 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

 epoch: 5883 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 39%|███▉      | 5886/15000 [11:40<22:08,  6.86it/s]


 epoch: 5884 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5885 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5886 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 39%|███▉      | 5888/15000 [11:40<19:27,  7.81it/s]


 epoch: 5887 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5888 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 5889 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 39%|███▉      | 5890/15000 [11:40<18:32,  8.19it/s]


input:       that part of the api should be considered candidate for being removed or modified in backward incompatible way therefore

target:      that part of the api should be considered candidate for being removed or modified in backward incompatible way therefore these

prediction:  that part of the api should be considered candidate for being removed or modified in backward incompatible way therefore the

 epoch: 5890 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5891 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 39%|███▉      | 5894/15000 [11:40<15:41,  9.67it/s]


 epoch: 5892 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5893 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5894 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 39%|███▉      | 5896/15000 [11:41<15:09, 10.01it/s]


 epoch: 5895 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5896 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.9%

 epoch: 5897 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 39%|███▉      | 5898/15000 [11:41<14:31, 10.45it/s]


 epoch: 5898 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 39%|███▉      | 5900/15000 [11:41<18:49,  8.05it/s]


 epoch: 5899 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

input:       classes also have the advantage of students not needing to leave their house for morning class or worrying about

target:      classes also have the advantage of students not needing to leave their house for morning class or worrying about their

prediction:  classes also have the advantage of students not needing to leave their house for morning class or worrying about the

 epoch: 5900 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5901 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 39%|███▉      | 5904/15000 [11:42<15:37,  9.70it/s]


 epoch: 5902 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5903 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5904 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 39%|███▉      | 5906/15000 [11:42<14:52, 10.19it/s]


 epoch: 5905 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.26, test_acc: 97.0%

 epoch: 5906 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5907 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 39%|███▉      | 5908/15000 [11:42<14:29, 10.46it/s]


 epoch: 5908 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 5909 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

input:       significant policy changes and political polarization citation needed the attack on the united states capitol of january attempting to

target:      significant policy changes and political polarization citation needed the attack on the united states capitol of january attempting to prevent

prediction:  significant policy changes and political polarization citation needed the attack on the united states capitol of january attempting to the


 39%|███▉      | 5912/15000 [11:42<14:41, 10.31it/s]


 epoch: 5910 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.8%

 epoch: 5911 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5912 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 39%|███▉      | 5914/15000 [11:43<24:33,  6.17it/s]


 epoch: 5913 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5914 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5915 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 39%|███▉      | 5918/15000 [11:43<18:57,  7.99it/s]


 epoch: 5916 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5917 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 5918 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%


 39%|███▉      | 5920/15000 [11:44<17:54,  8.45it/s]


 epoch: 5919 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

input:       of classical light fields by macroscopic objects but on the fundamental properties of optical fields and their interactions with

target:      of classical light fields by macroscopic objects but on the fundamental properties of optical fields and their interactions with matter

prediction:  of classical light fields by macroscopic objects but on the fundamental properties of optical fields and their interactions with the

 epoch: 5920 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5921 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 39%|███▉      | 5924/15000 [11:44<15:38,  9.67it/s]


 epoch: 5922 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 5923 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5924 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 40%|███▉      | 5926/15000 [11:44<15:07, 10.00it/s]


 epoch: 5925 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5926 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 40%|███▉      | 5928/15000 [11:45<25:33,  5.92it/s]


 epoch: 5927 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 5928 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5929 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 40%|███▉      | 5930/15000 [11:45<22:31,  6.71it/s]


input:       behavior therapy among clinical psychologists increased key practice in behavioral and cognitive behavioral therapy is exposing patients to things

target:      behavior therapy among clinical psychologists increased key practice in behavioral and cognitive behavioral therapy is exposing patients to things they

prediction:  behavior therapy among clinical psychologists increased key practice in behavioral and cognitive behavioral therapy is exposing patients to things the

 epoch: 5930 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5931 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 40%|███▉      | 5934/15000 [11:45<17:43,  8.52it/s]


 epoch: 5932 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5933 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5934 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 40%|███▉      | 5936/15000 [11:45<16:51,  8.97it/s]


 epoch: 5935 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5936 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 5937 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 40%|███▉      | 5938/15000 [11:46<15:54,  9.49it/s]


 epoch: 5938 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5939 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       general rulings no longer responding to individual petitions although the senate could do little short of assassination and open

target:      general rulings no longer responding to individual petitions although the senate could do little short of assassination and open rebellion

prediction:  general rulings no longer responding to individual petitions although the senate could do little short of assassination and open the


 40%|███▉      | 5941/15000 [11:46<16:58,  8.90it/s]


 epoch: 5940 | train_loss: 0.25, train_acc: 96.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5941 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%


 40%|███▉      | 5943/15000 [11:46<17:09,  8.80it/s]


 epoch: 5942 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 5943 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.9%


 40%|███▉      | 5945/15000 [11:46<17:45,  8.50it/s]


 epoch: 5944 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5945 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 40%|███▉      | 5947/15000 [11:47<18:05,  8.34it/s]


 epoch: 5946 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5947 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%


 40%|███▉      | 5949/15000 [11:47<19:06,  7.89it/s]


 epoch: 5948 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5949 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%


 40%|███▉      | 5950/15000 [11:47<22:51,  6.60it/s]


input:       of the ptolemaic dynasty after his death following the conquest of north africa mediterranean coastline by the roman empire

target:      of the ptolemaic dynasty after his death following the conquest of north africa mediterranean coastline by the roman empire the

prediction:  of the ptolemaic dynasty after his death following the conquest of north africa mediterranean coastline by the roman empire the

 epoch: 5950 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 40%|███▉      | 5952/15000 [11:47<20:43,  7.28it/s]


 epoch: 5951 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 5952 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 40%|███▉      | 5954/15000 [11:48<20:50,  7.23it/s]


 epoch: 5953 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

 epoch: 5954 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%


 40%|███▉      | 5955/15000 [11:48<20:24,  7.39it/s]


 epoch: 5955 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 40%|███▉      | 5957/15000 [11:49<34:24,  4.38it/s]


 epoch: 5956 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5957 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 40%|███▉      | 5959/15000 [11:49<25:49,  5.84it/s]


 epoch: 5958 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5959 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.9%


 40%|███▉      | 5960/15000 [11:49<26:02,  5.79it/s]


input:       in an object in addition powershell allows formatting definitions to be specified so the text representation of objects can

target:      in an object in addition powershell allows formatting definitions to be specified so the text representation of objects can be

prediction:  in an object in addition powershell allows formatting definitions to be specified so the text representation of objects can the

 epoch: 5960 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 40%|███▉      | 5962/15000 [11:49<21:04,  7.15it/s]


 epoch: 5961 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.2%

 epoch: 5962 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 5963 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 40%|███▉      | 5964/15000 [11:49<18:30,  8.13it/s]



 epoch: 5964 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 40%|███▉      | 5966/15000 [11:50<17:10,  8.77it/s]


 epoch: 5965 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5966 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 5967 | train_loss: 0.20, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 40%|███▉      | 5969/15000 [11:50<15:16,  9.85it/s]


 epoch: 5968 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5969 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

input:       vandals and alamanni along the rhine and danube rivers in the western part of the empire as well as

target:      vandals and alamanni along the rhine and danube rivers in the western part of the empire as well as attacks

prediction:  vandals and alamanni along the rhine and danube rivers in the western part of the empire as well as the


 40%|███▉      | 5972/15000 [11:51<25:19,  5.94it/s]


 epoch: 5970 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 5971 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 5972 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 40%|███▉      | 5974/15000 [11:51<20:43,  7.26it/s]


 epoch: 5973 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5974 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5975 | train_loss: 0.22, train_acc: 97.5% | test_loss: 0.24, test_acc: 97.0%


 40%|███▉      | 5978/15000 [11:51<15:59,  9.41it/s]


 epoch: 5976 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 5977 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.3%

 epoch: 5978 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 40%|███▉      | 5980/15000 [11:51<16:01,  9.38it/s]


 epoch: 5979 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

input:       the germanic and slav tribes established their domains over western and eastern europe respectively eventually the frankish tribes were

target:      the germanic and slav tribes established their domains over western and eastern europe respectively eventually the frankish tribes were united

prediction:  the germanic and slav tribes established their domains over western and eastern europe respectively eventually the frankish tribes were the

 epoch: 5980 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5981 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 40%|███▉      | 5982/15000 [11:52<14:57, 10.05it/s]


 epoch: 5982 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 5983 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 40%|███▉      | 5986/15000 [11:52<18:27,  8.14it/s]


 epoch: 5984 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5985 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5986 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 40%|███▉      | 5988/15000 [11:52<16:41,  9.00it/s]


 epoch: 5987 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 5988 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 5989 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 40%|███▉      | 5990/15000 [11:53<16:29,  9.10it/s]


input:       evropa clickable map of europe showing one of the most commonly used continental boundaries key blue states which straddle

target:      evropa clickable map of europe showing one of the most commonly used continental boundaries key blue states which straddle the

prediction:  evropa clickable map of europe showing one of the most commonly used continental boundaries key blue states which straddle the

 epoch: 5990 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5991 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 40%|███▉      | 5994/15000 [11:53<14:43, 10.19it/s]


 epoch: 5992 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 5993 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 5994 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 40%|███▉      | 5996/15000 [11:53<14:18, 10.48it/s]


 epoch: 5995 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 5996 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%

 epoch: 5997 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.25, test_acc: 97.1%


 40%|███▉      | 5998/15000 [11:53<14:06, 10.63it/s]


 epoch: 5998 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 40%|████      | 6000/15000 [11:54<24:20,  6.16it/s]


 epoch: 5999 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       most popular professional team sports with the top leagues being the national basketball association and the national hockey league

target:      most popular professional team sports with the top leagues being the national basketball association and the national hockey league which

prediction:  most popular professional team sports with the top leagues being the national basketball association and the national hockey league the

 epoch: 6000 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 40%|████      | 6002/15000 [11:54<21:08,  7.09it/s]


 epoch: 6001 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.4%

 epoch: 6002 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6003 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 40%|████      | 6006/15000 [11:54<16:45,  8.94it/s]


 epoch: 6004 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6005 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%

 epoch: 6006 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 40%|████      | 6008/15000 [11:55<15:37,  9.60it/s]


 epoch: 6007 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6008 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6009 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 40%|████      | 6010/15000 [11:55<15:51,  9.44it/s]


input:       between specific sign form and its meaning thus languages must have vocabulary of signs related to specific meaning the

target:      between specific sign form and its meaning thus languages must have vocabulary of signs related to specific meaning the english

prediction:  between specific sign form and its meaning thus languages must have vocabulary of signs related to specific meaning the the

 epoch: 6010 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%

 epoch: 6011 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 40%|████      | 6012/15000 [11:55<15:14,  9.83it/s]


 epoch: 6012 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 40%|████      | 6015/15000 [11:56<23:21,  6.41it/s]


 epoch: 6013 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6014 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 6015 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 40%|████      | 6017/15000 [11:56<19:49,  7.55it/s]


 epoch: 6016 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 6017 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 6018 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 40%|████      | 6019/15000 [11:56<17:48,  8.40it/s]


 epoch: 6019 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

input:       cultivated wild rice the iroquois confederation haudenosaunee located in the southern great lakes region was established between the th

target:      cultivated wild rice the iroquois confederation haudenosaunee located in the southern great lakes region was established between the th and

prediction:  cultivated wild rice the iroquois confederation haudenosaunee located in the southern great lakes region was established between the th the

 epoch: 6020 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 40%|████      | 6023/15000 [11:57<16:05,  9.30it/s]


 epoch: 6021 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.9%

 epoch: 6022 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 6023 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%


 40%|████      | 6025/15000 [11:57<15:20,  9.75it/s]


 epoch: 6024 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6025 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6026 | train_loss: 0.28, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.9%


 40%|████      | 6029/15000 [11:57<14:43, 10.15it/s]


 epoch: 6027 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6028 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 6029 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 40%|████      | 6031/15000 [11:57<14:57,  9.99it/s]


input:       provide services to others in early societies there was little specialization and each child would generally learn most of

target:      provide services to others in early societies there was little specialization and each child would generally learn most of the

prediction:  provide services to others in early societies there was little specialization and each child would generally learn most of the

 epoch: 6030 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6031 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 96.9%


 40%|████      | 6033/15000 [11:57<14:36, 10.23it/s]


 epoch: 6032 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6033 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6034 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 40%|████      | 6037/15000 [11:58<14:00, 10.66it/s]


 epoch: 6035 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6036 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 6037 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 40%|████      | 6039/15000 [11:58<14:07, 10.58it/s]


 epoch: 6038 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6039 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

input:       of saint luke the guild of artists and doctors of medicine but even after his father set him up

target:      of saint luke the guild of artists and doctors of medicine but even after his father set him up in

prediction:  of saint luke the guild of artists and doctors of medicine but even after his father set him up the


 40%|████      | 6041/15000 [11:58<14:46, 10.11it/s]


 epoch: 6040 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6041 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 40%|████      | 6043/15000 [11:59<24:30,  6.09it/s]


 epoch: 6042 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6043 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%

 epoch: 6044 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 40%|████      | 6047/15000 [11:59<18:41,  7.98it/s]


 epoch: 6045 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6046 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 6047 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 40%|████      | 6048/15000 [11:59<18:02,  8.27it/s]


 epoch: 6048 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 6049 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.8%

input:       into eordaia bottiaea mygdonia and almopia regions settled by thracian tribes to the north of macedonia lay various non

target:      into eordaia bottiaea mygdonia and almopia regions settled by thracian tribes to the north of macedonia lay various non greek

prediction:  into eordaia bottiaea mygdonia and almopia regions settled by thracian tribes to the north of macedonia lay various non the


 40%|████      | 6051/15000 [12:00<17:54,  8.33it/s]


 epoch: 6050 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6051 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 40%|████      | 6053/15000 [12:00<18:12,  8.19it/s]


 epoch: 6052 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 6053 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%


 40%|████      | 6055/15000 [12:00<18:06,  8.23it/s]


 epoch: 6054 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6055 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 40%|████      | 6057/15000 [12:01<31:43,  4.70it/s]


 epoch: 6056 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6057 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 40%|████      | 6059/15000 [12:01<25:00,  5.96it/s]


 epoch: 6058 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6059 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

input:       difficulty while the urban city dwellers were more advanced technologically and socially in many cases they could do little


 40%|████      | 6060/15000 [12:01<25:53,  5.75it/s]


target:      difficulty while the urban city dwellers were more advanced technologically and socially in many cases they could do little in

prediction:  difficulty while the urban city dwellers were more advanced technologically and socially in many cases they could do little the

 epoch: 6060 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%


 40%|████      | 6062/15000 [12:02<22:04,  6.75it/s]


 epoch: 6061 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6062 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 40%|████      | 6064/15000 [12:02<21:24,  6.96it/s]


 epoch: 6063 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6064 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%


 40%|████      | 6066/15000 [12:02<19:50,  7.51it/s]


 epoch: 6065 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6066 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 40%|████      | 6068/15000 [12:02<19:12,  7.75it/s]


 epoch: 6067 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6068 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 40%|████      | 6069/15000 [12:02<18:46,  7.93it/s]


 epoch: 6069 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       amber additionally in the th century bce aristotle made critical observations of the slow rate of geological change he

target:      amber additionally in the th century bce aristotle made critical observations of the slow rate of geological change he observed

prediction:  amber additionally in the th century bce aristotle made critical observations of the slow rate of geological change he the


 40%|████      | 6071/15000 [12:03<27:17,  5.45it/s]


 epoch: 6070 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 96.9%

 epoch: 6071 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 40%|████      | 6073/15000 [12:03<22:16,  6.68it/s]


 epoch: 6072 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 6073 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.2%


 40%|████      | 6075/15000 [12:03<18:53,  7.88it/s]


 epoch: 6074 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6075 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6076 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 41%|████      | 6079/15000 [12:04<14:36, 10.18it/s]


 epoch: 6077 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6078 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6079 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 41%|████      | 6081/15000 [12:04<15:43,  9.45it/s]


input:       self reporting it remains challenging to draw hard conclusions about where in the brain specific thoughts originate or even

target:      self reporting it remains challenging to draw hard conclusions about where in the brain specific thoughts originate or even how

prediction:  self reporting it remains challenging to draw hard conclusions about where in the brain specific thoughts originate or even of

 epoch: 6080 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6081 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 41%|████      | 6084/15000 [12:04<14:58,  9.92it/s]


 epoch: 6082 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 6083 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 6084 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 41%|████      | 6087/15000 [12:05<23:35,  6.30it/s]


 epoch: 6085 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%

 epoch: 6086 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6087 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.3%


 41%|████      | 6090/15000 [12:05<19:05,  7.78it/s]


 epoch: 6088 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6089 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

input:       it could be used as server out of the box it is shipped with built in command git daemon

target:      it could be used as server out of the box it is shipped with built in command git daemon which

prediction:  it could be used as server out of the box it is shipped with built in command git daemon the

 epoch: 6090 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 41%|████      | 6092/15000 [12:06<16:57,  8.75it/s]


 epoch: 6091 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6092 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6093 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 41%|████      | 6096/15000 [12:06<14:42, 10.09it/s]


 epoch: 6094 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6095 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 6096 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.28, test_acc: 96.9%


 41%|████      | 6098/15000 [12:06<14:14, 10.42it/s]


 epoch: 6097 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6098 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6099 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 41%|████      | 6100/15000 [12:06<14:53,  9.96it/s]


input:       years for males for females during the period of republican expansionism when slavery had become pervasive war captives were

target:      years for males for females during the period of republican expansionism when slavery had become pervasive war captives were main

prediction:  years for males for females during the period of republican expansionism when slavery had become pervasive war captives were the

 epoch: 6100 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6101 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 41%|████      | 6104/15000 [12:07<13:39, 10.86it/s]


 epoch: 6102 | train_loss: 0.25, train_acc: 96.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6103 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6104 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 41%|████      | 6106/15000 [12:07<13:25, 11.05it/s]


 epoch: 6105 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6106 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6107 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 41%|████      | 6108/15000 [12:07<13:11, 11.24it/s]


 epoch: 6108 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 6109 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

input:       perceived the preservation of the ottoman empire to be in their best interests meanwhile the serbian revolution and greek

target:      perceived the preservation of the ottoman empire to be in their best interests meanwhile the serbian revolution and greek war

prediction:  perceived the preservation of the ottoman empire to be in their best interests meanwhile the serbian revolution and greek the


 41%|████      | 6112/15000 [12:07<13:51, 10.69it/s]


 epoch: 6110 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 6111 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.6%

 epoch: 6112 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 41%|████      | 6114/15000 [12:08<15:29,  9.56it/s]


 epoch: 6113 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 6114 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6115 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 41%|████      | 6118/15000 [12:08<14:08, 10.47it/s]


 epoch: 6116 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6117 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 96.9%

 epoch: 6118 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 41%|████      | 6120/15000 [12:08<14:41, 10.08it/s]


 epoch: 6119 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

input:       also founded in egypt and libya modern syracuse naples marseille and istanbul had their beginnings as the greek colonies

target:      also founded in egypt and libya modern syracuse naples marseille and istanbul had their beginnings as the greek colonies syracusae

prediction:  also founded in egypt and libya modern syracuse naples marseille and istanbul had their beginnings as the greek colonies the

 epoch: 6120 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 41%|████      | 6122/15000 [12:08<14:25, 10.26it/s]


 epoch: 6121 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6122 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.4%

 epoch: 6123 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 41%|████      | 6126/15000 [12:09<14:11, 10.42it/s]


 epoch: 6124 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6125 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6126 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6127 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 41%|████      | 6128/15000 [12:09<24:44,  5.98it/s]


 epoch: 6128 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 6129 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       aristotle divides comedy into three categories or subgenres farce romantic comedy and satire on the other hand plato taught

target:      aristotle divides comedy into three categories or subgenres farce romantic comedy and satire on the other hand plato taught that

prediction:  aristotle divides comedy into three categories or subgenres farce romantic comedy and satire on the other hand plato taught the


 41%|████      | 6131/15000 [12:10<20:43,  7.13it/s]


 epoch: 6130 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6131 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.4%

 epoch: 6132 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.2%


 41%|████      | 6135/15000 [12:10<16:25,  9.00it/s]


 epoch: 6133 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6134 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.7%

 epoch: 6135 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 41%|████      | 6137/15000 [12:10<15:22,  9.61it/s]


 epoch: 6136 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6137 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6138 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 41%|████      | 6139/15000 [12:10<14:59,  9.85it/s]


 epoch: 6139 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

input:       can be specified using type grammar they are context free grammars some languages including perl and lisp contain

target:      can be specified using type grammar they are context free grammars some languages including perl and lisp contain constructs

prediction:  can be specified using type grammar they are context free grammars some languages including perl and lisp contain the

 epoch: 6140 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 41%|████      | 6141/15000 [12:11<15:40,  9.42it/s]


 epoch: 6141 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.19, test_acc: 97.3%


 41%|████      | 6144/15000 [12:11<21:51,  6.75it/s]


 epoch: 6142 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6143 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 6144 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 41%|████      | 6147/15000 [12:12<17:32,  8.41it/s]


 epoch: 6145 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6146 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%

 epoch: 6147 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 41%|████      | 6149/15000 [12:12<15:54,  9.27it/s]


 epoch: 6148 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6149 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

input:       local consumption however the export of agricultural products is essential for the balance of trade in most countries the

target:      local consumption however the export of agricultural products is essential for the balance of trade in most countries the main

prediction:  local consumption however the export of agricultural products is essential for the balance of trade in most countries the the

 epoch: 6150 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 41%|████      | 6153/15000 [12:12<14:55,  9.88it/s]


 epoch: 6151 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6152 | train_loss: 0.23, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6153 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.3%


 41%|████      | 6155/15000 [12:12<14:48,  9.95it/s]


 epoch: 6154 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6155 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.2%

 epoch: 6156 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 41%|████      | 6159/15000 [12:13<20:18,  7.25it/s]


 epoch: 6157 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6158 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6159 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 41%|████      | 6161/15000 [12:13<19:56,  7.39it/s]


input:       led to other renewed efforts in the discipline to re test important findings in response to concerns about publication

target:      led to other renewed efforts in the discipline to re test important findings in response to concerns about publication bias

prediction:  led to other renewed efforts in the discipline to re test important findings in response to concerns about publication the

 epoch: 6160 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6161 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 41%|████      | 6163/15000 [12:14<18:47,  7.84it/s]


 epoch: 6162 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6163 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 41%|████      | 6165/15000 [12:14<18:02,  8.16it/s]


 epoch: 6164 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 6165 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 41%|████      | 6167/15000 [12:14<19:00,  7.75it/s]


 epoch: 6166 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

 epoch: 6167 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%


 41%|████      | 6169/15000 [12:14<18:02,  8.16it/s]


 epoch: 6168 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6169 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 41%|████      | 6170/15000 [12:15<19:56,  7.38it/s]


input:       as cleon and cleophon there was brief reaction against democracy aided by the spartan army the rule of the

target:      as cleon and cleophon there was brief reaction against democracy aided by the spartan army the rule of the thirty

prediction:  as cleon and cleophon there was brief reaction against democracy aided by the spartan army the rule of the the

 epoch: 6170 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.0%


 41%|████      | 6172/15000 [12:15<31:58,  4.60it/s]


 epoch: 6171 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.1%

 epoch: 6172 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%


 41%|████      | 6174/15000 [12:15<24:55,  5.90it/s]


 epoch: 6173 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.4%

 epoch: 6174 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 41%|████      | 6176/15000 [12:16<20:35,  7.14it/s]


 epoch: 6175 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6176 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 41%|████      | 6178/15000 [12:16<18:57,  7.75it/s]


 epoch: 6177 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6178 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 41%|████      | 6180/15000 [12:16<21:58,  6.69it/s]


 epoch: 6179 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

input:       domination these tensions were exacerbated in bc when athens sent force to aid sparta in overcoming helot revolt but

target:      domination these tensions were exacerbated in bc when athens sent force to aid sparta in overcoming helot revolt but this

prediction:  domination these tensions were exacerbated in bc when athens sent force to aid sparta in overcoming helot revolt but the

 epoch: 6180 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 41%|████      | 6182/15000 [12:17<20:26,  7.19it/s]


 epoch: 6181 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%

 epoch: 6182 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%


 41%|████      | 6183/15000 [12:17<20:08,  7.29it/s]


 epoch: 6183 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 6184 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 41%|████      | 6187/15000 [12:17<20:09,  7.29it/s]


 epoch: 6185 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6186 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6187 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 41%|████▏     | 6188/15000 [12:17<18:57,  7.74it/s]


 epoch: 6188 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6189 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

input:       june fidelity the lead investor in reddit funding round in devalued its investment in reddit by it was revealed

target:      june fidelity the lead investor in reddit funding round in devalued its investment in reddit by it was revealed in

prediction:  june fidelity the lead investor in reddit funding round in devalued its investment in reddit by it was revealed the


 41%|████▏     | 6192/15000 [12:18<15:53,  9.24it/s]


 epoch: 6190 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6191 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6192 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 41%|████▏     | 6194/15000 [12:18<15:06,  9.72it/s]


 epoch: 6193 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6194 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6195 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 41%|████▏     | 6198/15000 [12:18<13:50, 10.60it/s]


 epoch: 6196 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6197 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6198 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 41%|████▏     | 6200/15000 [12:19<19:54,  7.37it/s]


 epoch: 6199 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

input:       areas in both canada and the north america occupies the northern portion of the landmass generally referred to

target:      areas in both canada and the north america occupies the northern portion of the landmass generally referred to as

prediction:  areas in both canada and the north america occupies the northern portion of the landmass generally referred to the

 epoch: 6200 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6201 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 41%|████▏     | 6204/15000 [12:19<15:53,  9.22it/s]


 epoch: 6202 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.27, test_acc: 97.0%

 epoch: 6203 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.4%

 epoch: 6204 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 41%|████▏     | 6206/15000 [12:19<14:55,  9.82it/s]


 epoch: 6205 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 6206 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6207 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 41%|████▏     | 6208/15000 [12:19<14:16, 10.27it/s]


 epoch: 6208 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6209 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 96.9%

input:       john talking to and not john is talking to who the latter example may be used as way of

target:      john talking to and not john is talking to who the latter example may be used as way of placing

prediction:  john talking to and not john is talking to who the latter example may be used as way of the


 41%|████▏     | 6212/15000 [12:20<14:21, 10.20it/s]


 epoch: 6210 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 6211 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6212 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6213 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 41%|████▏     | 6216/15000 [12:21<21:18,  6.87it/s]


 epoch: 6214 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 6215 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6216 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 41%|████▏     | 6219/15000 [12:21<17:27,  8.38it/s]


 epoch: 6217 | train_loss: 0.22, train_acc: 96.8% | test_loss: 0.21, test_acc: 97.2%

 epoch: 6218 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 6219 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 41%|████▏     | 6221/15000 [12:21<16:52,  8.67it/s]


input:       politics philosophy architecture sculpture history and literature he fostered arts and literature and gave to athens splendor which would

target:      politics philosophy architecture sculpture history and literature he fostered arts and literature and gave to athens splendor which would never

prediction:  politics philosophy architecture sculpture history and literature he fostered arts and literature and gave to athens splendor which would of

 epoch: 6220 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6221 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 41%|████▏     | 6223/15000 [12:21<15:35,  9.38it/s]


 epoch: 6222 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6223 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6224 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 42%|████▏     | 6227/15000 [12:22<14:09, 10.32it/s]


 epoch: 6225 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6226 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6227 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 42%|████▏     | 6230/15000 [12:22<19:52,  7.36it/s]


 epoch: 6228 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6229 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.0%

input:       predict future experimental results while experimentalists devise and perform experiments to test theoretical predictions and explore new phenomena although

target:      predict future experimental results while experimentalists devise and perform experiments to test theoretical predictions and explore new phenomena although theory

prediction:  predict future experimental results while experimentalists devise and perform experiments to test theoretical predictions and explore new phenomena although the

 epoch: 6230 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 42%|████▏     | 6232/15000 [12:22<17:29,  8.36it/s]


 epoch: 6231 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.2%

 epoch: 6232 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6233 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.4%


 42%|████▏     | 6236/15000 [12:23<14:38,  9.98it/s]


 epoch: 6234 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6235 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6236 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 42%|████▏     | 6238/15000 [12:23<14:09, 10.32it/s]


 epoch: 6237 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 6238 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6239 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%


 42%|████▏     | 6240/15000 [12:23<14:45,  9.89it/s]


input:       open source code in perpetuity github mascot is an anthropomorphized octocat with five octopus like arms the character was

target:      open source code in perpetuity github mascot is an anthropomorphized octocat with five octopus like arms the character was created

prediction:  open source code in perpetuity github mascot is an anthropomorphized octocat with five octopus like arms the character was the

 epoch: 6240 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 6241 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 42%|████▏     | 6244/15000 [12:24<14:30, 10.05it/s]


 epoch: 6242 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6243 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.0%

 epoch: 6244 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 42%|████▏     | 6246/15000 [12:24<14:18, 10.19it/s]


 epoch: 6245 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6246 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6247 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 42%|████▏     | 6248/15000 [12:24<13:52, 10.51it/s]


 epoch: 6248 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6249 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

input:       from native american and native hawaiian activities that predate european contact the market for professional sports in the united

target:      from native american and native hawaiian activities that predate european contact the market for professional sports in the united states

prediction:  from native american and native hawaiian activities that predate european contact the market for professional sports in the united the


 42%|████▏     | 6252/15000 [12:24<13:56, 10.46it/s]


 epoch: 6250 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 6251 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 6252 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.4%


 42%|████▏     | 6254/15000 [12:25<13:57, 10.45it/s]


 epoch: 6253 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.1%

 epoch: 6254 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 6255 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%


 42%|████▏     | 6256/15000 [12:25<14:07, 10.32it/s]


 epoch: 6256 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 42%|████▏     | 6258/15000 [12:25<23:11,  6.28it/s]


 epoch: 6257 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6258 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6259 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 42%|████▏     | 6260/15000 [12:26<21:02,  6.92it/s]


input:       spectacle venue and stadium greek style athletics included footraces boxing wrestling and the pancratium aquatic displays such as the

target:      spectacle venue and stadium greek style athletics included footraces boxing wrestling and the pancratium aquatic displays such as the mock

prediction:  spectacle venue and stadium greek style athletics included footraces boxing wrestling and the pancratium aquatic displays such as the the

 epoch: 6260 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 6261 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 42%|████▏     | 6263/15000 [12:26<18:05,  8.05it/s]


 epoch: 6262 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.1%

 epoch: 6263 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6264 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 42%|████▏     | 6267/15000 [12:26<14:59,  9.71it/s]


 epoch: 6265 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6266 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6267 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 42%|████▏     | 6269/15000 [12:26<14:18, 10.17it/s]


 epoch: 6268 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6269 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

input:       of the bear northern the greek philosopher aristotle wrote in meteorology about an antarctic region in bce the greek

target:      of the bear northern the greek philosopher aristotle wrote in meteorology about an antarctic region in bce the greek geographer

prediction:  of the bear northern the greek philosopher aristotle wrote in meteorology about an antarctic region in bce the greek the

 epoch: 6270 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 42%|████▏     | 6273/15000 [12:27<16:26,  8.85it/s]


 epoch: 6271 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6272 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.3%

 epoch: 6273 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 42%|████▏     | 6275/15000 [12:27<16:33,  8.78it/s]


 epoch: 6274 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 6275 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.8%


 42%|████▏     | 6277/15000 [12:27<16:50,  8.63it/s]


 epoch: 6276 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.21, test_acc: 97.0%

 epoch: 6277 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.8%


 42%|████▏     | 6278/15000 [12:27<16:16,  8.93it/s]


 epoch: 6278 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6279 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

input:       less known temagami magnetic anomaly has striking similarities to the sudbury basin its magnetic anomalies are very similar to

target:      less known temagami magnetic anomaly has striking similarities to the sudbury basin its magnetic anomalies are very similar to the


 42%|████▏     | 6281/15000 [12:28<17:24,  8.35it/s]


prediction:  less known temagami magnetic anomaly has striking similarities to the sudbury basin its magnetic anomalies are very similar to the

 epoch: 6280 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6281 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 42%|████▏     | 6283/15000 [12:28<17:35,  8.26it/s]


 epoch: 6282 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 6283 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%


 42%|████▏     | 6284/15000 [12:28<17:36,  8.25it/s]


 epoch: 6284 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 42%|████▏     | 6286/15000 [12:29<30:48,  4.72it/s]


 epoch: 6285 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6286 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.8%


 42%|████▏     | 6288/15000 [12:29<23:06,  6.28it/s]


 epoch: 6287 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6288 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.3%


 42%|████▏     | 6290/15000 [12:29<21:15,  6.83it/s]


 epoch: 6289 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

input:       complicated system of direct and indirect taxes some paid in cash and some in kind taxes might be specific

target:      complicated system of direct and indirect taxes some paid in cash and some in kind taxes might be specific to

prediction:  complicated system of direct and indirect taxes some paid in cash and some in kind taxes might be specific the

 epoch: 6290 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 42%|████▏     | 6292/15000 [12:30<18:35,  7.81it/s]


 epoch: 6291 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 6292 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 42%|████▏     | 6294/15000 [12:30<17:59,  8.06it/s]


 epoch: 6293 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 6294 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.9%


 42%|████▏     | 6296/15000 [12:30<18:19,  7.92it/s]


 epoch: 6295 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 6296 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 42%|████▏     | 6298/15000 [12:30<18:35,  7.80it/s]


 epoch: 6297 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.4%

 epoch: 6298 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 42%|████▏     | 6299/15000 [12:30<18:31,  7.83it/s]


 epoch: 6299 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.1%

input:       subjects with autism who lack the degree of inferential capacity normally associated with aspects of theory of mind came

target:      subjects with autism who lack the degree of inferential capacity normally associated with aspects of theory of mind came close

prediction:  subjects with autism who lack the degree of inferential capacity normally associated with aspects of theory of mind came the


 42%|████▏     | 6302/15000 [12:31<27:25,  5.28it/s]


 epoch: 6300 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6301 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6302 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 42%|████▏     | 6305/15000 [12:32<19:33,  7.41it/s]


 epoch: 6303 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.4%

 epoch: 6304 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6305 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.8%


 42%|████▏     | 6308/15000 [12:32<16:01,  9.04it/s]


 epoch: 6306 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 6307 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6308 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 42%|████▏     | 6310/15000 [12:32<16:09,  8.97it/s]


 epoch: 6309 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

input:       had been resurrected first by sulla in the late bc and then by julius caesar in the mid the

target:      had been resurrected first by sulla in the late bc and then by julius caesar in the mid the title

prediction:  had been resurrected first by sulla in the late bc and then by julius caesar in the mid the the

 epoch: 6310 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 42%|████▏     | 6313/15000 [12:32<14:54,  9.72it/s]


 epoch: 6311 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 6312 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6313 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 96.9%


 42%|████▏     | 6315/15000 [12:33<25:22,  5.71it/s]


 epoch: 6314 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6315 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6316 | train_loss: 0.20, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%


 42%|████▏     | 6319/15000 [12:33<18:05,  8.00it/s]


 epoch: 6317 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6318 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 6319 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 42%|████▏     | 6321/15000 [12:34<17:39,  8.19it/s]


input:       republicans and democrats presently are the two major parties and the country is currently in either the fifth or

target:      republicans and democrats presently are the two major parties and the country is currently in either the fifth or sixth

prediction:  republicans and democrats presently are the two major parties and the country is currently in either the fifth or the

 epoch: 6320 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6321 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 42%|████▏     | 6323/15000 [12:34<16:02,  9.01it/s]


 epoch: 6322 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6323 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 6324 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 42%|████▏     | 6327/15000 [12:34<14:30,  9.96it/s]


 epoch: 6325 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6326 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6327 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 6328 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 42%|████▏     | 6330/15000 [12:35<22:35,  6.40it/s]


 epoch: 6329 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

input:       the th century the traditional division of the landmass of eurasia into two continents europe and asia followed ptolemy

target:      the th century the traditional division of the landmass of eurasia into two continents europe and asia followed ptolemy with

prediction:  the th century the traditional division of the landmass of eurasia into two continents europe and asia followed ptolemy the

 epoch: 6330 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6331 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.6%


 42%|████▏     | 6332/15000 [12:35<19:20,  7.47it/s]


 epoch: 6332 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 96.9%

 epoch: 6333 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 42%|████▏     | 6336/15000 [12:35<15:25,  9.36it/s]


 epoch: 6334 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6335 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6336 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 42%|████▏     | 6338/15000 [12:36<14:41,  9.83it/s]


 epoch: 6337 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6338 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 6339 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 42%|████▏     | 6340/15000 [12:36<15:06,  9.55it/s]


input:       the situation is the opposite and new pronouns can be constructed whereas the number of adjectives is fixed word

target:      the situation is the opposite and new pronouns can be constructed whereas the number of adjectives is fixed word classes

prediction:  the situation is the opposite and new pronouns can be constructed whereas the number of adjectives is fixed word the

 epoch: 6340 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6341 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 42%|████▏     | 6342/15000 [12:36<14:45,  9.78it/s]


 epoch: 6342 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 42%|████▏     | 6344/15000 [12:37<24:18,  5.93it/s]


 epoch: 6343 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 6344 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.8%

 epoch: 6345 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 42%|████▏     | 6348/15000 [12:37<17:59,  8.01it/s]


 epoch: 6346 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%

 epoch: 6347 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%

 epoch: 6348 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%


 42%|████▏     | 6350/15000 [12:37<16:59,  8.48it/s]


 epoch: 6349 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

input:       tropic of capricorn in the southern temperate zone africa is highly biodiverse it is the continent with the largest

target:      tropic of capricorn in the southern temperate zone africa is highly biodiverse it is the continent with the largest number

prediction:  tropic of capricorn in the southern temperate zone africa is highly biodiverse it is the continent with the largest the

 epoch: 6350 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 6351 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%


 42%|████▏     | 6354/15000 [12:37<14:45,  9.76it/s]


 epoch: 6352 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6353 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6354 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 42%|████▏     | 6356/15000 [12:38<14:35,  9.87it/s]


 epoch: 6355 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6356 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 42%|████▏     | 6358/15000 [12:38<24:35,  5.86it/s]


 epoch: 6357 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.2%

 epoch: 6358 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6359 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 42%|████▏     | 6360/15000 [12:39<21:39,  6.65it/s]


input:       and sustained technological advances in computing and data communication widespread access to the internet emerged as the cost of

target:      and sustained technological advances in computing and data communication widespread access to the internet emerged as the cost of infrastructure

prediction:  and sustained technological advances in computing and data communication widespread access to the internet emerged as the cost of the

 epoch: 6360 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 6361 | train_loss: 0.26, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 42%|████▏     | 6364/15000 [12:39<16:55,  8.51it/s]


 epoch: 6362 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6363 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6364 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 42%|████▏     | 6366/15000 [12:39<15:29,  9.29it/s]


 epoch: 6365 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6366 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6367 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.0%


 42%|████▏     | 6368/15000 [12:39<14:51,  9.68it/s]


 epoch: 6368 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6369 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

input:       two or more points in time sometimes the purpose of longitudinal research is to study trends across time such

target:      two or more points in time sometimes the purpose of longitudinal research is to study trends across time such as

prediction:  two or more points in time sometimes the purpose of longitudinal research is to study trends across time such the


 42%|████▏     | 6370/15000 [12:39<14:47,  9.72it/s]


 epoch: 6370 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6371 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 42%|████▏     | 6373/15000 [12:40<22:32,  6.38it/s]


 epoch: 6372 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 6373 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6374 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 43%|████▎     | 6377/15000 [12:41<16:48,  8.55it/s]


 epoch: 6375 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6376 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 6377 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 43%|████▎     | 6379/15000 [12:41<16:09,  8.90it/s]


 epoch: 6378 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 6379 | train_loss: 0.21, train_acc: 97.5% | test_loss: 0.25, test_acc: 96.9%

input:       the image of the crucifixion recurs in religious sacraments and the proliferation of symbols of the cross in homes


 43%|████▎     | 6381/15000 [12:41<16:54,  8.50it/s]


target:      the image of the crucifixion recurs in religious sacraments and the proliferation of symbols of the cross in homes and

prediction:  the image of the crucifixion recurs in religious sacraments and the proliferation of symbols of the cross in homes the

 epoch: 6380 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%

 epoch: 6381 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 43%|████▎     | 6383/15000 [12:41<17:04,  8.41it/s]


 epoch: 6382 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%

 epoch: 6383 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 43%|████▎     | 6385/15000 [12:42<17:44,  8.09it/s]


 epoch: 6384 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6385 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 43%|████▎     | 6387/15000 [12:42<23:54,  6.00it/s]


 epoch: 6386 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6387 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%


 43%|████▎     | 6389/15000 [12:42<20:28,  7.01it/s]


 epoch: 6388 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6389 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.3%


 43%|████▎     | 6390/15000 [12:42<21:55,  6.55it/s]


input:       xenix relied almost exclusively on cli ms dos came with complementary graphical dos shell the windows family came bundled

target:      xenix relied almost exclusively on cli ms dos came with complementary graphical dos shell the windows family came bundled with

prediction:  xenix relied almost exclusively on cli ms dos came with complementary graphical dos shell the windows family came bundled the

 epoch: 6390 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 43%|████▎     | 6392/15000 [12:43<20:47,  6.90it/s]


 epoch: 6391 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6392 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 43%|████▎     | 6394/15000 [12:43<19:44,  7.26it/s]


 epoch: 6393 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 6394 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%


 43%|████▎     | 6396/15000 [12:43<19:06,  7.51it/s]


 epoch: 6395 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6396 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 43%|████▎     | 6398/15000 [12:43<18:20,  7.82it/s]


 epoch: 6397 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6398 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.3%


 43%|████▎     | 6399/15000 [12:44<18:13,  7.87it/s]


 epoch: 6399 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

input:       john dewey and harvey carr advanced the idea of functionalism an expansive approach to psychology that underlined the darwinian

target:      john dewey and harvey carr advanced the idea of functionalism an expansive approach to psychology that underlined the darwinian idea

prediction:  john dewey and harvey carr advanced the idea of functionalism an expansive approach to psychology that underlined the darwinian the


 43%|████▎     | 6400/15000 [12:44<21:32,  6.66it/s]


 epoch: 6400 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 43%|████▎     | 6403/15000 [12:44<26:04,  5.50it/s]


 epoch: 6401 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6402 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.9%

 epoch: 6403 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 43%|████▎     | 6405/15000 [12:45<20:37,  6.95it/s]


 epoch: 6404 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6405 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 6406 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 43%|████▎     | 6409/15000 [12:45<15:39,  9.14it/s]


 epoch: 6407 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%

 epoch: 6408 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6409 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 43%|████▎     | 6411/15000 [12:45<15:51,  9.03it/s]


input:       billions of views and their panel at minecon had the highest attendance other well known youtube personalities include jordan

target:      billions of views and their panel at minecon had the highest attendance other well known youtube personalities include jordan maron

prediction:  billions of views and their panel at minecon had the highest attendance other well known youtube personalities include jordan the

 epoch: 6410 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6411 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 43%|████▎     | 6414/15000 [12:45<14:23,  9.94it/s]


 epoch: 6412 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6413 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%

 epoch: 6414 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 43%|████▎     | 6416/15000 [12:46<16:22,  8.73it/s]


 epoch: 6415 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6416 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6417 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.2%


 43%|████▎     | 6420/15000 [12:46<14:35,  9.80it/s]


 epoch: 6418 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6419 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

input:       each copy had to be written out on papyrus roll volumen by scribes the codex pages bound to spine

target:      each copy had to be written out on papyrus roll volumen by scribes the codex pages bound to spine was

prediction:  each copy had to be written out on papyrus roll volumen by scribes the codex pages bound to spine the

 epoch: 6420 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 43%|████▎     | 6422/15000 [12:46<13:46, 10.38it/s]


 epoch: 6421 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.0%

 epoch: 6422 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 6423 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 43%|████▎     | 6426/15000 [12:47<13:18, 10.74it/s]


 epoch: 6424 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.2%

 epoch: 6425 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 6426 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 43%|████▎     | 6428/15000 [12:47<13:20, 10.71it/s]


 epoch: 6427 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6428 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6429 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%


 43%|████▎     | 6430/15000 [12:47<14:08, 10.10it/s]


input:       eleven world cups and the women national team has won the fifa women world cup and olympic soccer tournament

target:      eleven world cups and the women national team has won the fifa women world cup and olympic soccer tournament four

prediction:  eleven world cups and the women national team has won the fifa women world cup and olympic soccer tournament the

 epoch: 6430 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 6431 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 43%|████▎     | 6434/15000 [12:47<13:04, 10.92it/s]


 epoch: 6432 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 6433 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 6434 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 43%|████▎     | 6436/15000 [12:48<13:02, 10.94it/s]


 epoch: 6435 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.5%

 epoch: 6436 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6437 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 43%|████▎     | 6438/15000 [12:48<12:58, 10.99it/s]


 epoch: 6438 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 6439 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

input:       video game at the kids choice awards while the game itself won the still playing award at the golden

target:      video game at the kids choice awards while the game itself won the still playing award at the golden joystick

prediction:  video game at the kids choice awards while the game itself won the still playing award at the golden the


 43%|████▎     | 6442/15000 [12:48<13:21, 10.67it/s]


 epoch: 6440 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6441 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6442 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6443 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 43%|████▎     | 6446/15000 [12:49<20:38,  6.91it/s]


 epoch: 6444 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6445 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6446 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.8%


 43%|████▎     | 6449/15000 [12:49<16:56,  8.41it/s]


 epoch: 6447 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6448 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6449 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 43%|████▎     | 6451/15000 [12:49<16:29,  8.64it/s]


input:       and other great monuments of classical athens the city became in pericles words an education for hellas usually quoted

target:      and other great monuments of classical athens the city became in pericles words an education for hellas usually quoted as

prediction:  and other great monuments of classical athens the city became in pericles words an education for hellas usually quoted the

 epoch: 6450 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6451 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 43%|████▎     | 6453/15000 [12:50<15:19,  9.30it/s]


 epoch: 6452 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6453 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6454 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 43%|████▎     | 6457/15000 [12:50<14:13, 10.01it/s]


 epoch: 6455 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

 epoch: 6456 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6457 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 43%|████▎     | 6460/15000 [12:51<22:03,  6.45it/s]


 epoch: 6458 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6459 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

input:       take them they have already been introduced to the subject and know what to expect classes provide high school

target:      take them they have already been introduced to the subject and know what to expect classes provide high school college

prediction:  take them they have already been introduced to the subject and know what to expect classes provide high school the

 epoch: 6460 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.4%


 43%|████▎     | 6462/15000 [12:51<18:47,  7.57it/s]


 epoch: 6461 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.22, test_acc: 96.9%

 epoch: 6462 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6463 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 43%|████▎     | 6466/15000 [12:51<15:11,  9.36it/s]


 epoch: 6464 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 6465 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6466 | train_loss: 0.19, train_acc: 97.4% | test_loss: 0.25, test_acc: 97.0%


 43%|████▎     | 6468/15000 [12:51<14:21,  9.90it/s]


 epoch: 6467 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6468 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6469 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.28, test_acc: 96.8%


 43%|████▎     | 6470/15000 [12:52<14:59,  9.48it/s]


input:       afk away from keyboard online gaming has become an integral part of internet culture with dedicated communities esports and

target:      afk away from keyboard online gaming has become an integral part of internet culture with dedicated communities esports and streaming

prediction:  afk away from keyboard online gaming has become an integral part of internet culture with dedicated communities esports and the

 epoch: 6470 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6471 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 43%|████▎     | 6474/15000 [12:52<13:38, 10.42it/s]


 epoch: 6472 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6473 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.2%

 epoch: 6474 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 43%|████▎     | 6476/15000 [12:52<13:18, 10.68it/s]


 epoch: 6475 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6476 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.1%

 epoch: 6477 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 43%|████▎     | 6480/15000 [12:53<13:12, 10.74it/s]


 epoch: 6478 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 6479 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

input:       phasis as the boundary between europe and asia europe eastern frontier was defined in the st century by geographer

target:      phasis as the boundary between europe and asia europe eastern frontier was defined in the st century by geographer strabo

prediction:  phasis as the boundary between europe and asia europe eastern frontier was defined in the st century by geographer the

 epoch: 6480 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 43%|████▎     | 6482/15000 [12:53<13:03, 10.88it/s]


 epoch: 6481 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6482 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6483 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 43%|████▎     | 6486/15000 [12:53<12:45, 11.12it/s]


 epoch: 6484 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6485 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6486 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 43%|████▎     | 6488/15000 [12:54<22:52,  6.20it/s]


 epoch: 6487 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6488 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%

 epoch: 6489 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 43%|████▎     | 6490/15000 [12:54<20:28,  6.93it/s]


input:       visual art forms across africa and may be included in the study of african art the term african art

target:      visual art forms across africa and may be included in the study of african art the term african art does

prediction:  visual art forms across africa and may be included in the study of african art the term african art the

 epoch: 6490 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6491 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 43%|████▎     | 6493/15000 [12:54<17:32,  8.09it/s]


 epoch: 6492 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6493 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 43%|████▎     | 6496/15000 [12:55<16:06,  8.80it/s]


 epoch: 6494 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6495 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6496 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 43%|████▎     | 6498/15000 [12:55<16:43,  8.47it/s]


 epoch: 6497 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6498 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 43%|████▎     | 6500/15000 [12:55<18:18,  7.74it/s]


 epoch: 6499 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

input:       generally includes two types of physicists experimental physicists who specialize in the observation of natural phenomena and the development

target:      generally includes two types of physicists experimental physicists who specialize in the observation of natural phenomena and the development and

prediction:  generally includes two types of physicists experimental physicists who specialize in the observation of natural phenomena and the development the

 epoch: 6500 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 43%|████▎     | 6502/15000 [12:56<29:06,  4.87it/s]


 epoch: 6501 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 6502 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 43%|████▎     | 6504/15000 [12:56<22:07,  6.40it/s]


 epoch: 6503 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6504 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 43%|████▎     | 6506/15000 [12:56<18:57,  7.46it/s]


 epoch: 6505 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6506 | train_loss: 0.23, train_acc: 97.4% | test_loss: 0.25, test_acc: 97.1%


 43%|████▎     | 6508/15000 [12:56<16:53,  8.38it/s]


 epoch: 6507 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6508 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.4%


 43%|████▎     | 6510/15000 [12:57<20:14,  6.99it/s]


 epoch: 6509 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       the wider pop culture has worldwide influence and following beyonc taylor swift miley cyrus ariana grande eminem lady

target:      the wider pop culture has worldwide influence and following beyonc taylor swift miley cyrus ariana grande eminem lady gaga

prediction:  the wider pop culture has worldwide influence and following beyonc taylor swift miley cyrus ariana grande eminem lady the

 epoch: 6510 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 43%|████▎     | 6512/15000 [12:57<19:00,  7.44it/s]


 epoch: 6511 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6512 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.9%


 43%|████▎     | 6514/15000 [12:57<17:40,  8.00it/s]


 epoch: 6513 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 6514 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 43%|████▎     | 6516/15000 [12:58<20:31,  6.89it/s]


 epoch: 6515 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6516 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.2%


 43%|████▎     | 6518/15000 [12:58<17:28,  8.09it/s]


 epoch: 6517 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%

 epoch: 6518 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 6519 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 43%|████▎     | 6520/15000 [12:58<16:13,  8.71it/s]


input:       approximate latitude and longitude coldest month average hottest month average and annual average temperatures in degrees it is notable

target:      approximate latitude and longitude coldest month average hottest month average and annual average temperatures in degrees it is notable how

prediction:  approximate latitude and longitude coldest month average hottest month average and annual average temperatures in degrees it is notable the

 epoch: 6520 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 6521 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 43%|████▎     | 6523/15000 [12:58<14:30,  9.74it/s]


 epoch: 6522 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6523 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%

 epoch: 6524 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 44%|████▎     | 6527/15000 [12:59<13:26, 10.51it/s]


 epoch: 6525 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 6526 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6527 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 44%|████▎     | 6529/15000 [12:59<13:31, 10.44it/s]


 epoch: 6528 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6529 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

input:       military established control of territory through war but after city or people was brought under treaty the mission turned

target:      military established control of territory through war but after city or people was brought under treaty the mission turned to

prediction:  military established control of territory through war but after city or people was brought under treaty the mission turned the


 44%|████▎     | 6531/15000 [13:00<25:17,  5.58it/s]


 epoch: 6530 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6531 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6532 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%


 44%|████▎     | 6535/15000 [13:00<18:15,  7.73it/s]


 epoch: 6533 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 6534 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%

 epoch: 6535 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.3%


 44%|████▎     | 6538/15000 [13:00<15:26,  9.14it/s]


 epoch: 6536 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6537 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6538 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 44%|████▎     | 6540/15000 [13:00<15:16,  9.23it/s]


 epoch: 6539 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

input:       ever more diverse life forms species that were unable to adapt to the changing environment and competition from other

target:      ever more diverse life forms species that were unable to adapt to the changing environment and competition from other life

prediction:  ever more diverse life forms species that were unable to adapt to the changing environment and competition from other the

 epoch: 6540 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 44%|████▎     | 6542/15000 [13:01<14:40,  9.61it/s]


 epoch: 6541 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 6542 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6543 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 44%|████▎     | 6546/15000 [13:01<20:05,  7.02it/s]


 epoch: 6544 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6545 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.4%

 epoch: 6546 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%


 44%|████▎     | 6548/15000 [13:02<17:40,  7.97it/s]


 epoch: 6547 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6548 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.3%

 epoch: 6549 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 44%|████▎     | 6551/15000 [13:02<16:26,  8.56it/s]


input:       the typical software debugger tasks on low level software and firmware the debugging process normally begins with identifying the

target:      the typical software debugger tasks on low level software and firmware the debugging process normally begins with identifying the steps

prediction:  the typical software debugger tasks on low level software and firmware the debugging process normally begins with identifying the the

 epoch: 6550 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6551 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 44%|████▎     | 6553/15000 [13:02<14:58,  9.40it/s]


 epoch: 6552 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6553 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6554 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 44%|████▎     | 6557/15000 [13:02<13:19, 10.56it/s]


 epoch: 6555 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6556 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6557 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 44%|████▎     | 6559/15000 [13:03<20:04,  7.01it/s]


 epoch: 6558 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6559 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.5%

input:       the united states and mexico france italy portugal spain romania greece and the countries of latin america use six

target:      the united states and mexico france italy portugal spain romania greece and the countries of latin america use six continent

prediction:  the united states and mexico france italy portugal spain romania greece and the countries of latin america use six the


 44%|████▎     | 6562/15000 [13:03<17:02,  8.25it/s]


 epoch: 6560 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 6561 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6562 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 44%|████▍     | 6564/15000 [13:03<15:20,  9.17it/s]


 epoch: 6563 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.8%

 epoch: 6564 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6565 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 44%|████▍     | 6568/15000 [13:04<13:55, 10.09it/s]


 epoch: 6566 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6567 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6568 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 44%|████▍     | 6570/15000 [13:04<14:16,  9.84it/s]


 epoch: 6569 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

input:       tasks and parodies of works in popular culture by may over four million minecraft related youtube videos had been

target:      tasks and parodies of works in popular culture by may over four million minecraft related youtube videos had been uploaded

prediction:  tasks and parodies of works in popular culture by may over four million minecraft related youtube videos had been the

 epoch: 6570 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6571 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 44%|████▍     | 6574/15000 [13:05<19:58,  7.03it/s]


 epoch: 6572 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 6573 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6574 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 44%|████▍     | 6577/15000 [13:05<16:58,  8.27it/s]


 epoch: 6575 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6576 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6577 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 44%|████▍     | 6580/15000 [13:05<15:17,  9.18it/s]


 epoch: 6578 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6579 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

input:       years after his death following gratian death maximus had to deal with valentinian ii at the time only twelve

target:      years after his death following gratian death maximus had to deal with valentinian ii at the time only twelve years

prediction:  years after his death following gratian death maximus had to deal with valentinian ii at the time only twelve the

 epoch: 6580 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 44%|████▍     | 6582/15000 [13:05<14:10,  9.89it/s]


 epoch: 6581 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6582 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 6583 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 44%|████▍     | 6586/15000 [13:06<13:17, 10.55it/s]


 epoch: 6584 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6585 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 6586 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 44%|████▍     | 6588/15000 [13:06<21:26,  6.54it/s]


 epoch: 6587 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6588 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 6589 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 44%|████▍     | 6590/15000 [13:07<19:34,  7.16it/s]


input:       and much admired facial feature the so called grecian profile these faces are often contrasted with that of warrior

target:      and much admired facial feature the so called grecian profile these faces are often contrasted with that of warrior sala

prediction:  and much admired facial feature the so called grecian profile these faces are often contrasted with that of warrior the

 epoch: 6590 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6591 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 44%|████▍     | 6594/15000 [13:07<15:37,  8.97it/s]


 epoch: 6592 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 6593 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6594 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 44%|████▍     | 6596/15000 [13:07<14:56,  9.37it/s]


 epoch: 6595 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6596 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6597 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.0%


 44%|████▍     | 6598/15000 [13:07<14:02,  9.98it/s]


 epoch: 6598 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6599 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

input:       into effect in it reorganized the government into federation administered by three branches executive judicial and legislative on the

target:      into effect in it reorganized the government into federation administered by three branches executive judicial and legislative on the principle

prediction:  into effect in it reorganized the government into federation administered by three branches executive judicial and legislative on the the


 44%|████▍     | 6600/15000 [13:07<14:15,  9.82it/s]


 epoch: 6600 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 44%|████▍     | 6602/15000 [13:08<21:48,  6.42it/s]


 epoch: 6601 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6602 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 44%|████▍     | 6604/15000 [13:08<19:19,  7.24it/s]


 epoch: 6603 | train_loss: 0.25, train_acc: 96.7% | test_loss: 0.25, test_acc: 97.0%

 epoch: 6604 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 44%|████▍     | 6606/15000 [13:08<18:25,  7.60it/s]


 epoch: 6605 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.8%

 epoch: 6606 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 44%|████▍     | 6608/15000 [13:09<17:37,  7.93it/s]


 epoch: 6607 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.4%

 epoch: 6608 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%


 44%|████▍     | 6610/15000 [13:09<20:04,  6.97it/s]


 epoch: 6609 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

input:       to reddit in there were approximately million monthly users who are known as redditors the site content is divided

target:      to reddit in there were approximately million monthly users who are known as redditors the site content is divided into

prediction:  to reddit in there were approximately million monthly users who are known as redditors the site content is divided the

 epoch: 6610 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 44%|████▍     | 6612/15000 [13:09<19:06,  7.32it/s]


 epoch: 6611 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 6612 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%


 44%|████▍     | 6614/15000 [13:10<17:18,  8.08it/s]


 epoch: 6613 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6614 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 44%|████▍     | 6616/15000 [13:10<31:54,  4.38it/s]


 epoch: 6615 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 6616 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 44%|████▍     | 6618/15000 [13:10<23:36,  5.92it/s]


 epoch: 6617 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 6618 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%


 44%|████▍     | 6620/15000 [13:11<23:41,  5.89it/s]


 epoch: 6619 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

input:       they cannot mix with genes of other organisms during reproduction in contrast the offspring of sexual organisms contain random

target:      they cannot mix with genes of other organisms during reproduction in contrast the offspring of sexual organisms contain random mixtures

prediction:  they cannot mix with genes of other organisms during reproduction in contrast the offspring of sexual organisms contain random the

 epoch: 6620 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%


 44%|████▍     | 6623/15000 [13:11<16:55,  8.25it/s]


 epoch: 6621 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6622 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 6623 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 44%|████▍     | 6625/15000 [13:11<15:00,  9.30it/s]


 epoch: 6624 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 6625 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 6626 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 44%|████▍     | 6629/15000 [13:12<13:32, 10.30it/s]


 epoch: 6627 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6628 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6629 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

input:       city of rome had become major political issue in the late republic when the state began to provide grain

target:      city of rome had become major political issue in the late republic when the state began to provide grain dole

prediction:  city of rome had become major political issue in the late republic when the state began to provide grain the


 44%|████▍     | 6631/15000 [13:12<19:39,  7.10it/s]


 epoch: 6630 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6631 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6632 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 44%|████▍     | 6635/15000 [13:12<15:27,  9.02it/s]


 epoch: 6633 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6634 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6635 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 44%|████▍     | 6637/15000 [13:13<14:29,  9.62it/s]


 epoch: 6636 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6637 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 6638 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 44%|████▍     | 6639/15000 [13:13<13:45, 10.13it/s]


 epoch: 6639 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

input:       disastrous defeat in egypt in and the death of cimon in action against the persians on cyprus in as

target:      disastrous defeat in egypt in and the death of cimon in action against the persians on cyprus in as the

prediction:  disastrous defeat in egypt in and the death of cimon in action against the persians on cyprus in as the

 epoch: 6640 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 44%|████▍     | 6643/15000 [13:13<13:55, 10.00it/s]


 epoch: 6641 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6642 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6643 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 44%|████▍     | 6645/15000 [13:14<22:41,  6.14it/s]


 epoch: 6644 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 6645 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6646 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 44%|████▍     | 6649/15000 [13:14<16:59,  8.19it/s]


 epoch: 6647 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6648 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6649 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%


 44%|████▍     | 6651/15000 [13:14<16:24,  8.48it/s]


input:       capital from ventura ital risk to finance their captures the capital should be refunded by the goods they would

target:      capital from ventura ital risk to finance their captures the capital should be refunded by the goods they would bring

prediction:  capital from ventura ital risk to finance their captures the capital should be refunded by the goods they would the

 epoch: 6650 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6651 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 44%|████▍     | 6653/15000 [13:15<15:13,  9.14it/s]


 epoch: 6652 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6653 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6654 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 44%|████▍     | 6657/15000 [13:15<13:46, 10.10it/s]


 epoch: 6655 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.1%

 epoch: 6656 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6657 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 44%|████▍     | 6660/15000 [13:16<22:06,  6.29it/s]


 epoch: 6658 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.3%

 epoch: 6659 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

input:       memes are not accountable within neo darwinian model of evolutionary culture within cultural anthropology materialist approaches are skeptical of

target:      memes are not accountable within neo darwinian model of evolutionary culture within cultural anthropology materialist approaches are skeptical of such

prediction:  memes are not accountable within neo darwinian model of evolutionary culture within cultural anthropology materialist approaches are skeptical of the

 epoch: 6660 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%


 44%|████▍     | 6662/15000 [13:16<18:39,  7.45it/s]


 epoch: 6661 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6662 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6663 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.0%


 44%|████▍     | 6666/15000 [13:16<14:59,  9.26it/s]


 epoch: 6664 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6665 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 6666 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%


 44%|████▍     | 6668/15000 [13:16<14:03,  9.88it/s]


 epoch: 6667 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6668 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 96.9%

 epoch: 6669 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 44%|████▍     | 6670/15000 [13:17<14:34,  9.52it/s]


input:       peoples raiding across the borders while the emperor was focused primarily on the dangers from the sassanid persian empire

target:      peoples raiding across the borders while the emperor was focused primarily on the dangers from the sassanid persian empire leading

prediction:  peoples raiding across the borders while the emperor was focused primarily on the dangers from the sassanid persian empire the

 epoch: 6670 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 6671 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 44%|████▍     | 6672/15000 [13:17<14:07,  9.82it/s]


 epoch: 6672 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 44%|████▍     | 6674/15000 [13:17<17:03,  8.14it/s]


 epoch: 6673 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 6674 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 6675 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 45%|████▍     | 6678/15000 [13:17<14:32,  9.54it/s]


 epoch: 6676 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 6677 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6678 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.3%


 45%|████▍     | 6680/15000 [13:18<14:30,  9.55it/s]


 epoch: 6679 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

input:       of light is concerned not only with visible light but also with infrared and ultraviolet radiation which exhibit all

target:      of light is concerned not only with visible light but also with infrared and ultraviolet radiation which exhibit all of

prediction:  of light is concerned not only with visible light but also with infrared and ultraviolet radiation which exhibit all the

 epoch: 6680 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6681 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.2%


 45%|████▍     | 6684/15000 [13:18<13:06, 10.57it/s]


 epoch: 6682 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 6683 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6684 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 45%|████▍     | 6686/15000 [13:18<13:13, 10.48it/s]


 epoch: 6685 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6686 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 96.9%


 45%|████▍     | 6688/15000 [13:19<22:19,  6.21it/s]


 epoch: 6687 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 6688 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6689 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 45%|████▍     | 6690/15000 [13:19<20:11,  6.86it/s]


input:       and the consternation that this statement caused the writer matteo bandello observed leonardo at work and wrote that some

target:      and the consternation that this statement caused the writer matteo bandello observed leonardo at work and wrote that some days

prediction:  and the consternation that this statement caused the writer matteo bandello observed leonardo at work and wrote that some the

 epoch: 6690 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%

 epoch: 6691 | train_loss: 0.27, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%


 45%|████▍     | 6694/15000 [13:19<16:09,  8.56it/s]


 epoch: 6692 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6693 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.0%

 epoch: 6694 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 45%|████▍     | 6696/15000 [13:20<14:55,  9.28it/s]


 epoch: 6695 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6696 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.8%

 epoch: 6697 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 45%|████▍     | 6698/15000 [13:20<14:14,  9.72it/s]


 epoch: 6698 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6699 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

input:       h displaystyle omega text is estimated to be less than independent lines of evidence from type ia supernovae

target:      h displaystyle omega text is estimated to be less than independent lines of evidence from type ia supernovae and

prediction:  s displaystyle omega text is estimated to be less than independent lines of evidence from type ia supernovae the


 45%|████▍     | 6700/15000 [13:20<14:20,  9.65it/s]


 epoch: 6700 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.8%


 45%|████▍     | 6702/15000 [13:21<23:55,  5.78it/s]


 epoch: 6701 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6702 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%

 epoch: 6703 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 45%|████▍     | 6706/15000 [13:21<18:20,  7.54it/s]


 epoch: 6704 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6705 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 6706 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 45%|████▍     | 6708/15000 [13:21<17:28,  7.91it/s]


 epoch: 6707 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.2%

 epoch: 6708 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 45%|████▍     | 6710/15000 [13:22<18:51,  7.33it/s]


 epoch: 6709 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

input:       january in he called the existence of polar continent probable and in another copy of his journal he wrote

target:      january in he called the existence of polar continent probable and in another copy of his journal he wrote firmly

prediction:  january in he called the existence of polar continent probable and in another copy of his journal he wrote the

 epoch: 6710 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 45%|████▍     | 6712/15000 [13:22<18:50,  7.33it/s]


 epoch: 6711 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 6712 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 45%|████▍     | 6714/15000 [13:22<17:54,  7.71it/s]


 epoch: 6713 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6714 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%


 45%|████▍     | 6716/15000 [13:22<20:41,  6.67it/s]


 epoch: 6715 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 6716 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 45%|████▍     | 6718/15000 [13:23<18:50,  7.32it/s]


 epoch: 6717 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 6718 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 96.9%


 45%|████▍     | 6720/15000 [13:23<20:01,  6.89it/s]


 epoch: 6719 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

input:       that eukaryotes themselves originated from horizontal gene transfers between bacteria and archaea some heritable changes cannot be explained by

target:      that eukaryotes themselves originated from horizontal gene transfers between bacteria and archaea some heritable changes cannot be explained by changes

prediction:  that eukaryotes themselves originated from horizontal gene transfers between bacteria and archaea some heritable changes cannot be explained by the

 epoch: 6720 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 45%|████▍     | 6722/15000 [13:23<18:23,  7.50it/s]


 epoch: 6721 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.3%

 epoch: 6722 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 45%|████▍     | 6724/15000 [13:23<17:52,  7.72it/s]


 epoch: 6723 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%

 epoch: 6724 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 45%|████▍     | 6726/15000 [13:24<18:09,  7.60it/s]


 epoch: 6725 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6726 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%


 45%|████▍     | 6728/15000 [13:24<17:54,  7.70it/s]


 epoch: 6727 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6728 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 45%|████▍     | 6729/15000 [13:24<16:57,  8.13it/s]


 epoch: 6729 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.5%

input:       reverse engineering or debugging target process it is actively used by recognized publishers in copy protection schemas but is

target:      reverse engineering or debugging target process it is actively used by recognized publishers in copy protection schemas but is also

prediction:  reverse engineering or debugging target process it is actively used by recognized publishers in copy protection schemas but is the


 45%|████▍     | 6732/15000 [13:25<24:50,  5.55it/s]


 epoch: 6730 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 6731 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6732 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.7%


 45%|████▍     | 6735/15000 [13:25<17:50,  7.72it/s]


 epoch: 6733 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.0%

 epoch: 6734 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 6735 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.2%


 45%|████▍     | 6737/15000 [13:25<15:22,  8.95it/s]


 epoch: 6736 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 6737 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6738 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 45%|████▍     | 6739/15000 [13:25<14:11,  9.70it/s]


 epoch: 6739 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       in the census of the australian population declared some variety of christianity and stated no religion in recent australian

target:      in the census of the australian population declared some variety of christianity and stated no religion in recent australian and

prediction:  in the census of the australian population declared some variety of christianity and stated no religion in recent australian the

 epoch: 6740 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 45%|████▍     | 6743/15000 [13:26<13:37, 10.11it/s]


 epoch: 6741 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 6742 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6743 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6744 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 45%|████▍     | 6747/15000 [13:27<19:31,  7.05it/s]


 epoch: 6745 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6746 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.4%

 epoch: 6747 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 45%|████▍     | 6749/15000 [13:27<17:05,  8.04it/s]


 epoch: 6748 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6749 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

input:       contributed to the depopulation after this enslaved africans who had developed immunities to these diseases were quickly brought in

target:      contributed to the depopulation after this enslaved africans who had developed immunities to these diseases were quickly brought in to

prediction:  contributed to the depopulation after this enslaved africans who had developed immunities to these diseases were quickly brought in the

 epoch: 6750 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 45%|████▌     | 6753/15000 [13:27<14:40,  9.37it/s]


 epoch: 6751 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.19, test_acc: 97.5%

 epoch: 6752 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6753 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 45%|████▌     | 6755/15000 [13:27<13:51,  9.91it/s]


 epoch: 6754 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 6755 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6756 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 45%|████▌     | 6757/15000 [13:28<13:31, 10.15it/s]


 epoch: 6757 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6758 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%


 45%|████▌     | 6760/15000 [13:28<17:07,  8.02it/s]


 epoch: 6759 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

input:       soviet society following world war ii jewish psychologists past and present including lev vygotsky luria and aron zalkind

target:      soviet society following world war ii jewish psychologists past and present including lev vygotsky luria and aron zalkind were

prediction:  soviet society following world war ii jewish psychologists past and present including lev vygotsky luria and aron zalkind the

 epoch: 6760 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 45%|████▌     | 6762/15000 [13:28<15:07,  9.08it/s]


 epoch: 6761 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6762 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6763 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 45%|████▌     | 6766/15000 [13:29<13:10, 10.42it/s]


 epoch: 6764 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 6765 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6766 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.0%


 45%|████▌     | 6768/15000 [13:29<12:46, 10.73it/s]


 epoch: 6767 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6768 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 6769 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 45%|████▌     | 6770/15000 [13:29<13:30, 10.15it/s]


input:       strengthened christian defiance diocletian undertook the most severe persecution of christians from to from the nd century onward the

target:      strengthened christian defiance diocletian undertook the most severe persecution of christians from to from the nd century onward the church

prediction:  strengthened christian defiance diocletian undertook the most severe persecution of christians from to from the nd century onward the the

 epoch: 6770 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6771 | train_loss: 0.21, train_acc: 97.5% | test_loss: 0.23, test_acc: 97.2%


 45%|████▌     | 6774/15000 [13:29<12:52, 10.65it/s]


 epoch: 6772 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6773 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.20, test_acc: 97.1%

 epoch: 6774 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 45%|████▌     | 6776/15000 [13:29<12:30, 10.97it/s]


 epoch: 6775 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6776 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6777 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 45%|████▌     | 6780/15000 [13:30<12:42, 10.78it/s]


 epoch: 6778 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.1%

 epoch: 6779 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

input:       home of megaregions these include the quebec city windsor corridor golden horseshoe both of which are considered part of

target:      home of megaregions these include the quebec city windsor corridor golden horseshoe both of which are considered part of the

prediction:  home of megaregions these include the quebec city windsor corridor golden horseshoe both of which are considered part of the

 epoch: 6780 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 45%|████▌     | 6782/15000 [13:30<12:32, 10.92it/s]


 epoch: 6781 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6782 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 6783 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 45%|████▌     | 6786/15000 [13:30<12:25, 11.02it/s]


 epoch: 6784 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6785 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%

 epoch: 6786 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6787 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 45%|████▌     | 6788/15000 [13:31<21:40,  6.32it/s]


 epoch: 6788 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6789 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

input:       technological advances in computing and data communication widespread access to the internet emerged as the cost of infrastructure dropped

target:      technological advances in computing and data communication widespread access to the internet emerged as the cost of infrastructure dropped by

prediction:  technological advances in computing and data communication widespread access to the internet emerged as the cost of infrastructure dropped the


 45%|████▌     | 6792/15000 [13:31<17:14,  7.94it/s]


 epoch: 6790 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6791 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.6%

 epoch: 6792 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 45%|████▌     | 6794/15000 [13:32<15:42,  8.70it/s]


 epoch: 6793 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6794 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6795 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 45%|████▌     | 6798/15000 [13:32<13:55,  9.82it/s]


 epoch: 6796 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6797 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 6798 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 45%|████▌     | 6800/15000 [13:32<14:07,  9.67it/s]


 epoch: 6799 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.1%

input:       however some planetary geophysicists study the giant planets and exoplanets although the greek language origin prefix geo refers to

target:      however some planetary geophysicists study the giant planets and exoplanets although the greek language origin prefix geo refers to earth

prediction:  however some planetary geophysicists study the giant planets and exoplanets although the greek language origin prefix geo refers to the

 epoch: 6800 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6801 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.3%


 45%|████▌     | 6804/15000 [13:33<15:41,  8.71it/s]


 epoch: 6802 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6803 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 6804 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 45%|████▌     | 6806/15000 [13:33<14:39,  9.31it/s]


 epoch: 6805 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6806 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6807 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 45%|████▌     | 6810/15000 [13:33<13:40,  9.98it/s]


 epoch: 6808 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6809 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

input:       the thirteen colonies of british america the english did not establish settlements north or east of the st lawrence

target:      the thirteen colonies of british america the english did not establish settlements north or east of the st lawrence valley

prediction:  the thirteen colonies of british america the english did not establish settlements north or east of the st lawrence the

 epoch: 6810 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 45%|████▌     | 6812/15000 [13:33<13:19, 10.24it/s]


 epoch: 6811 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 6812 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6813 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 45%|████▌     | 6816/15000 [13:34<13:16, 10.27it/s]


 epoch: 6814 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6815 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6816 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 45%|████▌     | 6818/15000 [13:34<12:57, 10.53it/s]


 epoch: 6817 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6818 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6819 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 45%|████▌     | 6820/15000 [13:34<13:27, 10.13it/s]


input:       example java and all infer types in certain limited cases additionally some programming languages allow for some types to

target:      example java and all infer types in certain limited cases additionally some programming languages allow for some types to be

prediction:  example java and all infer types in certain limited cases additionally some programming languages allow for some types to the

 epoch: 6820 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%

 epoch: 6821 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 45%|████▌     | 6824/15000 [13:35<12:44, 10.69it/s]


 epoch: 6822 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6823 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6824 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 46%|████▌     | 6826/15000 [13:35<13:43,  9.93it/s]


 epoch: 6825 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6826 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 46%|████▌     | 6828/15000 [13:35<14:38,  9.30it/s]


 epoch: 6827 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.2%

 epoch: 6828 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 46%|████▌     | 6830/15000 [13:35<16:33,  8.23it/s]


 epoch: 6829 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

input:       brothers of his death described leonardo feelings for his pupils as both loving and passionate it has been claimed

target:      brothers of his death described leonardo feelings for his pupils as both loving and passionate it has been claimed since

prediction:  brothers of his death described leonardo feelings for his pupils as both loving and passionate it has been claimed the

 epoch: 6830 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 46%|████▌     | 6832/15000 [13:36<25:30,  5.34it/s]


 epoch: 6831 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6832 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 46%|████▌     | 6834/15000 [13:36<21:25,  6.35it/s]


 epoch: 6833 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6834 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.8%


 46%|████▌     | 6836/15000 [13:36<19:03,  7.14it/s]


 epoch: 6835 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6836 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 46%|████▌     | 6838/15000 [13:37<17:54,  7.60it/s]


 epoch: 6837 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6838 | train_loss: 0.23, train_acc: 97.4% | test_loss: 0.26, test_acc: 96.9%


 46%|████▌     | 6840/15000 [13:37<20:32,  6.62it/s]


 epoch: 6839 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.0%

input:       of restoration trompe il painting when ludovico sforza was overthrown by france in leonardo fled milan for venice accompanied

target:      of restoration trompe il painting when ludovico sforza was overthrown by france in leonardo fled milan for venice accompanied by

prediction:  of restoration trompe il painting when ludovico sforza was overthrown by france in leonardo fled milan for venice accompanied the

 epoch: 6840 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 46%|████▌     | 6842/15000 [13:37<18:42,  7.27it/s]


 epoch: 6841 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6842 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 46%|████▌     | 6844/15000 [13:38<17:24,  7.81it/s]


 epoch: 6843 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6844 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 46%|████▌     | 6846/15000 [13:38<21:18,  6.38it/s]


 epoch: 6845 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 6846 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%


 46%|████▌     | 6848/15000 [13:38<17:09,  7.92it/s]


 epoch: 6847 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6848 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 6849 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 46%|████▌     | 6850/15000 [13:38<16:24,  8.28it/s]


input:       has been developed to allow virtual designs to be printed using professional printers or personal printers such as makerbot

target:      has been developed to allow virtual designs to be printed using professional printers or personal printers such as makerbot and

prediction:  has been developed to allow virtual designs to be printed using professional printers or personal printers such as makerbot the

 epoch: 6850 | train_loss: 0.21, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6851 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 46%|████▌     | 6854/15000 [13:39<13:49,  9.82it/s]


 epoch: 6852 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 6853 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6854 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%


 46%|████▌     | 6856/15000 [13:39<13:29, 10.06it/s]


 epoch: 6855 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6856 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 6857 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 46%|████▌     | 6858/15000 [13:39<13:10, 10.30it/s]


 epoch: 6858 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 46%|████▌     | 6860/15000 [13:39<16:40,  8.13it/s]


 epoch: 6859 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

input:       binding the argument value to the parameter is done by powershell itself but for external executables arguments are parsed

target:      binding the argument value to the parameter is done by powershell itself but for external executables arguments are parsed by

prediction:  binding the argument value to the parameter is done by powershell itself but for external executables arguments are parsed the

 epoch: 6860 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 46%|████▌     | 6862/15000 [13:40<15:15,  8.89it/s]


 epoch: 6861 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%

 epoch: 6862 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6863 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 46%|████▌     | 6866/15000 [13:40<13:22, 10.13it/s]


 epoch: 6864 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6865 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6866 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 46%|████▌     | 6868/15000 [13:40<12:54, 10.50it/s]


 epoch: 6867 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6868 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6869 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 46%|████▌     | 6870/15000 [13:40<13:44,  9.86it/s]


input:       followed the east coast of australia for the first time the arrival of european settlers in subsequent centuries resulted

target:      followed the east coast of australia for the first time the arrival of european settlers in subsequent centuries resulted in

prediction:  followed the east coast of australia for the first time the arrival of european settlers in subsequent centuries resulted the

 epoch: 6870 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6871 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 46%|████▌     | 6872/15000 [13:41<13:41,  9.90it/s]


 epoch: 6872 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6873 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.4%


 46%|████▌     | 6876/15000 [13:41<18:56,  7.15it/s]


 epoch: 6874 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6875 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 6876 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 46%|████▌     | 6878/15000 [13:41<16:46,  8.07it/s]


 epoch: 6877 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 6878 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 6879 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.2%


 46%|████▌     | 6880/15000 [13:42<16:12,  8.35it/s]


input:       five years julian had series of victories against invading germanic tribes including the alamanni this allowed him to secure

target:      five years julian had series of victories against invading germanic tribes including the alamanni this allowed him to secure the

prediction:  five years julian had series of victories against invading germanic tribes including the alamanni this allowed him to secure the

 epoch: 6880 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 6881 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%


 46%|████▌     | 6884/15000 [13:42<14:13,  9.51it/s]


 epoch: 6882 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6883 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 6884 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 46%|████▌     | 6886/15000 [13:42<13:55,  9.71it/s]


 epoch: 6885 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6886 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.2%

 epoch: 6887 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.21, test_acc: 97.2%


 46%|████▌     | 6888/15000 [13:43<17:59,  7.52it/s]


 epoch: 6888 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6889 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

input:       the internet have each despite an invasive state sponsored censorship apparatus enabled new forms of humour to flourish in

target:      the internet have each despite an invasive state sponsored censorship apparatus enabled new forms of humour to flourish in china

prediction:  the internet have each despite an invasive state sponsored censorship apparatus enabled new forms of humour to flourish in the

 epoch: 6890 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 96.9%


 46%|████▌     | 6892/15000 [13:43<15:17,  8.84it/s]


 epoch: 6891 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 6892 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 46%|████▌     | 6895/15000 [13:43<13:53,  9.73it/s]


 epoch: 6893 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 6894 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6895 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 46%|████▌     | 6897/15000 [13:44<13:17, 10.17it/s]


 epoch: 6896 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 6897 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6898 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 46%|████▌     | 6899/15000 [13:44<12:57, 10.42it/s]


 epoch: 6899 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

input:       art even famous artists were of low social status partly as they worked with their hands portraiture which survives

target:      art even famous artists were of low social status partly as they worked with their hands portraiture which survives mainly

prediction:  art even famous artists were of low social status partly as they worked with their hands portraiture which survives the

 epoch: 6900 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 46%|████▌     | 6901/15000 [13:44<13:33,  9.95it/s]


 epoch: 6901 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 46%|████▌     | 6903/15000 [13:45<22:29,  6.00it/s]


 epoch: 6902 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 6903 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6904 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%


 46%|████▌     | 6907/15000 [13:45<16:54,  7.97it/s]


 epoch: 6905 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 96.9%

 epoch: 6906 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6907 | train_loss: 0.19, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 46%|████▌     | 6909/15000 [13:45<15:30,  8.69it/s]


 epoch: 6908 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6909 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

input:       the left frontal gyrus thereby also demonstrating hemispheric lateralization of brain function soon after carl wernicke identified related area

target:      the left frontal gyrus thereby also demonstrating hemispheric lateralization of brain function soon after carl wernicke identified related area necessary

prediction:  the left frontal gyrus thereby also demonstrating hemispheric lateralization of brain function soon after carl wernicke identified related area the


 46%|████▌     | 6911/15000 [13:45<15:14,  8.85it/s]


 epoch: 6910 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6911 | train_loss: 0.25, train_acc: 96.7% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6912 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 46%|████▌     | 6915/15000 [13:46<13:28, 10.01it/s]


 epoch: 6913 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6914 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6915 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%


 46%|████▌     | 6917/15000 [13:46<13:22, 10.07it/s]


 epoch: 6916 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6917 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6918 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 46%|████▌     | 6919/15000 [13:46<13:00, 10.35it/s]


 epoch: 6919 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

input:       spread from there to the ottoman empire central asia india and burma during the lifetime of bah ll since

target:      spread from there to the ottoman empire central asia india and burma during the lifetime of bah ll since the

prediction:  spread from there to the ottoman empire central asia india and burma during the lifetime of bah ll since the

 epoch: 6920 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 46%|████▌     | 6923/15000 [13:46<12:53, 10.44it/s]


 epoch: 6921 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6922 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6923 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.2%


 46%|████▌     | 6925/15000 [13:47<12:45, 10.55it/s]


 epoch: 6924 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6925 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 6926 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 46%|████▌     | 6929/15000 [13:47<12:27, 10.79it/s]


 epoch: 6927 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6928 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 96.9%

 epoch: 6929 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

input:       from this field of research have seen limited application to programming language design and implementation outside academia type system

target:      from this field of research have seen limited application to programming language design and implementation outside academia type system defines

prediction:  from this field of research have seen limited application to programming language design and implementation outside academia type system the

 epoch: 6930 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 46%|████▌     | 6933/15000 [13:47<13:54,  9.66it/s]


 epoch: 6931 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6932 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6933 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 46%|████▌     | 6935/15000 [13:48<13:34,  9.91it/s]


 epoch: 6934 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6935 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6936 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 46%|████▋     | 6939/15000 [13:48<13:01, 10.31it/s]


 epoch: 6937 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6938 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6939 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 46%|████▋     | 6941/15000 [13:48<14:45,  9.10it/s]


input:       be replaced by the offspring of parents with favourable characteristics for that environment in the early th century competing

target:      be replaced by the offspring of parents with favourable characteristics for that environment in the early th century competing ideas

prediction:  be replaced by the offspring of parents with favourable characteristics for that environment in the early th century competing the

 epoch: 6940 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6941 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 46%|████▋     | 6943/15000 [13:49<15:34,  8.62it/s]


 epoch: 6942 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 6943 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 46%|████▋     | 6944/15000 [13:49<16:00,  8.39it/s]


 epoch: 6944 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.4%


 46%|████▋     | 6946/15000 [13:49<27:43,  4.84it/s]


 epoch: 6945 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 6946 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 46%|████▋     | 6948/15000 [13:50<22:41,  5.91it/s]


 epoch: 6947 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6948 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 46%|████▋     | 6950/15000 [13:50<21:10,  6.34it/s]


 epoch: 6949 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       indigenous peoples the historic connection with the iberian peninsula and africa and waves of immigrants from around the globe

target:      indigenous peoples the historic connection with the iberian peninsula and africa and waves of immigrants from around the globe south

prediction:  indigenous peoples the historic connection with the iberian peninsula and africa and waves of immigrants from around the globe the

 epoch: 6950 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%


 46%|████▋     | 6952/15000 [13:50<18:44,  7.16it/s]


 epoch: 6951 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6952 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%


 46%|████▋     | 6954/15000 [13:50<16:32,  8.10it/s]


 epoch: 6953 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6954 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%


 46%|████▋     | 6956/15000 [13:51<16:18,  8.22it/s]


 epoch: 6955 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 6956 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 46%|████▋     | 6958/15000 [13:51<16:17,  8.23it/s]


 epoch: 6957 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.21, test_acc: 97.2%

 epoch: 6958 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 46%|████▋     | 6959/15000 [13:51<15:54,  8.43it/s]


 epoch: 6959 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.2%

input:       of the pacific region are referred to as oceania the tenth continent on earth inherent to their remoteness and

target:      of the pacific region are referred to as oceania the tenth continent on earth inherent to their remoteness and because

prediction:  of the pacific region are referred to as oceania the tenth continent on earth inherent to their remoteness and the


 46%|████▋     | 6962/15000 [13:52<24:58,  5.36it/s]


 epoch: 6960 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.1%

 epoch: 6961 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 6962 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.27, test_acc: 97.1%


 46%|████▋     | 6964/15000 [13:52<19:50,  6.75it/s]


 epoch: 6963 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 6964 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6965 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 46%|████▋     | 6968/15000 [13:52<14:50,  9.02it/s]


 epoch: 6966 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6967 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6968 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 46%|████▋     | 6970/15000 [13:53<14:44,  9.07it/s]


 epoch: 6969 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

input:       and sub saharan cinemas and between the cinemas of different countries given the vastness of the african continent its

target:      and sub saharan cinemas and between the cinemas of different countries given the vastness of the african continent its music

prediction:  and sub saharan cinemas and between the cinemas of different countries given the vastness of the african continent its the

 epoch: 6970 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 46%|████▋     | 6972/15000 [13:53<14:06,  9.48it/s]


 epoch: 6971 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 6972 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6973 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 47%|████▋     | 6976/15000 [13:54<19:19,  6.92it/s]


 epoch: 6974 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6975 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 6976 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 47%|████▋     | 6979/15000 [13:54<15:46,  8.48it/s]


 epoch: 6977 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 6978 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%

 epoch: 6979 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%


 47%|████▋     | 6981/15000 [13:54<15:24,  8.67it/s]


input:       accessibility where other websites are censored the non governmental organization reporters without borders has used an open minecraft server

target:      accessibility where other websites are censored the non governmental organization reporters without borders has used an open minecraft server to

prediction:  accessibility where other websites are censored the non governmental organization reporters without borders has used an open minecraft server the

 epoch: 6980 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6981 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%


 47%|████▋     | 6984/15000 [13:54<13:56,  9.59it/s]


 epoch: 6982 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.1%

 epoch: 6983 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.20, test_acc: 97.3%

 epoch: 6984 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 47%|████▋     | 6986/15000 [13:54<13:21, 10.00it/s]


 epoch: 6985 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 6986 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6987 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%


 47%|████▋     | 6988/15000 [13:55<22:27,  5.94it/s]


 epoch: 6988 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 6989 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%

input:       his three surviving sons constantine ii was killed in conflict with his youngest brother in constans was himself killed

target:      his three surviving sons constantine ii was killed in conflict with his youngest brother in constans was himself killed in

prediction:  his three surviving sons constantine ii was killed in conflict with his youngest brother in constans was himself killed the

 epoch: 6990 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 47%|████▋     | 6992/15000 [13:55<17:06,  7.80it/s]


 epoch: 6991 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 6992 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 6993 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 47%|████▋     | 6996/15000 [13:56<14:02,  9.50it/s]


 epoch: 6994 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6995 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 6996 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.3%


 47%|████▋     | 6998/15000 [13:56<13:32,  9.85it/s]


 epoch: 6997 | train_loss: 0.27, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 6998 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%

 epoch: 6999 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 47%|████▋     | 7000/15000 [13:56<13:56,  9.56it/s]


input:       future research aims to prove or disprove the supersymmetry which extends the standard model of particle physics research on

target:      future research aims to prove or disprove the supersymmetry which extends the standard model of particle physics research on the

prediction:  future research aims to prove or disprove the supersymmetry which extends the standard model of particle physics research on the

 epoch: 7000 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7001 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.8%


 47%|████▋     | 7004/15000 [13:57<13:28,  9.89it/s]


 epoch: 7002 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 7003 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%

 epoch: 7004 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 47%|████▋     | 7006/15000 [13:57<12:54, 10.32it/s]


 epoch: 7005 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.3%

 epoch: 7006 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7007 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.0%


 47%|████▋     | 7010/15000 [13:57<12:29, 10.66it/s]


 epoch: 7008 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7009 | train_loss: 0.23, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.0%

input:       people in there were million immigrants and born children of immigrants in the united states accounting for nearly

target:      people in there were million immigrants and born children of immigrants in the united states accounting for nearly of

prediction:  people in there were million immigrants and born children of immigrants in the united states accounting for nearly the

 epoch: 7010 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 47%|████▋     | 7012/15000 [13:57<12:28, 10.67it/s]


 epoch: 7011 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7012 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7013 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.4%


 47%|████▋     | 7016/15000 [13:58<12:20, 10.79it/s]


 epoch: 7014 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 7015 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7016 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 47%|████▋     | 7018/15000 [13:58<15:21,  8.67it/s]


 epoch: 7017 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7018 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7019 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.8%


 47%|████▋     | 7020/15000 [13:58<14:59,  8.87it/s]


input:       in san diego estimated practitioners of buddhism at million people of whom are living in southern california

target:      in san diego estimated practitioners of buddhism at million people of whom are living in southern california the

prediction:  in san diego estimated practitioners of buddhism at million people of whom are living in southern california the PAD

 epoch: 7020 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7021 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%


 47%|████▋     | 7024/15000 [13:59<13:10, 10.09it/s]


 epoch: 7022 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7023 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7024 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 47%|████▋     | 7026/15000 [13:59<12:37, 10.53it/s]


 epoch: 7025 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 7026 | train_loss: 0.22, train_acc: 97.5% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7027 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 47%|████▋     | 7028/15000 [13:59<12:25, 10.70it/s]


 epoch: 7028 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.4%

 epoch: 7029 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.0%

input:       or esperanto there is consensus that the loss of languages harms the cultural diversity of the world it is

target:      or esperanto there is consensus that the loss of languages harms the cultural diversity of the world it is common

prediction:  or esperanto there is consensus that the loss of languages harms the cultural diversity of the world it is the


 47%|████▋     | 7030/15000 [13:59<12:42, 10.46it/s]


 epoch: 7030 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 47%|████▋     | 7032/15000 [14:00<21:32,  6.17it/s]


 epoch: 7031 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7032 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%

 epoch: 7033 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 47%|████▋     | 7036/15000 [14:00<16:16,  8.16it/s]


 epoch: 7034 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7035 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 7036 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 47%|████▋     | 7038/15000 [14:00<14:58,  8.87it/s]


 epoch: 7037 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7038 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7039 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 47%|████▋     | 7040/15000 [14:01<14:43,  9.01it/s]


input:       painted for him leonardo inspired by the story of medusa responded with painting of monster spitting fire that was

target:      painted for him leonardo inspired by the story of medusa responded with painting of monster spitting fire that was so

prediction:  painted for him leonardo inspired by the story of medusa responded with painting of monster spitting fire that was the

 epoch: 7040 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 96.9%

 epoch: 7041 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 47%|████▋     | 7044/15000 [14:01<13:23,  9.90it/s]


 epoch: 7042 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 7043 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7044 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 47%|████▋     | 7046/15000 [14:02<23:01,  5.76it/s]


 epoch: 7045 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 7046 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 96.9%


 47%|████▋     | 7048/15000 [14:02<20:26,  6.49it/s]


 epoch: 7047 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7048 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 47%|████▋     | 7050/15000 [14:02<21:07,  6.27it/s]


 epoch: 7049 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

input:       discovered than invented the term application program interface without an ing suffix is first recorded in paper called data

target:      discovered than invented the term application program interface without an ing suffix is first recorded in paper called data structures

prediction:  discovered than invented the term application program interface without an ing suffix is first recorded in paper called data the

 epoch: 7050 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%


 47%|████▋     | 7052/15000 [14:02<18:10,  7.29it/s]


 epoch: 7051 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%

 epoch: 7052 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 47%|████▋     | 7054/15000 [14:03<17:23,  7.61it/s]


 epoch: 7053 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 7054 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 47%|████▋     | 7056/15000 [14:03<17:28,  7.58it/s]


 epoch: 7055 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.1%

 epoch: 7056 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%


 47%|████▋     | 7058/15000 [14:03<16:13,  8.16it/s]


 epoch: 7057 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7058 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 47%|████▋     | 7059/15000 [14:03<16:14,  8.15it/s]


 epoch: 7059 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.4%

input:       the factors responsible for successful education and how these factors may differ from person to person important factors include

target:      the factors responsible for successful education and how these factors may differ from person to person important factors include intelligence

prediction:  the factors responsible for successful education and how these factors may differ from person to person important factors include the


 47%|████▋     | 7061/15000 [14:04<28:15,  4.68it/s]


 epoch: 7060 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 7061 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%


 47%|████▋     | 7063/15000 [14:04<21:25,  6.17it/s]


 epoch: 7062 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7063 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 47%|████▋     | 7065/15000 [14:04<18:38,  7.09it/s]


 epoch: 7064 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7065 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 47%|████▋     | 7067/15000 [14:05<16:43,  7.91it/s]


 epoch: 7066 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7067 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 47%|████▋     | 7069/15000 [14:05<15:54,  8.31it/s]


 epoch: 7068 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 7069 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

input:       traditions of rome prior to christian dominion is found into the th century with the saturnalia of macrobius and


 47%|████▋     | 7071/15000 [14:05<16:17,  8.11it/s]


target:      traditions of rome prior to christian dominion is found into the th century with the saturnalia of macrobius and the

prediction:  traditions of rome prior to christian dominion is found into the th century with the saturnalia of macrobius and the

 epoch: 7070 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7071 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%


 47%|████▋     | 7072/15000 [14:05<15:47,  8.37it/s]


 epoch: 7072 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7073 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 47%|████▋     | 7076/15000 [14:06<21:20,  6.19it/s]


 epoch: 7074 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7075 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7076 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.2%


 47%|████▋     | 7079/15000 [14:06<16:24,  8.04it/s]


 epoch: 7077 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 7078 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7079 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 47%|████▋     | 7081/15000 [14:07<15:50,  8.33it/s]


input:       in home or public schooling boys and girls received primary education generally from ages to but classes were not

target:      in home or public schooling boys and girls received primary education generally from ages to but classes were not segregated

prediction:  in home or public schooling boys and girls received primary education generally from ages to but classes were not the

 epoch: 7080 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7081 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%


 47%|████▋     | 7084/15000 [14:07<13:42,  9.63it/s]


 epoch: 7082 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7083 | train_loss: 0.25, train_acc: 96.7% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7084 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 47%|████▋     | 7087/15000 [14:07<12:43, 10.36it/s]


 epoch: 7085 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7086 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7087 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%


 47%|████▋     | 7089/15000 [14:07<14:33,  9.05it/s]


 epoch: 7088 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7089 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

input:       peace and prosperity he was so loved that he came to hold the power of monarch de facto if

target:      peace and prosperity he was so loved that he came to hold the power of monarch de facto if not

prediction:  peace and prosperity he was so loved that he came to hold the power of monarch de facto if the


 47%|████▋     | 7092/15000 [14:08<13:54,  9.47it/s]


 epoch: 7090 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7091 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7092 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 47%|████▋     | 7095/15000 [14:08<12:46, 10.32it/s]


 epoch: 7093 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 7094 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7095 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%


 47%|████▋     | 7097/15000 [14:08<12:38, 10.41it/s]


 epoch: 7096 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7097 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 7098 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 47%|████▋     | 7099/15000 [14:08<12:29, 10.54it/s]


 epoch: 7099 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

input:       aurelius fourteen children are known to have reached adulthood the importance of good diet to health was recognized by

target:      aurelius fourteen children are known to have reached adulthood the importance of good diet to health was recognized by medical

prediction:  aurelius fourteen children are known to have reached adulthood the importance of good diet to health was recognized by the

 epoch: 7100 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.1%


 47%|████▋     | 7101/15000 [14:09<13:11,  9.98it/s]


 epoch: 7101 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7102 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 47%|████▋     | 7105/15000 [14:09<18:37,  7.07it/s]


 epoch: 7103 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7104 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7105 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 47%|████▋     | 7108/15000 [14:10<15:20,  8.58it/s]


 epoch: 7106 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%

 epoch: 7107 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7108 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 47%|████▋     | 7110/15000 [14:10<14:46,  8.90it/s]


 epoch: 7109 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

input:       limited to three contributors per project previously only public repositories were free on april github made all of the

target:      limited to three contributors per project previously only public repositories were free on april github made all of the core

prediction:  limited to three contributors per project previously only public repositories were free on april github made all of the the

 epoch: 7110 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.4%

 epoch: 7111 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 47%|████▋     | 7114/15000 [14:10<13:10,  9.97it/s]


 epoch: 7112 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7113 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7114 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 47%|████▋     | 7116/15000 [14:10<13:01, 10.08it/s]


 epoch: 7115 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 7116 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 47%|████▋     | 7118/15000 [14:11<21:40,  6.06it/s]


 epoch: 7117 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 7118 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7119 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%


 47%|████▋     | 7120/15000 [14:11<19:22,  6.78it/s]


input:       overwhelming majority of south americans speak spanish or portuguese and societies and states are rich in western traditions relative

target:      overwhelming majority of south americans speak spanish or portuguese and societies and states are rich in western traditions relative to

prediction:  overwhelming majority of south americans speak spanish or portuguese and societies and states are rich in western traditions relative the

 epoch: 7120 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.8%

 epoch: 7121 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 47%|████▋     | 7124/15000 [14:12<15:17,  8.58it/s]


 epoch: 7122 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7123 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7124 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.0%


 48%|████▊     | 7126/15000 [14:12<14:34,  9.01it/s]


 epoch: 7125 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7126 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7127 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%


 48%|████▊     | 7130/15000 [14:12<13:34,  9.67it/s]


 epoch: 7128 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 7129 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.3%

input:       us which indicates masculine gender singular number and nominative case these languages are called fusional languages because several meanings

target:      us which indicates masculine gender singular number and nominative case these languages are called fusional languages because several meanings may

prediction:  us which indicates masculine gender singular number and nominative case these languages are called fusional languages because several meanings the

 epoch: 7130 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 48%|████▊     | 7132/15000 [14:12<14:37,  8.97it/s]


 epoch: 7131 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7132 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%

 epoch: 7133 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 48%|████▊     | 7136/15000 [14:13<12:50, 10.21it/s]


 epoch: 7134 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.4%

 epoch: 7135 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7136 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 48%|████▊     | 7138/15000 [14:13<12:22, 10.60it/s]


 epoch: 7137 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.4%

 epoch: 7138 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7139 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 48%|████▊     | 7140/15000 [14:13<12:55, 10.13it/s]


input:       is widespread religion in asia with more than million adherents according to pew research center in and nearly million

target:      is widespread religion in asia with more than million adherents according to pew research center in and nearly million according

prediction:  is widespread religion in asia with more than million adherents according to pew research center in and nearly million the

 epoch: 7140 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7141 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%


 48%|████▊     | 7144/15000 [14:13<12:31, 10.45it/s]


 epoch: 7142 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7143 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7144 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7145 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 48%|████▊     | 7147/15000 [14:14<20:27,  6.40it/s]


 epoch: 7146 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7147 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.0%

 epoch: 7148 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 48%|████▊     | 7150/15000 [14:15<16:51,  7.76it/s]


 epoch: 7149 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.3%

input:       to find out whether process in the very early universe has reached thermal equilibrium is the ratio between the

target:      to find out whether process in the very early universe has reached thermal equilibrium is the ratio between the rate

prediction:  to find out whether process in the very early universe has reached thermal equilibrium is the ratio between the the

 epoch: 7150 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 48%|████▊     | 7152/15000 [14:15<15:02,  8.70it/s]


 epoch: 7151 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 7152 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.6%

 epoch: 7153 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 48%|████▊     | 7155/15000 [14:15<14:16,  9.16it/s]


 epoch: 7154 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.9%

 epoch: 7155 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 48%|████▊     | 7157/15000 [14:15<15:13,  8.59it/s]


 epoch: 7156 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7157 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.0%


 48%|████▊     | 7159/15000 [14:16<15:08,  8.63it/s]


 epoch: 7158 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 7159 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

input:       planned but then disaster struck the illyrian tribes revolted and had to be crushed and three full legions under


 48%|████▊     | 7161/15000 [14:16<16:58,  7.70it/s]


target:      planned but then disaster struck the illyrian tribes revolted and had to be crushed and three full legions under the

prediction:  planned but then disaster struck the illyrian tribes revolted and had to be crushed and three full legions under the

 epoch: 7160 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7161 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 48%|████▊     | 7163/15000 [14:16<16:54,  7.73it/s]


 epoch: 7162 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7163 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%


 48%|████▊     | 7165/15000 [14:16<16:36,  7.86it/s]


 epoch: 7164 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7165 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 48%|████▊     | 7167/15000 [14:17<15:15,  8.55it/s]


 epoch: 7166 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7167 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 48%|████▊     | 7169/15000 [14:17<16:18,  8.00it/s]


 epoch: 7168 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7169 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 48%|████▊     | 7170/15000 [14:17<18:20,  7.11it/s]


input:       one can also use powershell embedded in management application which uses the powershell runtime to implement the management functionality

target:      one can also use powershell embedded in management application which uses the powershell runtime to implement the management functionality for

prediction:  one can also use powershell embedded in management application which uses the powershell runtime to implement the management functionality the

 epoch: 7170 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 48%|████▊     | 7172/15000 [14:17<16:10,  8.06it/s]


 epoch: 7171 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 7172 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.1%


 48%|████▊     | 7173/15000 [14:17<15:43,  8.29it/s]


 epoch: 7173 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 48%|████▊     | 7175/15000 [14:18<18:53,  6.90it/s]


 epoch: 7174 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 7175 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 48%|████▊     | 7177/15000 [14:18<17:02,  7.65it/s]


 epoch: 7176 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7177 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 48%|████▊     | 7179/15000 [14:18<15:31,  8.40it/s]


 epoch: 7178 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.19, test_acc: 97.4%

 epoch: 7179 | train_loss: 0.23, train_acc: 96.7% | test_loss: 0.22, test_acc: 97.2%

input:       being understood as that which has been brought into being by human consciousness or human mind depending on the

target:     

 48%|████▊     | 7181/15000 [14:18<16:53,  7.72it/s]

 being understood as that which has been brought into being by human consciousness or human mind depending on the particular

prediction:  being understood as that which has been brought into being by human consciousness or human mind depending on the the

 epoch: 7180 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7181 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 48%|████▊     | 7183/15000 [14:19<16:29,  7.90it/s]


 epoch: 7182 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7183 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 48%|████▊     | 7185/15000 [14:19<16:18,  7.99it/s]


 epoch: 7184 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7185 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 48%|████▊     | 7187/15000 [14:19<16:13,  8.02it/s]


 epoch: 7186 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7187 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 48%|████▊     | 7188/15000 [14:19<15:53,  8.19it/s]


 epoch: 7188 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.4%


 48%|████▊     | 7190/15000 [14:20<29:19,  4.44it/s]


 epoch: 7189 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

input:       of the model an attempt to find more suitable alternative was not successful the big bang models developed from

target:      of the model an attempt to find more suitable alternative was not successful the big bang models developed from observations

prediction:  of the model an attempt to find more suitable alternative was not successful the big bang models developed from the

 epoch: 7190 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 48%|████▊     | 7193/15000 [14:20<18:28,  7.05it/s]


 epoch: 7191 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7192 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 7193 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 48%|████▊     | 7195/15000 [14:20<15:42,  8.29it/s]


 epoch: 7194 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 7195 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7196 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%


 48%|████▊     | 7199/15000 [14:21<13:11,  9.86it/s]


 epoch: 7197 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.8%

 epoch: 7198 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 7199 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 48%|████▊     | 7201/15000 [14:21<14:09,  9.18it/s]


input:       the west and the sea peoples conjectured confederation of seafarers from the aegean sea initially the military was able

target:      the west and the sea peoples conjectured confederation of seafarers from the aegean sea initially the military was able to

prediction:  the west and the sea peoples conjectured confederation of seafarers from the aegean sea initially the military was able the

 epoch: 7200 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7201 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 48%|████▊     | 7202/15000 [14:21<14:03,  9.25it/s]


 epoch: 7202 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%


 48%|████▊     | 7205/15000 [14:22<20:28,  6.35it/s]


 epoch: 7203 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.9%

 epoch: 7204 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7205 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 48%|████▊     | 7208/15000 [14:22<15:56,  8.14it/s]


 epoch: 7206 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7207 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7208 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%


 48%|████▊     | 7210/15000 [14:22<15:03,  8.63it/s]


 epoch: 7209 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

input:       and political career on june he became one of only four people ever to be recognized with honorary citizenship

target:      and political career on june he became one of only four people ever to be recognized with honorary citizenship by

prediction:  and political career on june he became one of only four people ever to be recognized with honorary citizenship the

 epoch: 7210 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 7211 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 48%|████▊     | 7214/15000 [14:23<12:52, 10.08it/s]


 epoch: 7212 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

 epoch: 7213 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7214 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 48%|████▊     | 7216/15000 [14:23<12:35, 10.31it/s]


 epoch: 7215 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 7216 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7217 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 48%|████▊     | 7220/15000 [14:24<18:31,  7.00it/s]


 epoch: 7218 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 7219 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

input:       opportunity to create more general purpose solution to microsoft problem of administrative automation by microsoft had started to develop

target:      opportunity to create more general purpose solution to microsoft problem of administrative automation by microsoft had started to develop new

prediction:  opportunity to create more general purpose solution to microsoft problem of administrative automation by microsoft had started to develop the

 epoch: 7220 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.19, test_acc: 97.4%


 48%|████▊     | 7222/15000 [14:24<16:17,  7.96it/s]


 epoch: 7221 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.1%

 epoch: 7222 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.8%

 epoch: 7223 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 48%|████▊     | 7226/15000 [14:24<13:27,  9.63it/s]


 epoch: 7224 | train_loss: 0.22, train_acc: 97.5% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7225 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7226 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 48%|████▊     | 7228/15000 [14:24<12:56, 10.01it/s]


 epoch: 7227 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

 epoch: 7228 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7229 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 48%|████▊     | 7230/15000 [14:25<13:20,  9.71it/s]


input:       well maintained classrooms and suitable classroom furniture as well as the availability of library and canteen tend to contribute

target:      well maintained classrooms and suitable classroom furniture as well as the availability of library and canteen tend to contribute to

prediction:  well maintained classrooms and suitable classroom furniture as well as the availability of library and canteen tend to contribute the

 epoch: 7230 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7231 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 48%|████▊     | 7234/15000 [14:25<18:18,  7.07it/s]


 epoch: 7232 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7233 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 7234 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%


 48%|████▊     | 7236/15000 [14:26<16:05,  8.04it/s]


 epoch: 7235 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 7236 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.2%

 epoch: 7237 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.0%


 48%|████▊     | 7240/15000 [14:26<14:01,  9.23it/s]


 epoch: 7238 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7239 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

input:       bc to the invasion by xerxes in bc this period saw the expansion of the greek world around the

target:      bc to the invasion by xerxes in bc this period saw the expansion of the greek world around the mediterranean

prediction:  bc to the invasion by xerxes in bc this period saw the expansion of the greek world around the the

 epoch: 7240 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 48%|████▊     | 7242/15000 [14:26<13:27,  9.60it/s]


 epoch: 7241 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7242 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.4%

 epoch: 7243 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 48%|████▊     | 7244/15000 [14:26<12:52, 10.04it/s]


 epoch: 7244 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 7245 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 48%|████▊     | 7248/15000 [14:27<18:19,  7.05it/s]


 epoch: 7246 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7247 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7248 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 48%|████▊     | 7250/15000 [14:27<16:50,  7.67it/s]


 epoch: 7249 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

input:       of his procedures for measuring reaction time and sensation he is considered an inventor of modern mental testing also

target:      of his procedures for measuring reaction time and sensation he is considered an inventor of modern mental testing also known

prediction:  of his procedures for measuring reaction time and sensation he is considered an inventor of modern mental testing also the

 epoch: 7250 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 48%|████▊     | 7252/15000 [14:28<15:15,  8.46it/s]


 epoch: 7251 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 7252 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7253 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 48%|████▊     | 7256/15000 [14:28<13:13,  9.76it/s]


 epoch: 7254 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7255 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7256 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%


 48%|████▊     | 7258/15000 [14:28<12:49, 10.06it/s]


 epoch: 7257 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7258 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 7259 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

input:       europe and asia followed ptolemy with the boundary following the turkish straits the black sea the kerch strait the

target:      europe and asia followed ptolemy with the boundary following the turkish straits the black sea the kerch strait the sea

prediction:  europe and asia followed ptolemy with the boundary following the turkish straits the black sea the kerch strait the the


 48%|████▊     | 7262/15000 [14:29<18:49,  6.85it/s]


 epoch: 7260 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 96.8%

 epoch: 7261 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 7262 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 48%|████▊     | 7264/15000 [14:29<16:38,  7.75it/s]


 epoch: 7263 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7264 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

 epoch: 7265 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 48%|████▊     | 7268/15000 [14:29<14:29,  8.90it/s]


 epoch: 7266 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 7267 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%

 epoch: 7268 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%


 48%|████▊     | 7270/15000 [14:30<15:02,  8.56it/s]


 epoch: 7269 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.2%

input:       some lineages suggesting time depth of thousand years santa cruz population located in remote oceania is an anomaly with

target:      some lineages suggesting time depth of thousand years santa cruz population located in remote oceania is an anomaly with extreme

prediction:  some lineages suggesting time depth of thousand years santa cruz population located in remote oceania is an anomaly with the

 epoch: 7270 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 48%|████▊     | 7272/15000 [14:30<15:26,  8.34it/s]


 epoch: 7271 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 7272 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 48%|████▊     | 7274/15000 [14:30<15:53,  8.10it/s]


 epoch: 7273 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7274 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.2%


 49%|████▊     | 7276/15000 [14:31<25:20,  5.08it/s]


 epoch: 7275 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.3%

 epoch: 7276 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%


 49%|████▊     | 7278/15000 [14:31<21:18,  6.04it/s]


 epoch: 7277 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7278 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 49%|████▊     | 7280/15000 [14:31<20:01,  6.43it/s]


 epoch: 7279 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.1%

input:       species of thinking and its right to exist is coextensive with its power of resisting extinction by its rivals

target:      species of thinking and its right to exist is coextensive with its power of resisting extinction by its rivals in

prediction:  species of thinking and its right to exist is coextensive with its power of resisting extinction by its rivals the

 epoch: 7280 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 49%|████▊     | 7282/15000 [14:32<16:29,  7.80it/s]


 epoch: 7281 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7282 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.8%

 epoch: 7283 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.8%


 49%|████▊     | 7285/15000 [14:32<14:55,  8.61it/s]


 epoch: 7284 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 7285 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 49%|████▊     | 7287/15000 [14:32<15:15,  8.42it/s]


 epoch: 7286 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7287 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 49%|████▊     | 7289/15000 [14:32<15:46,  8.15it/s]


 epoch: 7288 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7289 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%


 49%|████▊     | 7290/15000 [14:33<18:23,  6.99it/s]


input:       augustus in establishing principles of dynastic succession was limited by his outliving number of talented potential heirs the julio

target:      augustus in establishing principles of dynastic succession was limited by his outliving number of talented potential heirs the julio claudian

prediction:  augustus in establishing principles of dynastic succession was limited by his outliving number of talented potential heirs the julio the

 epoch: 7290 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 49%|████▊     | 7292/15000 [14:33<16:53,  7.61it/s]


 epoch: 7291 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 7292 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.25, test_acc: 97.1%


 49%|████▊     | 7296/15000 [14:33<13:18,  9.65it/s]


 epoch: 7293 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.8%

 epoch: 7294 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7295 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7296 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 49%|████▊     | 7299/15000 [14:33<12:39, 10.14it/s]


 epoch: 7297 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7298 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7299 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 49%|████▊     | 7300/15000 [14:34<13:47,  9.30it/s]


input:       widespread religion in asia with more than million adherents according to pew research center in and nearly million according

target:      widespread religion in asia with more than million adherents according to pew research center in and nearly million according to

prediction:  widespread religion in asia with more than million adherents according to pew research center in and nearly million according the

 epoch: 7300 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7301 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 49%|████▊     | 7302/15000 [14:34<13:05,  9.80it/s]


 epoch: 7302 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%


 49%|████▊     | 7305/15000 [14:35<20:12,  6.35it/s]


 epoch: 7303 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7304 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7305 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 49%|████▊     | 7307/15000 [14:35<17:02,  7.52it/s]


 epoch: 7306 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 7307 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7308 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 49%|████▊     | 7310/15000 [14:35<15:14,  8.41it/s]


 epoch: 7309 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

input:       systems for tracking versions of source code source code control system sccs and revision control system rcs worked on

target:      systems for tracking versions of source code source code control system sccs and revision control system rcs worked on individual

prediction:  systems for tracking versions of source code source code control system sccs and revision control system rcs worked on the

 epoch: 7310 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%


 49%|████▊     | 7312/15000 [14:35<14:36,  8.77it/s]


 epoch: 7311 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 7312 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.3%


 49%|████▉     | 7314/15000 [14:35<14:12,  9.01it/s]


 epoch: 7313 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 7314 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 49%|████▉     | 7316/15000 [14:36<13:26,  9.53it/s]


 epoch: 7315 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7316 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 7317 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 49%|████▉     | 7320/15000 [14:36<19:32,  6.55it/s]


 epoch: 7318 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7319 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

input:       raids the rivers rhine and danube became the permanent borders of the roman empire in the north in ad

target:      raids the rivers rhine and danube became the permanent borders of the roman empire in the north in ad augustus

prediction:  raids the rivers rhine and danube became the permanent borders of the roman empire in the north in ad the

 epoch: 7320 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 49%|████▉     | 7322/15000 [14:37<16:46,  7.63it/s]


 epoch: 7321 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 7322 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%

 epoch: 7323 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 49%|████▉     | 7325/15000 [14:37<14:42,  8.69it/s]


 epoch: 7324 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%

 epoch: 7325 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7326 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 49%|████▉     | 7327/15000 [14:37<13:47,  9.27it/s]



 epoch: 7327 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 7328 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 49%|████▉     | 7329/15000 [14:37<12:52,  9.93it/s]


 epoch: 7329 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

input:       is more recently settled although the majority of the oceanian islands lie in the south pacific few of them

target:      is more recently settled although the majority of the oceanian islands lie in the south pacific few of them are

prediction:  is more recently settled although the majority of the oceanian islands lie in the south pacific few of them the

 epoch: 7330 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 49%|████▉     | 7331/15000 [14:38<13:40,  9.34it/s]


 epoch: 7331 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 49%|████▉     | 7334/15000 [14:38<14:48,  8.63it/s]


 epoch: 7332 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7333 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7334 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 49%|████▉     | 7337/15000 [14:38<13:24,  9.53it/s]


 epoch: 7335 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7336 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7337 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 49%|████▉     | 7338/15000 [14:38<13:20,  9.57it/s]


 epoch: 7338 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 7339 | train_loss: 0.21, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.9%

input:       were domesticated followed by okra and kola nuts since most of the plants grew in the forest the niger

target:      were domesticated followed by okra and kola nuts since most of the plants grew in the forest the niger congo

prediction:  were domesticated followed by okra and kola nuts since most of the plants grew in the forest the niger the


 49%|████▉     | 7341/15000 [14:39<13:16,  9.62it/s]


 epoch: 7340 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.4%

 epoch: 7341 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 49%|████▉     | 7344/15000 [14:39<12:50,  9.94it/s]


 epoch: 7342 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7343 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7344 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 49%|████▉     | 7345/15000 [14:39<13:13,  9.65it/s]


 epoch: 7345 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 49%|████▉     | 7348/15000 [14:40<21:09,  6.03it/s]


 epoch: 7346 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 7347 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7348 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 49%|████▉     | 7350/15000 [14:40<18:21,  6.95it/s]


 epoch: 7349 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

input:       cle dominated by louis xiv the scientific revolution the world first public company and megacorporation known as the dutch

target:      cle dominated by louis xiv the scientific revolution the world first public company and megacorporation known as the dutch east

prediction:  cle dominated by louis xiv the scientific revolution the world first public company and megacorporation known as the dutch the

 epoch: 7350 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 7351 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 49%|████▉     | 7354/15000 [14:40<14:15,  8.93it/s]


 epoch: 7352 | train_loss: 0.21, train_acc: 97.5% | test_loss: 0.21, test_acc: 97.0%

 epoch: 7353 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 7354 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.9%


 49%|████▉     | 7357/15000 [14:41<13:47,  9.24it/s]


 epoch: 7355 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.3%

 epoch: 7356 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7357 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%


 49%|████▉     | 7358/15000 [14:41<13:34,  9.38it/s]


 epoch: 7358 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7359 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

input:       their physical and biological environments the theory was first set out in detail in darwin book on the origin

target:      their physical and biological environments the theory was first set out in detail in darwin book on the origin of

prediction:  their physical and biological environments the theory was first set out in detail in darwin book on the origin the


 49%|████▉     | 7360/15000 [14:41<13:50,  9.20it/s]


 epoch: 7360 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 49%|████▉     | 7363/15000 [14:42<20:06,  6.33it/s]


 epoch: 7361 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7362 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7363 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 49%|████▉     | 7366/15000 [14:42<15:56,  7.98it/s]


 epoch: 7364 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7365 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 7366 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 96.9%


 49%|████▉     | 7369/15000 [14:42<13:55,  9.13it/s]


 epoch: 7367 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7368 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7369 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 49%|████▉     | 7371/15000 [14:43<14:18,  8.89it/s]


input:       of labour this organization led to the necessity of record keeping and the development of writing babylonia was an

target:      of labour this organization led to the necessity of record keeping and the development of writing babylonia was an amorite

prediction:  of labour this organization led to the necessity of record keeping and the development of writing babylonia was an the

 epoch: 7370 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7371 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 49%|████▉     | 7374/15000 [14:43<13:20,  9.53it/s]


 epoch: 7372 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 7373 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.4%

 epoch: 7374 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 49%|████▉     | 7377/15000 [14:43<17:55,  7.09it/s]


 epoch: 7375 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 7376 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7377 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 49%|████▉     | 7379/15000 [14:44<16:13,  7.83it/s]


 epoch: 7378 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 7379 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       the opposition for three year term in the politics of papua new guinea the prime minister is the head

target:      the opposition for three year term in the politics of papua new guinea the prime minister is the head of


 49%|████▉     | 7381/15000 [14:44<15:38,  8.12it/s]


prediction:  the opposition for three year term in the politics of papua new guinea the prime minister is the head the

 epoch: 7380 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 7381 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 49%|████▉     | 7384/15000 [14:44<13:43,  9.25it/s]


 epoch: 7382 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7383 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7384 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.21, test_acc: 97.2%


 49%|████▉     | 7386/15000 [14:44<13:25,  9.45it/s]


 epoch: 7385 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7386 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 49%|████▉     | 7388/15000 [14:45<13:25,  9.45it/s]


 epoch: 7387 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7388 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 49%|████▉     | 7389/15000 [14:45<14:11,  8.94it/s]


 epoch: 7389 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

input:       for example the overwhelming majority of species are microscopic prokaryotes which form about half the world biomass despite their

target:      for example the overwhelming majority of species are microscopic prokaryotes which form about half the world biomass despite their small

prediction:  for example the overwhelming majority of species are microscopic prokaryotes which form about half the world biomass despite their of


 49%|████▉     | 7391/15000 [14:45<25:19,  5.01it/s]


 epoch: 7390 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7391 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%


 49%|████▉     | 7393/15000 [14:46<20:59,  6.04it/s]


 epoch: 7392 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7393 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%


 49%|████▉     | 7395/15000 [14:46<18:21,  6.91it/s]


 epoch: 7394 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7395 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 49%|████▉     | 7397/15000 [14:46<17:01,  7.44it/s]


 epoch: 7396 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7397 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 49%|████▉     | 7399/15000 [14:46<15:12,  8.33it/s]


 epoch: 7398 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7399 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

input:       the english speaking world oceania is described as one of the continents in this model of the world australia


 49%|████▉     | 7401/15000 [14:47<16:33,  7.65it/s]


target:      the english speaking world oceania is described as one of the continents in this model of the world australia is

prediction:  the english speaking world oceania is described as one of the continents in this model of the world australia the

 epoch: 7400 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7401 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 49%|████▉     | 7402/15000 [14:47<16:06,  7.86it/s]


 epoch: 7402 | train_loss: 0.19, train_acc: 97.4% | test_loss: 0.20, test_acc: 97.2%

 epoch: 7403 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 49%|████▉     | 7406/15000 [14:47<17:19,  7.31it/s]


 epoch: 7404 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7405 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 7406 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 49%|████▉     | 7408/15000 [14:48<15:20,  8.25it/s]


 epoch: 7407 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7408 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7409 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%


 49%|████▉     | 7410/15000 [14:48<14:53,  8.50it/s]


input:       financial resources or access to global markets relatively stable countries such as kenya still experienced only very slow economic

target:      financial resources or access to global markets relatively stable countries such as kenya still experienced only very slow economic development

prediction:  financial resources or access to global markets relatively stable countries such as kenya still experienced only very slow economic the

 epoch: 7410 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.8%

 epoch: 7411 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%


 49%|████▉     | 7414/15000 [14:48<12:49,  9.85it/s]


 epoch: 7412 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 7413 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 7414 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 49%|████▉     | 7416/15000 [14:48<12:34, 10.06it/s]


 epoch: 7415 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 7416 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%

 epoch: 7417 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 49%|████▉     | 7418/15000 [14:49<21:15,  5.94it/s]


 epoch: 7418 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7419 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

input:       he studied he solved with ease the th century brought particular admiration for leonardo genius causing henry fuseli to

target:      he studied he solved with ease the th century brought particular admiration for leonardo genius causing henry fuseli to write

prediction:  he studied he solved with ease the th century brought particular admiration for leonardo genius causing henry fuseli to the


 49%|████▉     | 7422/15000 [14:49<16:32,  7.64it/s]


 epoch: 7420 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.4%

 epoch: 7421 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.8%

 epoch: 7422 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 49%|████▉     | 7424/15000 [14:50<14:56,  8.45it/s]


 epoch: 7423 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 7424 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7425 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 50%|████▉     | 7428/15000 [14:50<13:12,  9.56it/s]


 epoch: 7426 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7427 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7428 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 50%|████▉     | 7430/15000 [14:50<13:31,  9.33it/s]


 epoch: 7429 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%

input:       australia new zealand norfolk island and hawai together under an anglonesia category this is in spite of the geographical

target:      australia new zealand norfolk island and hawai together under an anglonesia category this is in spite of the geographical distance

prediction:  australia new zealand norfolk island and hawai together under an anglonesia category this is in spite of the geographical the

 epoch: 7430 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%


 50%|████▉     | 7432/15000 [14:50<13:02,  9.67it/s]


 epoch: 7431 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7432 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 50%|████▉     | 7434/15000 [14:51<21:09,  5.96it/s]


 epoch: 7433 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7434 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7435 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 50%|████▉     | 7438/15000 [14:51<15:52,  7.94it/s]


 epoch: 7436 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7437 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7438 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 50%|████▉     | 7440/15000 [14:51<15:18,  8.23it/s]


 epoch: 7439 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%

input:       modern geology based on the unity of processes in time and explanation of the earth past from the present

target:      modern geology based on the unity of processes in time and explanation of the earth past from the present james

prediction:  modern geology based on the unity of processes in time and explanation of the earth past from the present the

 epoch: 7440 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 50%|████▉     | 7442/15000 [14:52<13:54,  9.06it/s]


 epoch: 7441 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7442 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7443 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 50%|████▉     | 7446/15000 [14:52<12:13, 10.29it/s]


 epoch: 7444 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7445 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7446 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%


 50%|████▉     | 7448/15000 [14:52<15:03,  8.35it/s]


 epoch: 7447 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 7448 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 7449 | train_loss: 0.21, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%


 50%|████▉     | 7450/15000 [14:53<14:35,  8.62it/s]


input:       came to hold the power of monarch de facto if not de jure during the years of his rule

target:      came to hold the power of monarch de facto if not de jure during the years of his rule new

prediction:  came to hold the power of monarch de facto if not de jure during the years of his rule the

 epoch: 7450 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7451 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.3%


 50%|████▉     | 7454/15000 [14:53<12:49,  9.81it/s]


 epoch: 7452 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 7453 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.20, test_acc: 97.5%

 epoch: 7454 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 50%|████▉     | 7456/15000 [14:53<12:17, 10.23it/s]


 epoch: 7455 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7456 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7457 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 50%|████▉     | 7458/15000 [14:53<12:09, 10.33it/s]


 epoch: 7458 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7459 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

input:       but the chromosome snps indicated that they had different fathers suggesting that they were half brothers the snp identities

target:      but the chromosome snps indicated that they had different fathers suggesting that they were half brothers the snp identities were

prediction:  but the chromosome snps indicated that they had different fathers suggesting that they were half brothers the snp identities the


 50%|████▉     | 7460/15000 [14:54<12:45,  9.85it/s]


 epoch: 7460 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 50%|████▉     | 7462/15000 [14:54<21:40,  5.80it/s]


 epoch: 7461 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7462 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7463 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 50%|████▉     | 7466/15000 [14:55<16:03,  7.82it/s]


 epoch: 7464 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7465 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 7466 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 50%|████▉     | 7468/15000 [14:55<14:29,  8.66it/s]


 epoch: 7467 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 7468 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%

 epoch: 7469 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 50%|████▉     | 7470/15000 [14:55<14:24,  8.71it/s]


input:       was released on ios android and windows phone pocket edition was remade into the bedrock edition in enabling cross

target:      was released on ios android and windows phone pocket edition was remade into the bedrock edition in enabling cross platform

prediction:  was released on ios android and windows phone pocket edition was remade into the bedrock edition in enabling cross the

 epoch: 7470 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7471 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 50%|████▉     | 7474/15000 [14:55<12:44,  9.85it/s]


 epoch: 7472 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7473 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7474 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7475 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 50%|████▉     | 7477/15000 [14:56<18:54,  6.63it/s]


 epoch: 7476 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7477 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 7478 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 50%|████▉     | 7480/15000 [14:56<16:09,  7.76it/s]


 epoch: 7479 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.8%

input:       nd to th centuries sarcophagus relief has been called the richest single source of roman iconography depicting mythological scenes

target:      nd to th centuries sarcophagus relief has been called the richest single source of roman iconography depicting mythological scenes or

prediction:  nd to th centuries sarcophagus relief has been called the richest single source of roman iconography depicting mythological scenes the

 epoch: 7480 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 50%|████▉     | 7482/15000 [14:56<14:25,  8.68it/s]


 epoch: 7481 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7482 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7483 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 50%|████▉     | 7486/15000 [14:57<12:54,  9.71it/s]


 epoch: 7484 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%

 epoch: 7485 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7486 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.21, test_acc: 97.3%


 50%|████▉     | 7488/15000 [14:57<13:23,  9.35it/s]


 epoch: 7487 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7488 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 50%|████▉     | 7489/15000 [14:57<14:04,  8.89it/s]


 epoch: 7489 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

input:       github and had grown to host unique public repositories having been forked at least once for total of repositories

target:      github and had grown to host unique public repositories having been forked at least once for total of repositories in

prediction:  github and had grown to host unique public repositories having been forked at least once for total of repositories the


 50%|████▉     | 7491/15000 [14:58<18:57,  6.60it/s]


 epoch: 7490 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7491 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 50%|████▉     | 7493/15000 [14:58<17:02,  7.34it/s]


 epoch: 7492 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7493 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 50%|████▉     | 7495/15000 [14:58<16:25,  7.62it/s]


 epoch: 7494 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7495 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 50%|████▉     | 7497/15000 [14:58<15:49,  7.90it/s]


 epoch: 7496 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7497 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 50%|████▉     | 7499/15000 [14:59<15:05,  8.28it/s]


 epoch: 7498 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7499 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

input:      

 50%|█████     | 7500/15000 [14:59<17:37,  7.09it/s]

 known as classical conditioning they demonstrated that when biologically potent stimulus food that elicits salivation is paired with

target:      known as classical conditioning they demonstrated that when biologically potent stimulus food that elicits salivation is paired with previously

prediction:  known as classical conditioning they demonstrated that when biologically potent stimulus food that elicits salivation is paired with the

 epoch: 7500 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 50%|█████     | 7502/15000 [14:59<16:58,  7.36it/s]


 epoch: 7501 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.1%

 epoch: 7502 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 50%|█████     | 7503/15000 [14:59<16:52,  7.40it/s]


 epoch: 7503 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 50%|█████     | 7505/15000 [15:00<28:49,  4.33it/s]


 epoch: 7504 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 7505 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.3%


 50%|█████     | 7507/15000 [15:00<21:24,  5.83it/s]


 epoch: 7506 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.4%

 epoch: 7507 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 50%|█████     | 7510/15000 [15:00<16:05,  7.76it/s]


 epoch: 7508 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7509 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

input:       day colombia in nd with thousand barrels day ecuador in th with thousand barrels day and argentina th with

target:      day colombia in nd with thousand barrels day ecuador in th with thousand barrels day and argentina th with thousand

prediction:  day colombia in nd with thousand barrels day ecuador in th with thousand barrels day and argentina th with the

 epoch: 7510 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 50%|█████     | 7512/15000 [15:01<14:05,  8.86it/s]


 epoch: 7511 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7512 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7513 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 50%|█████     | 7516/15000 [15:01<12:27, 10.01it/s]


 epoch: 7514 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7515 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7516 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 50%|█████     | 7518/15000 [15:01<12:26, 10.02it/s]


 epoch: 7517 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.22, test_acc: 96.9%

 epoch: 7518 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 50%|█████     | 7520/15000 [15:02<20:49,  5.99it/s]


 epoch: 7519 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

input:       breaking down mental processes into the most basic components motivated in part by an analogy to recent advances in

target:      breaking down mental processes into the most basic components motivated in part by an analogy to recent advances in chemistry

prediction:  breaking down mental processes into the most basic components motivated in part by an analogy to recent advances in the

 epoch: 7520 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 7521 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 50%|█████     | 7524/15000 [15:02<15:28,  8.05it/s]


 epoch: 7522 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.3%

 epoch: 7523 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7524 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.3%


 50%|█████     | 7527/15000 [15:02<13:41,  9.10it/s]


 epoch: 7525 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7526 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%

 epoch: 7527 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 50%|█████     | 7529/15000 [15:03<12:57,  9.61it/s]


 epoch: 7528 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7529 | train_loss: 0.23, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.1%

input:       in the atlantic slave trade in after cultivation of the highly profitable cotton crop exploded in the deep south

target:      in the atlantic slave trade in after cultivation of the highly profitable cotton crop exploded in the deep south and

prediction:  in the atlantic slave trade in after cultivation of the highly profitable cotton crop exploded in the deep south the


 50%|█████     | 7531/15000 [15:03<13:01,  9.56it/s]


 epoch: 7530 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7531 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7532 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 50%|█████     | 7535/15000 [15:03<12:17, 10.13it/s]


 epoch: 7533 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7534 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7535 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 50%|█████     | 7537/15000 [15:03<11:46, 10.57it/s]


 epoch: 7536 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7537 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7538 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 50%|█████     | 7541/15000 [15:04<11:36, 10.70it/s]


 epoch: 7539 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

input:       cornu long tubular metal wind instrument was used for military signals and on parade these instruments spread throughout the

target:      cornu long tubular metal wind instrument was used for military signals and on parade these instruments spread throughout the provinces

prediction:  cornu long tubular metal wind instrument was used for military signals and on parade these instruments spread throughout the the

 epoch: 7540 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7541 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%


 50%|█████     | 7543/15000 [15:04<11:34, 10.74it/s]


 epoch: 7542 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7543 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 7544 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 50%|█████     | 7545/15000 [15:04<11:50, 10.49it/s]


 epoch: 7545 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7546 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.4%


 50%|█████     | 7549/15000 [15:05<17:05,  7.26it/s]


 epoch: 7547 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7548 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%

 epoch: 7549 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 50%|█████     | 7550/15000 [15:05<17:05,  7.26it/s]


input:       as the arbitrary commands were embedded in the url itself an attacker could use the exploit via man in

target:      as the arbitrary commands were embedded in the url itself an attacker could use the exploit via man in the

prediction:  as the arbitrary commands were embedded in the url itself an attacker could use the exploit via man in the

 epoch: 7550 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7551 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.5%


 50%|█████     | 7554/15000 [15:05<13:36,  9.12it/s]


 epoch: 7552 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7553 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7554 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.1%


 50%|█████     | 7556/15000 [15:06<12:47,  9.69it/s]


 epoch: 7555 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7556 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7557 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 50%|█████     | 7558/15000 [15:06<12:27,  9.96it/s]


 epoch: 7558 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%

 epoch: 7559 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

input:       anic school is how children become recognized members of the islamic faith children often attend state schools and qur

target:      anic school is how children become recognized members of the islamic faith children often attend state schools and qur anic

prediction:  anic school is how children become recognized members of the islamic faith children often attend state schools and qur the


 50%|█████     | 7560/15000 [15:06<12:31,  9.90it/s]


 epoch: 7560 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 50%|█████     | 7562/15000 [15:07<21:29,  5.77it/s]


 epoch: 7561 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7562 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7563 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 50%|█████     | 7566/15000 [15:07<15:48,  7.83it/s]


 epoch: 7564 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 7565 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7566 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 50%|█████     | 7568/15000 [15:07<14:26,  8.58it/s]


 epoch: 7567 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7568 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7569 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 50%|█████     | 7570/15000 [15:07<14:13,  8.71it/s]


input:       the term comedy became synonymous with satire and later with humour in general aristotle poetics was translated into arabic

target:      the term comedy became synonymous with satire and later with humour in general aristotle poetics was translated into arabic in

prediction:  the term comedy became synonymous with satire and later with humour in general aristotle poetics was translated into arabic the

 epoch: 7570 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.21, test_acc: 97.2%

 epoch: 7571 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 50%|█████     | 7574/15000 [15:08<12:20, 10.03it/s]


 epoch: 7572 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7573 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7574 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 96.9%


 51%|█████     | 7576/15000 [15:08<13:03,  9.48it/s]


 epoch: 7575 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7576 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7577 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 51%|█████     | 7578/15000 [15:08<12:32,  9.87it/s]


 epoch: 7578 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7579 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

input:       the audience the social transformation model views humour as adaptive because it communicates the present desire to be humorous

target:      the audience the social transformation model views humour as adaptive because it communicates the present desire to be humorous as

prediction:  the audience the social transformation model views humour as adaptive because it communicates the present desire to be humorous the


 51%|█████     | 7582/15000 [15:09<12:01, 10.29it/s]


 epoch: 7580 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

 epoch: 7581 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7582 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 51%|█████     | 7584/15000 [15:09<11:41, 10.56it/s]


 epoch: 7583 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.4%

 epoch: 7584 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 96.7%

 epoch: 7585 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 51%|█████     | 7588/15000 [15:09<11:37, 10.63it/s]


 epoch: 7586 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7587 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7588 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 7589 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

input:       it difficult for single autocratic ruler to effectively manage multiple threats at the same time these continuing problems would

target:      it difficult for single autocratic ruler to effectively manage multiple threats at the same time these continuing problems would be

prediction:  it difficult for single autocratic ruler to effectively manage multiple threats at the same time these continuing problems would the


 51%|█████     | 7592/15000 [15:10<18:06,  6.82it/s]


 epoch: 7590 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.8%

 epoch: 7591 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.24, test_acc: 96.8%

 epoch: 7592 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.4%


 51%|█████     | 7595/15000 [15:10<15:18,  8.06it/s]


 epoch: 7593 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7594 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7595 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 51%|█████     | 7597/15000 [15:11<14:54,  8.28it/s]


 epoch: 7596 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 7597 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 51%|█████     | 7599/15000 [15:11<14:27,  8.53it/s]


 epoch: 7598 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7599 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

input:       communities exhibit the echo chamber effect in which repeated unsourced statements come to be accepted among the community as


 51%|█████     | 7600/15000 [15:11<16:35,  7.43it/s]


target:      communities exhibit the echo chamber effect in which repeated unsourced statements come to be accepted among the community as fact

prediction:  communities exhibit the echo chamber effect in which repeated unsourced statements come to be accepted among the community as the

 epoch: 7600 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 51%|█████     | 7602/15000 [15:11<16:11,  7.61it/s]


 epoch: 7601 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 7602 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 51%|█████     | 7603/15000 [15:11<15:51,  7.77it/s]


 epoch: 7603 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 51%|█████     | 7605/15000 [15:12<26:27,  4.66it/s]


 epoch: 7604 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 7605 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 51%|█████     | 7607/15000 [15:12<19:59,  6.17it/s]


 epoch: 7606 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7607 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 51%|█████     | 7609/15000 [15:12<17:27,  7.06it/s]


 epoch: 7608 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7609 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.4%


 51%|█████     | 7610/15000 [15:13<18:54,  6.52it/s]


input:       four types of bases the sequence of bases along particular dna molecule specifies the genetic information in manner similar

target:      four types of bases the sequence of bases along particular dna molecule specifies the genetic information in manner similar to

prediction:  four types of bases the sequence of bases along particular dna molecule specifies the genetic information in manner similar the

 epoch: 7610 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 51%|█████     | 7612/15000 [15:13<17:06,  7.20it/s]


 epoch: 7611 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7612 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%


 51%|█████     | 7614/15000 [15:13<16:24,  7.50it/s]


 epoch: 7613 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7614 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 51%|█████     | 7616/15000 [15:13<15:34,  7.90it/s]


 epoch: 7615 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 7616 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 51%|█████     | 7618/15000 [15:14<15:29,  7.94it/s]


 epoch: 7617 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 7618 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 51%|█████     | 7620/15000 [15:14<19:26,  6.33it/s]


 epoch: 7619 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

input:       equestrian statues of the renaissance donatello gattamelata in padua and verrocchio bartolomeo colleoni in venice and became known as

target:      equestrian statues of the renaissance donatello gattamelata in padua and verrocchio bartolomeo colleoni in venice and became known as the

prediction:  equestrian statues of the renaissance donatello gattamelata in padua and verrocchio bartolomeo colleoni in venice and became known as the

 epoch: 7620 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%


 51%|█████     | 7622/15000 [15:14<16:42,  7.36it/s]


 epoch: 7621 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7622 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 51%|█████     | 7624/15000 [15:15<15:37,  7.86it/s]


 epoch: 7623 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7624 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 51%|█████     | 7626/15000 [15:15<14:12,  8.65it/s]


 epoch: 7625 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%

 epoch: 7626 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 51%|█████     | 7628/15000 [15:15<13:10,  9.33it/s]


 epoch: 7627 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7628 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 51%|█████     | 7630/15000 [15:15<14:50,  8.28it/s]


 epoch: 7629 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

input:       the material on anatomy was published in leonardo treatise on painting during the time that melzi was ordering the

target:      the material on anatomy was published in leonardo treatise on painting during the time that melzi was ordering the material

prediction:  the material on anatomy was published in leonardo treatise on painting during the time that melzi was ordering the the

 epoch: 7630 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.5%


 51%|█████     | 7631/15000 [15:15<14:18,  8.58it/s]


 epoch: 7631 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7632 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 51%|█████     | 7635/15000 [15:16<13:12,  9.29it/s]


 epoch: 7633 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.5%

 epoch: 7634 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7635 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%


 51%|█████     | 7638/15000 [15:16<12:06, 10.13it/s]


 epoch: 7636 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 7637 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7638 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 51%|█████     | 7640/15000 [15:16<12:32,  9.79it/s]


 epoch: 7639 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

input:       of the weak and deformed johannes heinrich schultz german psychologist recognized for developing the technique of autogenic training prominently

target:      of the weak and deformed johannes heinrich schultz german psychologist recognized for developing the technique of autogenic training prominently advocated

prediction:  of the weak and deformed johannes heinrich schultz german psychologist recognized for developing the technique of autogenic training prominently the

 epoch: 7640 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 51%|█████     | 7642/15000 [15:16<11:56, 10.28it/s]


 epoch: 7641 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%

 epoch: 7642 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7643 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 51%|█████     | 7646/15000 [15:17<11:28, 10.68it/s]


 epoch: 7644 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 7645 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 7646 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 51%|█████     | 7648/15000 [15:17<19:52,  6.17it/s]


 epoch: 7647 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7648 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 7649 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 51%|█████     | 7650/15000 [15:18<18:00,  6.80it/s]


input:       stop online piracy and protect ip acts on january reddit participated in hour sitewide blackout to coincide with congressional

target:      stop online piracy and protect ip acts on january reddit participated in hour sitewide blackout to coincide with congressional committee

prediction:  stop online piracy and protect ip acts on january reddit participated in hour sitewide blackout to coincide with congressional the

 epoch: 7650 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

 epoch: 7651 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 51%|█████     | 7654/15000 [15:18<14:10,  8.64it/s]


 epoch: 7652 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7653 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 7654 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 51%|█████     | 7656/15000 [15:18<13:20,  9.17it/s]


 epoch: 7655 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7656 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7657 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 51%|█████     | 7660/15000 [15:19<12:22,  9.89it/s]


 epoch: 7658 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7659 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

input:       are safe for work the subreddit all originally did not filter topics but as of it does not include

target:      are safe for work the subreddit all originally did not filter topics but as of it does not include not

prediction:  are safe for work the subreddit all originally did not filter topics but as of it does not include the

 epoch: 7660 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7661 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 51%|█████     | 7664/15000 [15:19<14:38,  8.35it/s]


 epoch: 7662 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.27, test_acc: 96.8%

 epoch: 7663 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7664 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 51%|█████     | 7666/15000 [15:19<13:25,  9.10it/s]


 epoch: 7665 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7666 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7667 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 51%|█████     | 7670/15000 [15:20<12:11, 10.02it/s]


 epoch: 7668 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7669 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.19, test_acc: 97.3%

input:       three types is normally clear for the typical cases but some forms of education do not easily fall into

target:      three types is normally clear for the typical cases but some forms of education do not easily fall into one

prediction:  three types is normally clear for the typical cases but some forms of education do not easily fall into the

 epoch: 7670 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.3%


 51%|█████     | 7672/15000 [15:20<12:08, 10.05it/s]


 epoch: 7671 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 7672 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.4%

 epoch: 7673 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 51%|█████     | 7674/15000 [15:20<11:47, 10.36it/s]


 epoch: 7674 | train_loss: 0.20, train_acc: 97.5% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7675 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 51%|█████     | 7678/15000 [15:21<16:56,  7.21it/s]


 epoch: 7676 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7677 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 7678 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 51%|█████     | 7680/15000 [15:21<15:45,  7.74it/s]


 epoch: 7679 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

input:       of the most excellent painters sculptors and architects by th century art historian giorgio vasari tax records indicate that

target:      of the most excellent painters sculptors and architects by th century art historian giorgio vasari tax records indicate that by

prediction:  of the most excellent painters sculptors and architects by th century art historian giorgio vasari tax records indicate that the

 epoch: 7680 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 51%|█████     | 7682/15000 [15:21<14:16,  8.55it/s]


 epoch: 7681 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 7682 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.4%

 epoch: 7683 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 51%|█████     | 7686/15000 [15:22<12:24,  9.82it/s]


 epoch: 7684 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%

 epoch: 7685 | train_loss: 0.19, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7686 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 51%|█████▏    | 7688/15000 [15:22<11:55, 10.22it/s]


 epoch: 7687 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 7688 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7689 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 51%|█████▏    | 7690/15000 [15:22<13:02,  9.34it/s]


input:       forever transmuted it into life communicating values the interest in leonardo genius has continued unabated experts study and translate

target:      forever transmuted it into life communicating values the interest in leonardo genius has continued unabated experts study and translate his

prediction:  forever transmuted it into life communicating values the interest in leonardo genius has continued unabated experts study and translate the

 epoch: 7690 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7691 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 51%|█████▏    | 7694/15000 [15:22<11:45, 10.35it/s]


 epoch: 7692 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7693 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7694 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 51%|█████▏    | 7696/15000 [15:23<11:39, 10.44it/s]


 epoch: 7695 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.2%

 epoch: 7696 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7697 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 51%|█████▏    | 7700/15000 [15:23<11:30, 10.57it/s]


 epoch: 7698 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7699 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%

input:       de medici to ludovico il moro who ruled milan between and madonna of the carnation alte pinakothek munich landscape

target:      de medici to ludovico il moro who ruled milan between and madonna of the carnation alte pinakothek munich landscape of

prediction:  de medici to ludovico il moro who ruled milan between and madonna of the carnation alte pinakothek munich landscape the

 epoch: 7700 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 51%|█████▏    | 7702/15000 [15:23<11:12, 10.86it/s]


 epoch: 7701 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7702 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.1%

 epoch: 7703 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.2%


 51%|█████▏    | 7704/15000 [15:23<11:11, 10.86it/s]


 epoch: 7704 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 96.8%


 51%|█████▏    | 7706/15000 [15:24<16:56,  7.18it/s]


 epoch: 7705 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7706 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7707 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%


 51%|█████▏    | 7710/15000 [15:24<14:06,  8.61it/s]


 epoch: 7708 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 7709 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.8%

input:       of the second world war they had recovered and were having to compete with the growing economic strength of

target:      of the second world war they had recovered and were having to compete with the growing economic strength of the

prediction:  of the second world war they had recovered and were having to compete with the growing economic strength of the

 epoch: 7710 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%


 51%|█████▏    | 7713/15000 [15:24<12:49,  9.47it/s]


 epoch: 7711 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7712 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7713 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 51%|█████▏    | 7715/15000 [15:25<12:28,  9.73it/s]


 epoch: 7714 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 7715 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.8%


 51%|█████▏    | 7717/15000 [15:25<12:55,  9.39it/s]


 epoch: 7716 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7717 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 51%|█████▏    | 7719/15000 [15:25<15:08,  8.01it/s]


 epoch: 7718 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7719 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 51%|█████▏    | 7721/15000 [15:25<15:21,  7.90it/s]


input:       spot forming the hawaiian islands it is almost entirely oceanic crust the oldest member disappearing by way of the

target:      spot forming the hawaiian islands it is almost entirely oceanic crust the oldest member disappearing by way of the plate

prediction:  spot forming the hawaiian islands it is almost entirely oceanic crust the oldest member disappearing by way of the the

 epoch: 7720 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 7721 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%


 51%|█████▏    | 7723/15000 [15:26<14:43,  8.24it/s]


 epoch: 7722 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 7723 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 52%|█████▏    | 7725/15000 [15:26<13:56,  8.70it/s]


 epoch: 7724 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7725 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%


 52%|█████▏    | 7727/15000 [15:26<13:27,  9.01it/s]


 epoch: 7726 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7727 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.4%


 52%|█████▏    | 7729/15000 [15:26<13:24,  9.04it/s]


 epoch: 7728 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 7729 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.7%

input:       matter is currently the subject of most active laboratory investigations remaining issues include the cuspy halo problem and the


 52%|█████▏    | 7731/15000 [15:27<15:42,  7.71it/s]


target:      matter is currently the subject of most active laboratory investigations remaining issues include the cuspy halo problem and the dwarf

prediction:  matter is currently the subject of most active laboratory investigations remaining issues include the cuspy halo problem and the the

 epoch: 7730 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7731 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 52%|█████▏    | 7733/15000 [15:27<15:03,  8.04it/s]


 epoch: 7732 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7733 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 52%|█████▏    | 7735/15000 [15:27<25:37,  4.73it/s]


 epoch: 7734 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 7735 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 52%|█████▏    | 7737/15000 [15:28<19:30,  6.20it/s]


 epoch: 7736 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.0%

 epoch: 7737 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 52%|█████▏    | 7739/15000 [15:28<16:40,  7.26it/s]


 epoch: 7738 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7739 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 52%|█████▏    | 7740/15000 [15:28<19:10,  6.31it/s]


input:       of northwestern africa and the iberian peninsula during the eleventh century the banu hilal and banu ma qil were

target:      of northwestern africa and the iberian peninsula during the eleventh century the banu hilal and banu ma qil were collection

prediction:  of northwestern africa and the iberian peninsula during the eleventh century the banu hilal and banu ma qil were the

 epoch: 7740 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 52%|█████▏    | 7741/15000 [15:28<17:46,  6.81it/s]


 epoch: 7741 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7742 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 52%|█████▏    | 7744/15000 [15:29<14:53,  8.12it/s]


 epoch: 7743 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7744 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7745 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 52%|█████▏    | 7746/15000 [15:29<13:26,  9.00it/s]


 epoch: 7746 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7747 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 52%|█████▏    | 7750/15000 [15:30<18:56,  6.38it/s]


 epoch: 7748 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7749 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

input:       challenge to their hegemony but an ethnic group rather than an organised state israel had emerged by the middle

target:      challenge to their hegemony but an ethnic group rather than an organised state israel had emerged by the middle of

prediction:  challenge to their hegemony but an ethnic group rather than an organised state israel had emerged by the middle the

 epoch: 7750 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%


 52%|█████▏    | 7752/15000 [15:30<16:14,  7.44it/s]


 epoch: 7751 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7752 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7753 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 52%|█████▏    | 7756/15000 [15:30<13:13,  9.13it/s]


 epoch: 7754 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7755 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7756 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 52%|█████▏    | 7758/15000 [15:30<12:21,  9.77it/s]


 epoch: 7757 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7758 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 7759 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 52%|█████▏    | 7760/15000 [15:31<12:35,  9.58it/s]


input:       guinea in melanesia papua new guinea is geographically the closest country to australia and is often geologically associated with

target:      guinea in melanesia papua new guinea is geographically the closest country to australia and is often geologically associated with australia

prediction:  guinea in melanesia papua new guinea is geographically the closest country to australia and is often geologically associated with the

 epoch: 7760 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.7%

 epoch: 7761 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 52%|█████▏    | 7764/15000 [15:31<16:07,  7.48it/s]


 epoch: 7762 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.9%

 epoch: 7763 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7764 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 52%|█████▏    | 7767/15000 [15:32<13:44,  8.78it/s]


 epoch: 7765 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 7766 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7767 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 52%|█████▏    | 7769/15000 [15:32<12:48,  9.41it/s]


 epoch: 7768 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7769 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

input:       an april fools internet standard called the hyper text coffee pot control protocol original research citation needed provocative humor

target:      an april fools internet standard called the hyper text coffee pot control protocol original research citation needed provocative humor that

prediction:  an april fools internet standard called the hyper text coffee pot control protocol original research citation needed provocative humor the

 epoch: 7770 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 52%|█████▏    | 7773/15000 [15:32<12:19,  9.77it/s]


 epoch: 7771 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7772 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7773 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.8%


 52%|█████▏    | 7775/15000 [15:32<12:20,  9.76it/s]


 epoch: 7774 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7775 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7776 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 52%|█████▏    | 7779/15000 [15:33<15:50,  7.59it/s]


 epoch: 7777 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7778 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7779 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 52%|█████▏    | 7780/15000 [15:33<16:16,  7.40it/s]


input:       and judicial the governor is elected statewide the lieutenant governor acts as the secretary of state the governor and

target:      and judicial the governor is elected statewide the lieutenant governor acts as the secretary of state the governor and lieutenant

prediction:  and judicial the governor is elected statewide the lieutenant governor acts as the secretary of state the governor and the

 epoch: 7780 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7781 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 52%|█████▏    | 7784/15000 [15:33<13:06,  9.17it/s]


 epoch: 7782 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.8%

 epoch: 7783 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 7784 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 52%|█████▏    | 7786/15000 [15:34<12:22,  9.71it/s]


 epoch: 7785 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%

 epoch: 7786 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 7787 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 52%|█████▏    | 7788/15000 [15:34<12:15,  9.80it/s]


 epoch: 7788 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 7789 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

input:       in an object in addition powershell allows formatting definitions to be specified so the text representation of objects can

target:      in an object in addition powershell allows formatting definitions to be specified so the text representation of objects can be

prediction:  in an object in addition powershell allows formatting definitions to be specified so the text representation of objects can the


 52%|█████▏    | 7790/15000 [15:34<12:49,  9.37it/s]


 epoch: 7790 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 52%|█████▏    | 7793/15000 [15:35<18:30,  6.49it/s]


 epoch: 7791 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7792 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.3%

 epoch: 7793 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 52%|█████▏    | 7796/15000 [15:35<14:55,  8.04it/s]


 epoch: 7794 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 7795 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7796 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 52%|█████▏    | 7799/15000 [15:35<12:57,  9.26it/s]


 epoch: 7797 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.4%

 epoch: 7798 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.8%

 epoch: 7799 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 52%|█████▏    | 7801/15000 [15:36<12:59,  9.23it/s]


input:       on mars it is most likely to be located underground where liquid water can still exist conditions on the

target:      on mars it is most likely to be located underground where liquid water can still exist conditions on the other

prediction:  on mars it is most likely to be located underground where liquid water can still exist conditions on the the

 epoch: 7800 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7801 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 52%|█████▏    | 7804/15000 [15:36<11:58, 10.02it/s]


 epoch: 7802 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.21, test_acc: 97.3%

 epoch: 7803 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 7804 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 52%|█████▏    | 7806/15000 [15:36<17:41,  6.78it/s]


 epoch: 7805 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7806 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7807 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 52%|█████▏    | 7808/15000 [15:37<15:18,  7.83it/s]


 epoch: 7808 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7809 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

input:       and drying climate meant that by bc the sahara region was becoming increasingly dry and hostile around bc due

target:      and drying climate meant that by bc the sahara region was becoming increasingly dry and hostile around bc due to

prediction:  and drying climate meant that by bc the sahara region was becoming increasingly dry and hostile around bc due the


 52%|█████▏    | 7812/15000 [15:37<13:14,  9.05it/s]


 epoch: 7810 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7811 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7812 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 52%|█████▏    | 7815/15000 [15:37<12:25,  9.63it/s]


 epoch: 7813 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7814 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%

 epoch: 7815 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%


 52%|█████▏    | 7817/15000 [15:37<12:17,  9.74it/s]


 epoch: 7816 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7817 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.1%

 epoch: 7818 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 52%|█████▏    | 7820/15000 [15:38<15:31,  7.71it/s]


 epoch: 7819 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

input:       during the renaissance some of his smaller inventions however entered the world of manufacturing unheralded such as an automated

target:      during the renaissance some of his smaller inventions however entered the world of manufacturing unheralded such as an automated bobbin

prediction:  during the renaissance some of his smaller inventions however entered the world of manufacturing unheralded such as an automated the

 epoch: 7820 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%


 52%|█████▏    | 7822/15000 [15:38<14:07,  8.47it/s]


 epoch: 7821 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7822 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.2%

 epoch: 7823 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 52%|█████▏    | 7826/15000 [15:38<11:59,  9.98it/s]


 epoch: 7824 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7825 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7826 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 52%|█████▏    | 7828/15000 [15:39<12:03,  9.92it/s]


 epoch: 7827 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7828 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.2%


 52%|█████▏    | 7830/15000 [15:39<13:06,  9.12it/s]


 epoch: 7829 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

input:       the expression descent with modification rather than evolution partly influenced by an essay on the principle of population by

target:      the expression descent with modification rather than evolution partly influenced by an essay on the principle of population by thomas

prediction:  the expression descent with modification rather than evolution partly influenced by an essay on the principle of population by the

 epoch: 7830 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.25, test_acc: 96.9%


 52%|█████▏    | 7832/15000 [15:39<13:48,  8.65it/s]


 epoch: 7831 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7832 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 52%|█████▏    | 7833/15000 [15:39<14:10,  8.43it/s]


 epoch: 7833 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 52%|█████▏    | 7835/15000 [15:40<24:52,  4.80it/s]


 epoch: 7834 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7835 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 52%|█████▏    | 7837/15000 [15:40<19:32,  6.11it/s]


 epoch: 7836 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7837 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 52%|█████▏    | 7839/15000 [15:41<17:17,  6.90it/s]


 epoch: 7838 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.4%

 epoch: 7839 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 52%|█████▏    | 7841/15000 [15:41<16:41,  7.15it/s]


input:       and depicting it in utmost detail and did not emphasise experiments or theoretical explanation since he lacked formal education

target:      and depicting it in utmost detail and did not emphasise experiments or theoretical explanation since he lacked formal education in

prediction:  and depicting it in utmost detail and did not emphasise experiments or theoretical explanation since he lacked formal education the

 epoch: 7840 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7841 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 52%|█████▏    | 7843/15000 [15:41<15:36,  7.64it/s]


 epoch: 7842 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.4%

 epoch: 7843 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 52%|█████▏    | 7845/15000 [15:41<15:37,  7.63it/s]


 epoch: 7844 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7845 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 52%|█████▏    | 7847/15000 [15:42<15:23,  7.75it/s]


 epoch: 7846 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7847 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 52%|█████▏    | 7848/15000 [15:42<31:13,  3.82it/s]


 epoch: 7848 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7849 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.8%

input:       the founding of the holy roman empire which eventually became centred in the german principalities of central europe east

target:      the founding of the holy roman empire which eventually became centred in the german principalities of central europe east central

prediction:  the founding of the holy roman empire which eventually became centred in the german principalities of central europe east the


 52%|█████▏    | 7851/15000 [15:42<20:41,  5.76it/s]


 epoch: 7850 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%

 epoch: 7851 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 52%|█████▏    | 7853/15000 [15:43<16:25,  7.25it/s]


 epoch: 7852 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7853 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7854 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 52%|█████▏    | 7857/15000 [15:43<12:52,  9.25it/s]


 epoch: 7855 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 7856 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.2%

 epoch: 7857 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 52%|█████▏    | 7859/15000 [15:43<12:11,  9.76it/s]


 epoch: 7858 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7859 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

input:       conflict grew between athens and sparta suspicious of the increasing athenian power funded by the delian league sparta offered

target:      conflict grew between athens and sparta suspicious of the increasing athenian power funded by the delian league sparta offered aid

prediction:  conflict grew between athens and sparta suspicious of the increasing athenian power funded by the delian league sparta offered the


 52%|█████▏    | 7862/15000 [15:44<12:28,  9.54it/s]


 epoch: 7860 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%

 epoch: 7861 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7862 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 52%|█████▏    | 7865/15000 [15:44<16:32,  7.19it/s]


 epoch: 7863 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%

 epoch: 7864 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7865 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 52%|█████▏    | 7868/15000 [15:44<13:41,  8.68it/s]


 epoch: 7866 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7867 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7868 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 52%|█████▏    | 7870/15000 [15:45<13:26,  8.84it/s]


 epoch: 7869 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

input:       out and others changed quite quickly on the north america southeastern coast spanish explorer juan ponce de le who

target:      out and others changed quite quickly on the north america southeastern coast spanish explorer juan ponce de le who had

prediction:  out and others changed quite quickly on the north america southeastern coast spanish explorer juan ponce de le who the

 epoch: 7870 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%


 52%|█████▏    | 7873/15000 [15:45<12:23,  9.58it/s]


 epoch: 7871 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 7872 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7873 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 96.9%


 53%|█████▎    | 7876/15000 [15:45<11:41, 10.15it/s]


 epoch: 7874 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.25, test_acc: 96.8%

 epoch: 7875 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.3%

 epoch: 7876 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 53%|█████▎    | 7878/15000 [15:46<16:21,  7.26it/s]


 epoch: 7877 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7878 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.0%

 epoch: 7879 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 53%|█████▎    | 7880/15000 [15:46<15:09,  7.83it/s]


input:       sold over million physical copies in japan making it the best selling vita game in the country minecraft helped

target:      sold over million physical copies in japan making it the best selling vita game in the country minecraft helped improve

prediction:  sold over million physical copies in japan making it the best selling vita game in the country minecraft helped the

 epoch: 7880 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 7881 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%


 53%|█████▎    | 7884/15000 [15:46<12:33,  9.45it/s]


 epoch: 7882 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.1%

 epoch: 7883 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7884 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 53%|█████▎    | 7886/15000 [15:46<12:11,  9.73it/s]


 epoch: 7885 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7886 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7887 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 53%|█████▎    | 7888/15000 [15:47<12:00,  9.87it/s]


 epoch: 7888 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 7889 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

input:       better at locating likely errors in code that is syntactically correct but these tools have reputation of false positives

target:      better at locating likely errors in code that is syntactically correct but these tools have reputation of false positives where

prediction:  better at locating likely errors in code that is syntactically correct but these tools have reputation of false positives the


 53%|█████▎    | 7890/15000 [15:47<12:33,  9.44it/s]


 epoch: 7890 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 53%|█████▎    | 7892/15000 [15:47<20:29,  5.78it/s]


 epoch: 7891 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%

 epoch: 7892 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7893 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 53%|█████▎    | 7896/15000 [15:48<14:26,  8.20it/s]


 epoch: 7894 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7895 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.1%

 epoch: 7896 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 53%|█████▎    | 7898/15000 [15:48<13:01,  9.09it/s]


 epoch: 7897 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7898 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7899 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 53%|█████▎    | 7900/15000 [15:48<12:59,  9.11it/s]


input:       posts by users from the donald and declared intentions to take actions against hundreds of the most toxic users

target:      posts by users from the donald and declared intentions to take actions against hundreds of the most toxic users of

prediction:  posts by users from the donald and declared intentions to take actions against hundreds of the most toxic users the

 epoch: 7900 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7901 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 53%|█████▎    | 7904/15000 [15:49<11:49, 10.00it/s]


 epoch: 7902 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7903 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7904 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7905 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 53%|█████▎    | 7908/15000 [15:49<16:48,  7.03it/s]


 epoch: 7906 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7907 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7908 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 53%|█████▎    | 7910/15000 [15:50<15:52,  7.44it/s]


 epoch: 7909 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       th century bc with influences from greece and other italian civilisations such as the etruscans traditionally rome was founded

target:      th century bc with influences from greece and other italian civilisations such as the etruscans traditionally rome was founded as

prediction:  th century bc with influences from greece and other italian civilisations such as the etruscans traditionally rome was founded the

 epoch: 7910 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 53%|█████▎    | 7913/15000 [15:50<13:18,  8.87it/s]


 epoch: 7911 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 7912 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7913 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 53%|█████▎    | 7915/15000 [15:50<12:18,  9.59it/s]


 epoch: 7914 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7915 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7916 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%


 53%|█████▎    | 7919/15000 [15:50<11:42, 10.08it/s]


 epoch: 7917 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 7918 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7919 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

input:       europe were redrawn once more this made old previously interrupted cultural and economic relationships possible and previously isolated cities

target:      europe were redrawn once more this made old previously interrupted cultural and economic relationships possible and previously isolated cities such

prediction:  europe were redrawn once more this made old previously interrupted cultural and economic relationships possible and previously isolated cities the


 53%|█████▎    | 7921/15000 [15:51<20:42,  5.70it/s]


 epoch: 7920 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 7921 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%

 epoch: 7922 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 53%|█████▎    | 7925/15000 [15:51<15:10,  7.77it/s]


 epoch: 7923 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7924 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7925 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 53%|█████▎    | 7927/15000 [15:52<13:44,  8.58it/s]


 epoch: 7926 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 7927 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 7928 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 53%|█████▎    | 7929/15000 [15:52<12:58,  9.08it/s]


 epoch: 7929 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

input:       addition there is the pan american highway which crosses argentina and the andean countries from north to south although

target:      addition there is the pan american highway which crosses argentina and the andean countries from north to south although some

prediction:  addition there is the pan american highway which crosses argentina and the andean countries from north to south although the

 epoch: 7930 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.1%


 53%|█████▎    | 7932/15000 [15:52<13:43,  8.59it/s]


 epoch: 7931 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.1%

 epoch: 7932 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 53%|█████▎    | 7933/15000 [15:52<14:07,  8.34it/s]


 epoch: 7933 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 53%|█████▎    | 7935/15000 [15:53<21:21,  5.51it/s]


 epoch: 7934 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7935 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 53%|█████▎    | 7937/15000 [15:53<18:04,  6.51it/s]


 epoch: 7936 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7937 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 53%|█████▎    | 7939/15000 [15:53<16:12,  7.26it/s]


 epoch: 7938 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7939 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.4%

input:       went to rome that september where he was received by the pope brother giuliano from september to leonardo spent


 53%|█████▎    | 7941/15000 [15:54<16:34,  7.10it/s]


target:      went to rome that september where he was received by the pope brother giuliano from september to leonardo spent much

prediction:  went to rome that september where he was received by the pope brother giuliano from september to leonardo spent the

 epoch: 7940 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 7941 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%


 53%|█████▎    | 7943/15000 [15:54<16:04,  7.32it/s]


 epoch: 7942 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7943 | train_loss: 0.28, train_acc: 96.6% | test_loss: 0.22, test_acc: 97.2%


 53%|█████▎    | 7945/15000 [15:54<16:17,  7.22it/s]


 epoch: 7944 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7945 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 53%|█████▎    | 7947/15000 [15:55<16:22,  7.18it/s]


 epoch: 7946 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 96.9%

 epoch: 7947 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 53%|█████▎    | 7948/15000 [15:55<16:29,  7.13it/s]


 epoch: 7948 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 53%|█████▎    | 7950/15000 [15:55<28:00,  4.20it/s]


 epoch: 7949 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       to create detailed images of wave speeds inside the earth in the same way doctor images body in ct

target:      to create detailed images of wave speeds inside the earth in the same way doctor images body in ct scan

prediction:  to create detailed images of wave speeds inside the earth in the same way doctor images body in ct the

 epoch: 7950 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 53%|█████▎    | 7953/15000 [15:56<17:20,  6.77it/s]


 epoch: 7951 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7952 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7953 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 53%|█████▎    | 7956/15000 [15:56<13:48,  8.51it/s]


 epoch: 7954 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7955 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7956 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.1%


 53%|█████▎    | 7958/15000 [15:56<12:30,  9.38it/s]


 epoch: 7957 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7958 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.0%

 epoch: 7959 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 53%|█████▎    | 7960/15000 [15:56<12:36,  9.30it/s]


input:       and fulfill their needs and desires in modern society this involves wide range of skills like being able to

target:      and fulfill their needs and desires in modern society this involves wide range of skills like being able to speak

prediction:  and fulfill their needs and desires in modern society this involves wide range of skills like being able to the

 epoch: 7960 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7961 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.4%


 53%|█████▎    | 7962/15000 [15:57<12:18,  9.53it/s]


 epoch: 7962 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 53%|█████▎    | 7964/15000 [15:57<18:16,  6.42it/s]


 epoch: 7963 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7964 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7965 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%


 53%|█████▎    | 7968/15000 [15:58<13:40,  8.57it/s]


 epoch: 7966 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7967 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7968 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 53%|█████▎    | 7970/15000 [15:58<13:49,  8.48it/s]


 epoch: 7969 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       of formation geology determines the relative ages of rocks found at given location geochemistry branch of geology determines their

target:      of formation geology determines the relative ages of rocks found at given location geochemistry branch of geology determines their absolute

prediction:  of formation geology determines the relative ages of rocks found at given location geochemistry branch of geology determines their the

 epoch: 7970 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 53%|█████▎    | 7972/15000 [15:58<13:23,  8.75it/s]


 epoch: 7971 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7972 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 53%|█████▎    | 7975/15000 [15:58<11:44,  9.97it/s]


 epoch: 7973 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 7974 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.3%

 epoch: 7975 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 53%|█████▎    | 7976/15000 [15:58<11:46,  9.94it/s]


 epoch: 7976 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.2%


 53%|█████▎    | 7978/15000 [15:59<16:43,  7.00it/s]


 epoch: 7977 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7978 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7979 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 53%|█████▎    | 7980/15000 [15:59<14:38,  8.00it/s]


input:       and is currently known as the pacific community its members include australia and other pacific islands forum members in

target:      and is currently known as the pacific community its members include australia and other pacific islands forum members in article

prediction:  and is currently known as the pacific community its members include australia and other pacific islands forum members in the

 epoch: 7980 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 7981 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 53%|█████▎    | 7984/15000 [15:59<12:11,  9.59it/s]


 epoch: 7982 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7983 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7984 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 53%|█████▎    | 7986/15000 [16:00<11:37, 10.05it/s]


 epoch: 7985 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 7986 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 7987 | train_loss: 0.22, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%


 53%|█████▎    | 7988/15000 [16:00<11:22, 10.28it/s]


 epoch: 7988 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 7989 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

input:       compulsory education upper secondary education aims to provide students with the skills and knowledge needed for employment or tertiary

target:      compulsory education upper secondary education aims to provide students with the skills and knowledge needed for employment or tertiary education

prediction:  compulsory education upper secondary education aims to provide students with the skills and knowledge needed for employment or tertiary the


 53%|█████▎    | 7990/15000 [16:00<11:41,  9.99it/s]


 epoch: 7990 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 7991 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 53%|█████▎    | 7994/15000 [16:01<17:27,  6.69it/s]


 epoch: 7992 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 7993 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 7994 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 53%|█████▎    | 7997/15000 [16:01<14:31,  8.04it/s]


 epoch: 7995 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 7996 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 7997 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 53%|█████▎    | 7998/15000 [16:01<14:01,  8.32it/s]


 epoch: 7998 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 7999 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.8%

input:       packaging vendor for an undisclosed sum of money the deal was closed on april in early july the github

target:      packaging vendor for an undisclosed sum of money the deal was closed on april in early july the github archive

prediction:  packaging vendor for an undisclosed sum of money the deal was closed on april in early july the github the

 epoch: 8000 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.2%

 53%|█████▎    | 8001/15000 [16:02<13:09,  8.86it/s]



 epoch: 8001 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8002 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 53%|█████▎    | 8005/15000 [16:02<11:20, 10.28it/s]


 epoch: 8003 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8004 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8005 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 53%|█████▎    | 8007/15000 [16:02<15:25,  7.55it/s]


 epoch: 8006 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8007 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8008 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 53%|█████▎    | 8010/15000 [16:03<14:17,  8.15it/s]


 epoch: 8009 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

input:       plant materials for food and employed the medicinal properties of vegetation for healing most modern human use of plants

target:      plant materials for food and employed the medicinal properties of vegetation for healing most modern human use of plants is

prediction:  plant materials for food and employed the medicinal properties of vegetation for healing most modern human use of plants the

 epoch: 8010 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 53%|█████▎    | 8012/15000 [16:03<12:57,  8.99it/s]


 epoch: 8011 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.8%

 epoch: 8012 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8013 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 53%|█████▎    | 8016/15000 [16:03<11:29, 10.14it/s]


 epoch: 8014 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%

 epoch: 8015 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8016 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 53%|█████▎    | 8018/15000 [16:03<11:13, 10.36it/s]


 epoch: 8017 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8018 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 8019 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%

input:       division use fractions calculate the areas of rectangles triangles and circles and compute the volumes of boxes columns and

target:      division use fractions calculate the areas of rectangles triangles and circles and compute the volumes of boxes columns and pyramids

prediction:  division use fractions calculate the areas of rectangles triangles and circles and compute the volumes of boxes columns and the


 53%|█████▎    | 8022/15000 [16:04<15:20,  7.58it/s]


 epoch: 8020 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8021 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8022 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.4%


 53%|█████▎    | 8024/15000 [16:04<14:39,  7.93it/s]


 epoch: 8023 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8024 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 54%|█████▎    | 8026/15000 [16:04<12:55,  8.99it/s]


 epoch: 8025 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.9%

 epoch: 8026 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8027 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 54%|█████▎    | 8028/15000 [16:05<12:00,  9.67it/s]


 epoch: 8028 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.6%

 epoch: 8029 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

input:       provide and use apis are considered as being members of business ecosystem the main policies for releasing an api

target:      provide and use apis are considered as being members of business ecosystem the main policies for releasing an api are

prediction:  provide and use apis are considered as being members of business ecosystem the main policies for releasing an api the

 epoch: 8030 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 54%|█████▎    | 8032/15000 [16:05<11:40,  9.95it/s]


 epoch: 8031 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8032 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8033 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 54%|█████▎    | 8034/15000 [16:05<11:49,  9.81it/s]


 epoch: 8034 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 54%|█████▎    | 8036/15000 [16:06<19:33,  5.93it/s]


 epoch: 8035 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 8036 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8037 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 54%|█████▎    | 8039/15000 [16:06<16:11,  7.16it/s]


 epoch: 8038 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8039 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

input:       cities has also been developed mi telef rico also known as telef rico la paz el alto la paz


 54%|█████▎    | 8040/15000 [16:06<16:36,  6.98it/s]


target:      cities has also been developed mi telef rico also known as telef rico la paz el alto la paz el

prediction:  cities has also been developed mi telef rico also known as telef rico la paz el alto la paz the

 epoch: 8040 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8041 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 54%|█████▎    | 8043/15000 [16:07<14:38,  7.92it/s]


 epoch: 8042 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 8043 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 54%|█████▎    | 8045/15000 [16:07<13:48,  8.39it/s]


 epoch: 8044 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8045 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 54%|█████▎    | 8047/15000 [16:07<13:44,  8.43it/s]


 epoch: 8046 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8047 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 54%|█████▎    | 8048/15000 [16:07<14:01,  8.26it/s]


 epoch: 8048 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 54%|█████▎    | 8050/15000 [16:08<23:46,  4.87it/s]


 epoch: 8049 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

input:       feelings for his pupils as both loving and passionate it has been claimed since the th century that these

target:      feelings for his pupils as both loving and passionate it has been claimed since the th century that these relationships

prediction:  feelings for his pupils as both loving and passionate it has been claimed since the th century that these the

 epoch: 8050 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 54%|█████▎    | 8052/15000 [16:08<17:56,  6.45it/s]


 epoch: 8051 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8052 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.2%


 54%|█████▎    | 8054/15000 [16:08<16:49,  6.88it/s]


 epoch: 8053 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8054 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 54%|█████▎    | 8056/15000 [16:09<16:02,  7.21it/s]


 epoch: 8055 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8056 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 54%|█████▎    | 8058/15000 [16:09<15:35,  7.42it/s]


 epoch: 8057 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8058 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 54%|█████▎    | 8060/15000 [16:09<15:14,  7.59it/s]


 epoch: 8059 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

input:       empire following the death of the last republican dictator the first emperor adoptive father julius caesar rome had begun

target:      empire following the death of the last republican dictator the first emperor adoptive father julius caesar rome had begun expanding

prediction:  empire following the death of the last republican dictator the first emperor adoptive father julius caesar rome had begun the

 epoch: 8060 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 54%|█████▎    | 8062/15000 [16:09<13:56,  8.29it/s]


 epoch: 8061 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8062 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 54%|█████▍    | 8065/15000 [16:10<20:39,  5.60it/s]


 epoch: 8063 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8064 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8065 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 54%|█████▍    | 8067/15000 [16:10<16:40,  6.93it/s]


 epoch: 8066 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8067 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8068 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%


 54%|█████▍    | 8070/15000 [16:11<14:27,  7.98it/s]


 epoch: 8069 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

input:       that part of the api should be considered candidate for being removed or modified in backward incompatible way therefore

target:      that part of the api should be considered candidate for being removed or modified in backward incompatible way therefore these

prediction:  that part of the api should be considered candidate for being removed or modified in backward incompatible way therefore the

 epoch: 8070 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 54%|█████▍    | 8073/15000 [16:11<12:38,  9.13it/s]


 epoch: 8071 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8072 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8073 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.7%


 54%|█████▍    | 8076/15000 [16:11<11:43,  9.84it/s]


 epoch: 8074 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8075 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 8076 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8077 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 54%|█████▍    | 8078/15000 [16:12<20:50,  5.54it/s]


 epoch: 8078 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.5%

 epoch: 8079 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

input:       did the same to judah in after both conquests the conquering forces deported many of the inhabitants to other

target:      did the same to judah in after both conquests the conquering forces deported many of the inhabitants to other regions

prediction:  did the same to judah in after both conquests the conquering forces deported many of the inhabitants to other the


 54%|█████▍    | 8081/15000 [16:12<17:02,  6.76it/s]


 epoch: 8080 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8081 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 54%|█████▍    | 8085/15000 [16:12<13:04,  8.81it/s]


 epoch: 8082 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8083 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 8084 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.4%

 epoch: 8085 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 54%|█████▍    | 8088/15000 [16:13<11:46,  9.78it/s]


 epoch: 8086 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8087 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 8088 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 54%|█████▍    | 8090/15000 [16:13<12:11,  9.45it/s]


 epoch: 8089 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

input:       governments legally regulate who can provide psychological services or represent themselves as psychologist the apa defines psychologist as someone

target:      governments legally regulate who can provide psychological services or represent themselves as psychologist the apa defines psychologist as someone with

prediction:  governments legally regulate who can provide psychological services or represent themselves as psychologist the apa defines psychologist as someone the

 epoch: 8090 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 8091 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 54%|█████▍    | 8094/15000 [16:13<11:33,  9.96it/s]


 epoch: 8092 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8093 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8094 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 54%|█████▍    | 8096/15000 [16:14<11:21, 10.13it/s]


 epoch: 8095 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8096 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8097 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 54%|█████▍    | 8100/15000 [16:14<11:04, 10.39it/s]


 epoch: 8098 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8099 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

input:       as such areas had long been part of different traditions for more than millennium the art of such areas

target:      as such areas had long been part of different traditions for more than millennium the art of such areas had

prediction:  as such areas had long been part of different traditions for more than millennium the art of such areas the

 epoch: 8100 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 54%|█████▍    | 8102/15000 [16:14<11:00, 10.44it/s]


 epoch: 8101 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8102 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8103 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 54%|█████▍    | 8106/15000 [16:14<10:45, 10.67it/s]


 epoch: 8104 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.19, test_acc: 97.4%

 epoch: 8105 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.0%

 epoch: 8106 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 54%|█████▍    | 8108/15000 [16:15<17:53,  6.42it/s]


 epoch: 8107 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8108 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8109 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 54%|█████▍    | 8110/15000 [16:15<16:26,  6.98it/s]


input:       rules and accept the discussion of unexpected controversial or scandalous subjects the ability to understand the premise and appreciate

target:      rules and accept the discussion of unexpected controversial or scandalous subjects the ability to understand the premise and appreciate the

prediction:  rules and accept the discussion of unexpected controversial or scandalous subjects the ability to understand the premise and appreciate the

 epoch: 8110 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8111 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 54%|█████▍    | 8114/15000 [16:16<13:16,  8.64it/s]


 epoch: 8112 | train_loss: 0.20, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8113 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 8114 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.2%


 54%|█████▍    | 8117/15000 [16:16<11:58,  9.58it/s]


 epoch: 8115 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8116 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8117 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 54%|█████▍    | 8119/15000 [16:16<11:26, 10.02it/s]


 epoch: 8118 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8119 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

input:       the production of coal the continent had of the largest world producers in colombia th and brazil th grape

target:      the production of coal the continent had of the largest world producers in colombia th and brazil th grape plantation

prediction:  the production of coal the continent had of the largest world producers in colombia th and brazil th grape the

 epoch: 8120 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 54%|█████▍    | 8123/15000 [16:17<14:06,  8.13it/s]


 epoch: 8121 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.1%

 epoch: 8122 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8123 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%


 54%|█████▍    | 8125/15000 [16:17<12:57,  8.84it/s]


 epoch: 8124 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8125 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8126 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 54%|█████▍    | 8129/15000 [16:17<11:17, 10.14it/s]


 epoch: 8127 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 8128 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%

 epoch: 8129 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 54%|█████▍    | 8131/15000 [16:18<11:49,  9.68it/s]


input:       certain globular clusters appeared to indicate that they were about billion years old which conflicted with most then current

target:      certain globular clusters appeared to indicate that they were about billion years old which conflicted with most then current estimates

prediction:  certain globular clusters appeared to indicate that they were about billion years old which conflicted with most then current the

 epoch: 8130 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 8131 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 54%|█████▍    | 8133/15000 [16:18<11:43,  9.76it/s]


 epoch: 8132 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 8133 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%

 epoch: 8134 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 54%|█████▍    | 8137/15000 [16:18<11:19, 10.10it/s]


 epoch: 8135 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8136 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%

 epoch: 8137 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 54%|█████▍    | 8139/15000 [16:18<10:57, 10.44it/s]


 epoch: 8138 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8139 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

input:       north america southeastern coast spanish explorer juan ponce de le who had accompanied columbus second voyage visited and named

target:      north america southeastern coast spanish explorer juan ponce de le who had accompanied columbus second voyage visited and named in

prediction:  north america southeastern coast spanish explorer juan ponce de le who had accompanied columbus second voyage visited and named the

 epoch: 8140 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 54%|█████▍    | 8143/15000 [16:19<10:42, 10.67it/s]


 epoch: 8141 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8142 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8143 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 54%|█████▍    | 8145/15000 [16:19<10:48, 10.57it/s]


 epoch: 8144 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8145 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8146 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 54%|█████▍    | 8147/15000 [16:19<11:27,  9.96it/s]


 epoch: 8147 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8148 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 54%|█████▍    | 8150/15000 [16:20<20:14,  5.64it/s]


 epoch: 8149 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

input:       could be subjected to forms of corporal punishment not normally exercised on citizens sexual exploitation torture and summary execution

target:      could be subjected to forms of corporal punishment not normally exercised on citizens sexual exploitation torture and summary execution slave

prediction:  could be subjected to forms of corporal punishment not normally exercised on citizens sexual exploitation torture and summary execution the

 epoch: 8150 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.2%


 54%|█████▍    | 8152/15000 [16:20<18:00,  6.34it/s]


 epoch: 8151 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 8152 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 54%|█████▍    | 8154/15000 [16:20<16:26,  6.94it/s]


 epoch: 8153 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8154 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%


 54%|█████▍    | 8156/15000 [16:21<15:10,  7.51it/s]


 epoch: 8155 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.8%

 epoch: 8156 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 54%|█████▍    | 8158/15000 [16:21<13:48,  8.26it/s]


 epoch: 8157 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8158 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%


 54%|█████▍    | 8159/15000 [16:21<13:39,  8.34it/s]


 epoch: 8159 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

input:       militiae to become highly placed prefects and procurators within the imperial administration the rise of provincial men to the

target:      militiae to become highly placed prefects and procurators within the imperial administration the rise of provincial men to the senatorial

prediction:  militiae to become highly placed prefects and procurators within the imperial administration the rise of provincial men to the the


 54%|█████▍    | 8161/15000 [16:21<15:23,  7.40it/s]


 epoch: 8160 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 8161 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 54%|█████▍    | 8163/15000 [16:22<14:30,  7.85it/s]


 epoch: 8162 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8163 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 54%|█████▍    | 8165/15000 [16:22<23:09,  4.92it/s]


 epoch: 8164 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 8165 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 54%|█████▍    | 8167/15000 [16:22<17:08,  6.65it/s]


 epoch: 8166 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8167 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%

 epoch: 8168 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 54%|█████▍    | 8170/15000 [16:23<14:39,  7.77it/s]


 epoch: 8169 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

input:       trading frenzy vollero appointment spurred speculation of an initial public offering move that senior leaders have considered publicly in

target:      trading frenzy vollero appointment spurred speculation of an initial public offering move that senior leaders have considered publicly in december

prediction:  trading frenzy vollero appointment spurred speculation of an initial public offering move that senior leaders have considered publicly in the

 epoch: 8170 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.3%


 54%|█████▍    | 8173/15000 [16:23<12:13,  9.30it/s]


 epoch: 8171 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8172 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8173 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.0%


 55%|█████▍    | 8176/15000 [16:23<11:05, 10.26it/s]


 epoch: 8174 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8175 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%

 epoch: 8176 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8177 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 55%|█████▍    | 8179/15000 [16:24<14:54,  7.63it/s]


 epoch: 8178 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8179 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

input:       opening act before the main show often used at the filming of television comedies in front of studio audiences

target:      opening act before the main show often used at the filming of television comedies in front of studio audiences work

prediction:  opening act before the main show often used at the filming of television comedies in front of studio audiences the


 55%|█████▍    | 8182/15000 [16:24<12:53,  8.82it/s]


 epoch: 8180 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8181 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8182 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.4%


 55%|█████▍    | 8184/15000 [16:24<11:58,  9.48it/s]


 epoch: 8183 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8184 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8185 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 96.9%


 55%|█████▍    | 8188/15000 [16:25<10:52, 10.44it/s]


 epoch: 8186 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8187 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.2%

 epoch: 8188 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.8%


 55%|█████▍    | 8190/15000 [16:25<11:26,  9.91it/s]


 epoch: 8189 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

input:       notebooks leonardo first stated the laws of sliding friction in his inspiration for investigating friction came about in part

target:      notebooks leonardo first stated the laws of sliding friction in his inspiration for investigating friction came about in part from

prediction:  notebooks leonardo first stated the laws of sliding friction in his inspiration for investigating friction came about in part the

 epoch: 8190 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8191 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 55%|█████▍    | 8193/15000 [16:26<18:35,  6.10it/s]


 epoch: 8192 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8193 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8194 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.8%


 55%|█████▍    | 8197/15000 [16:26<13:49,  8.20it/s]


 epoch: 8195 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8196 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8197 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 55%|█████▍    | 8199/15000 [16:26<12:32,  9.04it/s]


 epoch: 8198 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8199 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

input:       led to an increase in bachelors in countries such as china and india the first genetically modified children were

target:      led to an increase in bachelors in countries such as china and india the first genetically modified children were born

prediction:  led to an increase in bachelors in countries such as china and india the first genetically modified children were the

 epoch: 8200 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 55%|█████▍    | 8203/15000 [16:27<11:28,  9.88it/s]


 epoch: 8201 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8202 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8203 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%


 55%|█████▍    | 8205/15000 [16:27<11:21,  9.97it/s]


 epoch: 8204 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8205 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 55%|█████▍    | 8207/15000 [16:27<18:12,  6.22it/s]


 epoch: 8206 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 8207 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8208 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 55%|█████▍    | 8210/15000 [16:28<15:39,  7.23it/s]


 epoch: 8209 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

input:       children experience hunger and do not know where they will get their next meal or when as of june

target:      children experience hunger and do not know where they will get their next meal or when as of june update

prediction:  children experience hunger and do not know where they will get their next meal or when as of june the

 epoch: 8210 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 55%|█████▍    | 8213/15000 [16:28<13:20,  8.48it/s]


 epoch: 8211 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.27, test_acc: 96.8%

 epoch: 8212 | train_loss: 0.21, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8213 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 55%|█████▍    | 8216/15000 [16:28<12:09,  9.30it/s]


 epoch: 8214 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8215 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8216 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 55%|█████▍    | 8219/15000 [16:29<11:20,  9.97it/s]


 epoch: 8217 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8218 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8219 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

input:       in smalltalk an anonymous function expression block constructs an instance of the library blockcontext class conversely scheme contains multiple

target:      in smalltalk an anonymous function expression block constructs an instance of the library blockcontext class conversely scheme contains multiple coherent

prediction:  in smalltalk an anonymous function expression block constructs an instance of the library blockcontext class conversely scheme contains multiple of

 epoch: 8220 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 55%|█████▍    | 8223/15000 [16:29<14:39,  7.71it/s]


 epoch: 8221 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8222 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8223 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 55%|█████▍    | 8226/15000 [16:29<12:27,  9.06it/s]


 epoch: 8224 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%

 epoch: 8225 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8226 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 55%|█████▍    | 8228/15000 [16:30<11:43,  9.63it/s]


 epoch: 8227 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 8228 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8229 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 55%|█████▍    | 8230/15000 [16:30<12:17,  9.18it/s]


input:       native hawaiian musician and hawaiian sovereignty activist israel kamakawiwo ole famous for his medley of somewhere over the rainbow

target:      native hawaiian musician and hawaiian sovereignty activist israel kamakawiwo ole famous for his medley of somewhere over the rainbow what

prediction:  native hawaiian musician and hawaiian sovereignty activist israel kamakawiwo ole famous for his medley of somewhere over the rainbow the

 epoch: 8230 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8231 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 55%|█████▍    | 8234/15000 [16:30<11:14, 10.03it/s]


 epoch: 8232 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 8233 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8234 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.0%


 55%|█████▍    | 8236/15000 [16:31<18:45,  6.01it/s]


 epoch: 8235 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8236 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 8237 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 55%|█████▍    | 8240/15000 [16:31<14:36,  7.71it/s]


 epoch: 8238 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.5%

 epoch: 8239 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       by south america and the caribbean sea and to the west and south by the pacific ocean the region

target:      by south america and the caribbean sea and to the west and south by the pacific ocean the region includes

prediction:  by south america and the caribbean sea and to the west and south by the pacific ocean the region the

 epoch: 8240 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 55%|█████▍    | 8242/15000 [16:31<13:10,  8.55it/s]


 epoch: 8241 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.2%

 epoch: 8242 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8243 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 55%|█████▍    | 8246/15000 [16:32<11:27,  9.82it/s]


 epoch: 8244 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8245 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8246 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 55%|█████▍    | 8248/15000 [16:32<11:14, 10.02it/s]


 epoch: 8247 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8248 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 55%|█████▌    | 8250/15000 [16:33<20:00,  5.62it/s]


 epoch: 8249 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

input:       capital should be refunded by the goods they would bring up in the new world the discoveries of marco

target:      capital should be refunded by the goods they would bring up in the new world the discoveries of marco polo

prediction:  capital should be refunded by the goods they would bring up in the new world the discoveries of marco the

 epoch: 8250 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 55%|█████▌    | 8252/15000 [16:33<17:56,  6.27it/s]


 epoch: 8251 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 8252 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.25, test_acc: 96.9%


 55%|█████▌    | 8254/15000 [16:33<16:08,  6.97it/s]


 epoch: 8253 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8254 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 55%|█████▌    | 8256/15000 [16:33<15:44,  7.14it/s]


 epoch: 8255 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8256 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 55%|█████▌    | 8258/15000 [16:34<14:20,  7.83it/s]


 epoch: 8257 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.7%

 epoch: 8258 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.1%


 55%|█████▌    | 8260/15000 [16:34<16:34,  6.78it/s]


 epoch: 8259 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

input:       machine tool industry enabled the to have large scale manufacturing of sewing machines bicycles and other items in

target:      machine tool industry enabled the to have large scale manufacturing of sewing machines bicycles and other items in the

prediction:  machine tool industry enabled the to have large scale manufacturing of sewing machines bicycles and other items in the

 epoch: 8260 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 55%|█████▌    | 8262/15000 [16:34<15:42,  7.15it/s]


 epoch: 8261 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.4%

 epoch: 8262 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 55%|█████▌    | 8264/15000 [16:35<25:09,  4.46it/s]


 epoch: 8263 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 8264 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 55%|█████▌    | 8266/15000 [16:35<19:10,  5.85it/s]


 epoch: 8265 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8266 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%


 55%|█████▌    | 8268/15000 [16:35<16:11,  6.93it/s]


 epoch: 8267 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8268 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%


 55%|█████▌    | 8270/15000 [16:36<16:18,  6.88it/s]


 epoch: 8269 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

input:       been simpler he approved humphrey opinion and he endorsed susan blackmore project to give scientific theory of memes complete

target:      been simpler he approved humphrey opinion and he endorsed susan blackmore project to give scientific theory of memes complete with

prediction:  been simpler he approved humphrey opinion and he endorsed susan blackmore project to give scientific theory of memes complete the

 epoch: 8270 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 55%|█████▌    | 8272/15000 [16:36<13:46,  8.14it/s]


 epoch: 8271 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 8272 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8273 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 55%|█████▌    | 8276/15000 [16:36<11:03, 10.13it/s]


 epoch: 8274 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8275 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8276 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8277 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 55%|█████▌    | 8278/15000 [16:37<16:16,  6.89it/s]


 epoch: 8278 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.8%

 epoch: 8279 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

input:       headquartered in addis ababa africa straddles the equator and the prime meridian it is the only continent to stretch

target:      headquartered in addis ababa africa straddles the equator and the prime meridian it is the only continent to stretch from

prediction:  headquartered in addis ababa africa straddles the equator and the prime meridian it is the only continent to stretch the


 55%|█████▌    | 8282/15000 [16:37<13:12,  8.48it/s]


 epoch: 8280 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8281 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 8282 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%


 55%|█████▌    | 8285/15000 [16:37<11:49,  9.47it/s]


 epoch: 8283 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8284 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8285 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 55%|█████▌    | 8287/15000 [16:38<11:18,  9.89it/s]


 epoch: 8286 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8287 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8288 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 55%|█████▌    | 8289/15000 [16:38<11:09, 10.03it/s]


 epoch: 8289 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

input:       sounds gestures letters or symbols depending on whether the language is spoken signed or written and they can be

target:      sounds gestures letters or symbols depending on whether the language is spoken signed or written and they can be combined

prediction:  sounds gestures letters or symbols depending on whether the language is spoken signed or written and they can be the

 epoch: 8290 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%


 55%|█████▌    | 8291/15000 [16:38<11:22,  9.83it/s]


 epoch: 8291 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8292 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 55%|█████▌    | 8294/15000 [16:38<11:38,  9.60it/s]


 epoch: 8293 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8294 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%

 epoch: 8295 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 55%|█████▌    | 8298/15000 [16:39<10:18, 10.84it/s]


 epoch: 8296 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8297 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8298 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 55%|█████▌    | 8300/15000 [16:39<10:54, 10.24it/s]


 epoch: 8299 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

input:       maintained day job with jalbum net at first he later quit in order to work on minecraft full time

target:      maintained day job with jalbum net at first he later quit in order to work on minecraft full time as

prediction:  maintained day job with jalbum net at first he later quit in order to work on minecraft full time the

 epoch: 8300 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.1%

 epoch: 8301 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.3%


 55%|█████▌    | 8304/15000 [16:39<10:25, 10.70it/s]


 epoch: 8302 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8303 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8304 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8305 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 55%|█████▌    | 8308/15000 [16:40<15:40,  7.12it/s]


 epoch: 8306 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8307 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 8308 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 55%|█████▌    | 8310/15000 [16:40<14:31,  7.68it/s]


 epoch: 8309 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

input:       films influenced by french and italian realist pictures of the post war period the st century has been marked

target:      films influenced by french and italian realist pictures of the post war period the st century has been marked by

prediction:  films influenced by french and italian realist pictures of the post war period the st century has been marked the

 epoch: 8310 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 55%|█████▌    | 8312/15000 [16:40<13:12,  8.44it/s]


 epoch: 8311 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 8312 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8313 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 55%|█████▌    | 8316/15000 [16:41<11:46,  9.46it/s]


 epoch: 8314 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8315 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%

 epoch: 8316 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.1%


 55%|█████▌    | 8318/15000 [16:41<11:35,  9.60it/s]


 epoch: 8317 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8318 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8319 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

input:       high energy physics because many elementary particles do not occur naturally but are created only during high energy collisions

target:      high energy physics because many elementary particles do not occur naturally but are created only during high energy collisions of

prediction:  high energy physics because many elementary particles do not occur naturally but are created only during high energy collisions the


 55%|█████▌    | 8322/15000 [16:42<14:03,  7.92it/s]


 epoch: 8320 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8321 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8322 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 55%|█████▌    | 8324/15000 [16:42<12:50,  8.67it/s]


 epoch: 8323 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8324 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8325 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 56%|█████▌    | 8328/15000 [16:42<11:16,  9.87it/s]


 epoch: 8326 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8327 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 8328 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 56%|█████▌    | 8330/15000 [16:42<11:33,  9.62it/s]


 epoch: 8329 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

input:       revolution and the establishment of the first republic as result of which the monarchy and many of the nobility

target:      revolution and the establishment of the first republic as result of which the monarchy and many of the nobility perished

prediction:  revolution and the establishment of the first republic as result of which the monarchy and many of the nobility the

 epoch: 8330 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 56%|█████▌    | 8332/15000 [16:43<11:07,  9.99it/s]


 epoch: 8331 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 8332 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 8333 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 56%|█████▌    | 8336/15000 [16:43<14:36,  7.60it/s]


 epoch: 8334 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.4%

 epoch: 8335 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8336 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 56%|█████▌    | 8339/15000 [16:44<12:24,  8.95it/s]


 epoch: 8337 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8338 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8339 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 56%|█████▌    | 8341/15000 [16:44<12:23,  8.96it/s]


input:       the object abstraction in object oriented programming method call executed locally on proxy object invokes the corresponding method on

target:      the object abstraction in object oriented programming method call executed locally on proxy object invokes the corresponding method on the

prediction:  the object abstraction in object oriented programming method call executed locally on proxy object invokes the corresponding method on the

 epoch: 8340 | train_loss: 0.24, train_acc: 96.6% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8341 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%


 56%|█████▌    | 8344/15000 [16:44<11:17,  9.82it/s]


 epoch: 8342 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8343 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 8344 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 56%|█████▌    | 8346/15000 [16:44<11:07,  9.97it/s]


 epoch: 8345 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8346 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8347 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 56%|█████▌    | 8348/15000 [16:45<11:07,  9.96it/s]


 epoch: 8348 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 56%|█████▌    | 8350/15000 [16:45<18:54,  5.86it/s]


 epoch: 8349 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

input:       judaism the oldest of the abrahamic faiths is practiced primarily in israel the indigenous homeland and historical birthplace of

target:      judaism the oldest of the abrahamic faiths is practiced primarily in israel the indigenous homeland and historical birthplace of the

prediction:  judaism the oldest of the abrahamic faiths is practiced primarily in israel the indigenous homeland and historical birthplace of the

 epoch: 8350 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%


 56%|█████▌    | 8352/15000 [16:45<16:12,  6.84it/s]


 epoch: 8351 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8352 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8353 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 56%|█████▌    | 8355/15000 [16:46<13:48,  8.02it/s]


 epoch: 8354 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8355 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 56%|█████▌    | 8357/15000 [16:46<12:56,  8.56it/s]


 epoch: 8356 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8357 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%


 56%|█████▌    | 8359/15000 [16:46<12:35,  8.79it/s]


 epoch: 8358 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8359 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.6%

input:       expansion of farming communities and the rapid adoption of rice cultivation around the niger river by the first millennium


 56%|█████▌    | 8360/15000 [16:46<14:43,  7.52it/s]


target:      expansion of farming communities and the rapid adoption of rice cultivation around the niger river by the first millennium bc

prediction:  expansion of farming communities and the rapid adoption of rice cultivation around the niger river by the first millennium the

 epoch: 8360 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%

 epoch: 8361 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 56%|█████▌    | 8362/15000 [16:46<13:25,  8.24it/s]


 epoch: 8362 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 56%|█████▌    | 8364/15000 [16:47<18:10,  6.08it/s]


 epoch: 8363 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 8364 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 56%|█████▌    | 8366/15000 [16:47<16:53,  6.54it/s]


 epoch: 8365 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8366 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 56%|█████▌    | 8368/15000 [16:47<15:48,  6.99it/s]


 epoch: 8367 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 8368 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 56%|█████▌    | 8369/15000 [16:48<15:10,  7.28it/s]


 epoch: 8369 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

input:       and mand cultures started to collect wild millet around to bce later gourds watermelons castor beans and cotton were

target:      and mand cultures started to collect wild millet around to bce later gourds watermelons castor beans and cotton were also

prediction:  and mand cultures started to collect wild millet around to bce later gourds watermelons castor beans and cotton were the


 56%|█████▌    | 8371/15000 [16:48<16:34,  6.66it/s]


 epoch: 8370 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 8371 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 56%|█████▌    | 8373/15000 [16:48<15:25,  7.16it/s]


 epoch: 8372 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8373 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 56%|█████▌    | 8375/15000 [16:48<14:06,  7.82it/s]


 epoch: 8374 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8375 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 56%|█████▌    | 8376/15000 [16:49<14:00,  7.88it/s]


 epoch: 8376 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 56%|█████▌    | 8378/15000 [16:49<17:30,  6.31it/s]


 epoch: 8377 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.8%

 epoch: 8378 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 56%|█████▌    | 8380/15000 [16:49<18:26,  5.99it/s]


 epoch: 8379 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.1%

input:       able to loosen the grip of negative emotions on people thinking distancing of thought leads to distancing of the

target:      able to loosen the grip of negative emotions on people thinking distancing of thought leads to distancing of the unilateral

prediction:  able to loosen the grip of negative emotions on people thinking distancing of thought leads to distancing of the the

 epoch: 8380 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 56%|█████▌    | 8382/15000 [16:50<15:39,  7.04it/s]


 epoch: 8381 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8382 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 56%|█████▌    | 8384/15000 [16:50<13:46,  8.00it/s]


 epoch: 8383 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8384 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8385 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 56%|█████▌    | 8387/15000 [16:50<12:08,  9.08it/s]


 epoch: 8386 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 8387 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 56%|█████▌    | 8389/15000 [16:50<11:42,  9.42it/s]


 epoch: 8388 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8389 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.3%

input:       eastern region and dispersed from there to europe and parts of africa during the neolithic haplogroup is carried

target:      eastern region and dispersed from there to europe and parts of africa during the neolithic haplogroup is carried by

prediction: 

 56%|█████▌    | 8390/15000 [16:50<12:59,  8.48it/s]

 eastern region and dispersed from there to europe and parts of africa during the neolithic haplogroup is carried the

 epoch: 8390 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.8%


 56%|█████▌    | 8393/15000 [16:51<19:53,  5.54it/s]


 epoch: 8391 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8392 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8393 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 56%|█████▌    | 8396/15000 [16:51<14:16,  7.71it/s]


 epoch: 8394 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8395 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8396 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 56%|█████▌    | 8398/15000 [16:52<12:43,  8.65it/s]


 epoch: 8397 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8398 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8399 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 56%|█████▌    | 8400/15000 [16:52<12:29,  8.81it/s]


input:       in september valve company worked with the publisher perfect world to release global offensive in mainland china chinese citizens

target:      in september valve company worked with the publisher perfect world to release global offensive in mainland china chinese citizens with

prediction:  in september valve company worked with the publisher perfect world to release global offensive in mainland china chinese citizens the

 epoch: 8400 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8401 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.1%


 56%|█████▌    | 8404/15000 [16:52<11:11,  9.83it/s]


 epoch: 8402 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8403 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.9%

 epoch: 8404 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8405 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 56%|█████▌    | 8408/15000 [16:53<16:13,  6.77it/s]


 epoch: 8406 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 8407 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8408 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 56%|█████▌    | 8410/15000 [16:53<15:10,  7.24it/s]


 epoch: 8409 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

input:       arrival of napster in and similar projects which effectively catered to music enthusiasts especially teenagers and young adults soon

target:      arrival of napster in and similar projects which effectively catered to music enthusiasts especially teenagers and young adults soon becoming

prediction:  arrival of napster in and similar projects which effectively catered to music enthusiasts especially teenagers and young adults soon the

 epoch: 8410 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 56%|█████▌    | 8413/15000 [16:54<12:23,  8.86it/s]


 epoch: 8411 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 8412 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 8413 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 56%|█████▌    | 8415/15000 [16:54<11:38,  9.43it/s]


 epoch: 8414 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8415 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 8416 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 56%|█████▌    | 8419/15000 [16:54<10:45, 10.19it/s]


 epoch: 8417 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8418 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8419 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

input:       social purposes informing in turn the study of the social functions of language and grammatical description neurolinguistics studies how

target:      social purposes informing in turn the study of the social functions of language and grammatical description neurolinguistics studies how language

prediction:  social purposes informing in turn the study of the social functions of language and grammatical description neurolinguistics studies how the


 56%|█████▌    | 8421/15000 [16:55<19:24,  5.65it/s]


 epoch: 8420 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8421 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8422 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%


 56%|█████▌    | 8425/15000 [16:55<14:12,  7.71it/s]


 epoch: 8423 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8424 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8425 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 56%|█████▌    | 8428/15000 [16:55<12:09,  9.00it/s]


 epoch: 8426 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8427 | train_loss: 0.25, train_acc: 96.7% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8428 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.8%


 56%|█████▌    | 8430/15000 [16:56<12:06,  9.04it/s]


 epoch: 8429 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

input:       stand up comedy differs from most other performing arts as the comedian is usually the only thing on stage

target:      stand up comedy differs from most other performing arts as the comedian is usually the only thing on stage and

prediction:  stand up comedy differs from most other performing arts as the comedian is usually the only thing on stage the

 epoch: 8430 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 56%|█████▌    | 8432/15000 [16:56<11:38,  9.40it/s]


 epoch: 8431 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 8432 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8433 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 56%|█████▌    | 8436/15000 [16:57<16:03,  6.81it/s]


 epoch: 8434 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8435 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8436 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 56%|█████▋    | 8439/15000 [16:57<13:17,  8.23it/s]


 epoch: 8437 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8438 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8439 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 56%|█████▋    | 8440/15000 [16:57<13:47,  7.92it/s]


input:       would trigger an error on the undefined variable during compilation however the program would still be syntactically correct since

target:      would trigger an error on the undefined variable during compilation however the program would still be syntactically correct since type

prediction:  would trigger an error on the undefined variable during compilation however the program would still be syntactically correct since the

 epoch: 8440 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.1%

 epoch: 8441 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 56%|█████▋    | 8444/15000 [16:57<11:20,  9.63it/s]


 epoch: 8442 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8443 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8444 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 56%|█████▋    | 8446/15000 [16:58<11:18,  9.66it/s]


 epoch: 8445 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8446 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%

 epoch: 8447 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 56%|█████▋    | 8448/15000 [16:58<11:21,  9.61it/s]


 epoch: 8448 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 56%|█████▋    | 8450/15000 [16:58<17:28,  6.25it/s]


 epoch: 8449 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

input:       descendants of the second wave would begin to expand and spread into the more remote islands at around the

target:      descendants of the second wave would begin to expand and spread into the more remote islands at around the same

prediction:  descendants of the second wave would begin to expand and spread into the more remote islands at around the the

 epoch: 8450 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 56%|█████▋    | 8453/15000 [16:59<13:31,  8.06it/s]


 epoch: 8451 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8452 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8453 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 56%|█████▋    | 8455/15000 [16:59<12:20,  8.84it/s]


 epoch: 8454 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8455 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.20, test_acc: 97.1%

 epoch: 8456 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%


 56%|█████▋    | 8459/15000 [16:59<10:53, 10.01it/s]


 epoch: 8457 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.8%

 epoch: 8458 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 8459 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.1%


 56%|█████▋    | 8461/15000 [17:00<11:33,  9.43it/s]


input:       waterfowl hunting would have been common for egyptians and this is also the period when many animals were first

target:      waterfowl hunting would have been common for egyptians and this is also the period when many animals were first domesticated

prediction:  waterfowl hunting would have been common for egyptians and this is also the period when many animals were first the

 epoch: 8460 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8461 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 56%|█████▋    | 8462/15000 [17:00<11:42,  9.31it/s]


 epoch: 8462 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 56%|█████▋    | 8464/15000 [17:00<19:38,  5.55it/s]


 epoch: 8463 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8464 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 56%|█████▋    | 8466/15000 [17:01<17:39,  6.16it/s]


 epoch: 8465 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 8466 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 56%|█████▋    | 8468/15000 [17:01<15:40,  6.94it/s]


 epoch: 8467 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%

 epoch: 8468 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.3%


 56%|█████▋    | 8470/15000 [17:01<16:23,  6.64it/s]


 epoch: 8469 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

input:       of chile venezuela peru and colombia the largest ports in commercial movement are those of buenos aires santos rio

target:      of chile venezuela peru and colombia the largest ports in commercial movement are those of buenos aires santos rio de

prediction:  of chile venezuela peru and colombia the largest ports in commercial movement are those of buenos aires santos rio the

 epoch: 8470 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 56%|█████▋    | 8472/15000 [17:01<14:25,  7.54it/s]


 epoch: 8471 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8472 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 56%|█████▋    | 8474/15000 [17:02<13:43,  7.92it/s]


 epoch: 8473 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8474 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 57%|█████▋    | 8476/15000 [17:02<13:12,  8.23it/s]


 epoch: 8475 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 8476 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 57%|█████▋    | 8477/15000 [17:02<12:55,  8.41it/s]


 epoch: 8477 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 57%|█████▋    | 8478/15000 [17:02<16:08,  6.73it/s]


 epoch: 8478 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.3%

 epoch: 8479 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

input:       psychology approaches thought and behavior from modern evolutionary perspective this perspective suggests that psychological adaptations evolved to solve recurrent

target:      psychology approaches thought and behavior from modern evolutionary perspective this perspective suggests that psychological adaptations evolved to solve recurrent problems

prediction:  psychology approaches thought and behavior from modern evolutionary perspective this perspective suggests that psychological adaptations evolved to solve recurrent the


 57%|█████▋    | 8481/15000 [17:03<14:21,  7.57it/s]


 epoch: 8480 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8481 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 57%|█████▋    | 8483/15000 [17:03<13:22,  8.12it/s]


 epoch: 8482 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8483 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 57%|█████▋    | 8485/15000 [17:03<13:17,  8.17it/s]


 epoch: 8484 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8485 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 57%|█████▋    | 8487/15000 [17:03<13:14,  8.20it/s]


 epoch: 8486 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8487 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 57%|█████▋    | 8489/15000 [17:03<12:54,  8.40it/s]


 epoch: 8488 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8489 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 57%|█████▋    | 8490/15000 [17:04<15:19,  7.08it/s]


input:       the llama anaconda piranha jaguar vicu and tapir the amazon rainforests possess high biodiversity containing major proportion of earth

target:      the llama anaconda piranha jaguar vicu and tapir the amazon rainforests possess high biodiversity containing major proportion of earth species

prediction:  the llama anaconda piranha jaguar vicu and tapir the amazon rainforests possess high biodiversity containing major proportion of earth the

 epoch: 8490 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 57%|█████▋    | 8491/15000 [17:04<14:11,  7.64it/s]


 epoch: 8491 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 57%|█████▋    | 8494/15000 [17:04<15:17,  7.09it/s]


 epoch: 8492 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 8493 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.4%

 epoch: 8494 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 57%|█████▋    | 8497/15000 [17:05<12:06,  8.95it/s]


 epoch: 8495 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8496 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8497 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 57%|█████▋    | 8499/15000 [17:05<11:06,  9.76it/s]


 epoch: 8498 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8499 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       there was much of it and said that the elo rating system seemed ineffective with many players of various

target:      there was much of it and said that the elo rating system seemed ineffective with many players of various skill

prediction:  there was much of it and said that the elo rating system seemed ineffective with many players of various the


 57%|█████▋    | 8501/15000 [17:05<12:02,  8.99it/s]


 epoch: 8500 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8501 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8502 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 57%|█████▋    | 8505/15000 [17:05<10:55,  9.91it/s]


 epoch: 8503 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8504 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8505 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 57%|█████▋    | 8507/15000 [17:06<18:17,  5.92it/s]


 epoch: 8506 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8507 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.8%

 epoch: 8508 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 57%|█████▋    | 8510/15000 [17:06<15:23,  7.03it/s]


 epoch: 8509 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

input:       country mexico maintains both modern and outdated industrial and agricultural facilities and operations its main sources of income are

target:      country mexico maintains both modern and outdated industrial and agricultural facilities and operations its main sources of income are oil

prediction:  country mexico maintains both modern and outdated industrial and agricultural facilities and operations its main sources of income are the

 epoch: 8510 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 57%|█████▋    | 8512/15000 [17:06<13:27,  8.03it/s]


 epoch: 8511 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8512 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8513 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 57%|█████▋    | 8516/15000 [17:07<11:23,  9.49it/s]


 epoch: 8514 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 8515 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8516 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 57%|█████▋    | 8518/15000 [17:07<11:00,  9.82it/s]


 epoch: 8517 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8518 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8519 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

input:       asian religions such as japanese buddhism shintoism and shinto derived japanese new religions are common in brazil and peru

target:      asian religions such as japanese buddhism shintoism and shinto derived japanese new religions are common in brazil and peru korean

prediction:  asian religions such as japanese buddhism shintoism and shinto derived japanese new religions are common in brazil and peru the


 57%|█████▋    | 8521/15000 [17:08<15:40,  6.89it/s]


 epoch: 8520 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8521 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.8%

 epoch: 8522 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 57%|█████▋    | 8525/15000 [17:08<12:17,  8.78it/s]


 epoch: 8523 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8524 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8525 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 57%|█████▋    | 8527/15000 [17:08<11:33,  9.34it/s]


 epoch: 8526 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8527 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8528 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 96.9%


 57%|█████▋    | 8529/15000 [17:08<11:01,  9.78it/s]


 epoch: 8529 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       downloads version of the repository with the malicious directory then switches to that directory the git directory will be

target:      downloads version of the repository with the malicious directory then switches to that directory the git directory will be overwritten

prediction:  downloads version of the repository with the malicious directory then switches to that directory the git directory will be the

 epoch: 8530 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 57%|█████▋    | 8533/15000 [17:09<10:41, 10.07it/s]


 epoch: 8531 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8532 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8533 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8534 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.7%


 57%|█████▋    | 8537/15000 [17:09<13:40,  7.88it/s]


 epoch: 8535 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8536 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%

 epoch: 8537 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.3%


 57%|█████▋    | 8539/15000 [17:10<12:31,  8.60it/s]


 epoch: 8538 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 8539 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

input:       and legumes began around bc in china taro cultivation in new guinea dates to about bc also with squash

target:      and legumes began around bc in china taro cultivation in new guinea dates to about bc also with squash cultivation

prediction:  and legumes began around bc in china taro cultivation in new guinea dates to about bc also with squash the


 57%|█████▋    | 8541/15000 [17:10<12:16,  8.77it/s]


 epoch: 8540 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.27, test_acc: 97.0%

 epoch: 8541 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8542 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 57%|█████▋    | 8545/15000 [17:10<10:40, 10.08it/s]


 epoch: 8543 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8544 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8545 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 57%|█████▋    | 8547/15000 [17:10<10:44, 10.00it/s]


 epoch: 8546 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8547 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%

 epoch: 8548 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 57%|█████▋    | 8550/15000 [17:11<17:15,  6.23it/s]


 epoch: 8549 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.8%

input:       linked to the ability to learn from experience to understand and to employ knowledge and skills to solve problems

target:      linked to the ability to learn from experience to understand and to employ knowledge and skills to solve problems those

prediction:  linked to the ability to learn from experience to understand and to employ knowledge and skills to solve problems the

 epoch: 8550 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.2%


 57%|█████▋    | 8552/15000 [17:11<14:59,  7.17it/s]


 epoch: 8551 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8552 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8553 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 57%|█████▋    | 8556/15000 [17:12<11:39,  9.21it/s]


 epoch: 8554 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8555 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8556 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 57%|█████▋    | 8558/15000 [17:12<11:07,  9.66it/s]


 epoch: 8557 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8558 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 8559 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 57%|█████▋    | 8560/15000 [17:12<11:23,  9.42it/s]


input:       louis xii who considered commissioning the artist to make some portraits leonardo may have commenced project for an equestrian

target:      louis xii who considered commissioning the artist to make some portraits leonardo may have commenced project for an equestrian figure

prediction:  louis xii who considered commissioning the artist to make some portraits leonardo may have commenced project for an equestrian the

 epoch: 8560 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8561 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 57%|█████▋    | 8562/15000 [17:12<10:57,  9.79it/s]


 epoch: 8562 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%


 57%|█████▋    | 8564/15000 [17:13<18:45,  5.72it/s]


 epoch: 8563 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8564 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8565 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.0%


 57%|█████▋    | 8568/15000 [17:13<13:52,  7.73it/s]


 epoch: 8566 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 8567 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 8568 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 57%|█████▋    | 8570/15000 [17:14<13:31,  7.92it/s]


 epoch: 8569 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

input:       the previous command produces an error the command separator should be used for example on windows xp or later

target:      the previous command produces an error the command separator should be used for example on windows xp or later the

prediction:  the previous command produces an error the command separator should be used for example on windows xp or later the

 epoch: 8570 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 57%|█████▋    | 8572/15000 [17:14<13:04,  8.19it/s]


 epoch: 8571 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8572 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 57%|█████▋    | 8574/15000 [17:14<13:26,  7.96it/s]


 epoch: 8573 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8574 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 57%|█████▋    | 8576/15000 [17:14<13:11,  8.12it/s]


 epoch: 8575 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8576 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 57%|█████▋    | 8577/15000 [17:14<13:04,  8.19it/s]


 epoch: 8577 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%


 57%|█████▋    | 8579/15000 [17:15<23:35,  4.54it/s]


 epoch: 8578 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8579 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 57%|█████▋    | 8580/15000 [17:15<22:09,  4.83it/s]


input:       organism in humans for example eye colour is an inherited characteristic and an individual might inherit the brown eye

target:      organism in humans for example eye colour is an inherited characteristic and an individual might inherit the brown eye trait

prediction:  organism in humans for example eye colour is an inherited characteristic and an individual might inherit the brown eye the

 epoch: 8580 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.8%


 57%|█████▋    | 8582/15000 [17:16<17:28,  6.12it/s]


 epoch: 8581 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8582 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%


 57%|█████▋    | 8584/15000 [17:16<14:30,  7.37it/s]


 epoch: 8583 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8584 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 57%|█████▋    | 8586/15000 [17:16<14:16,  7.49it/s]


 epoch: 8585 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8586 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 57%|█████▋    | 8588/15000 [17:16<13:29,  7.92it/s]


 epoch: 8587 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8588 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 57%|█████▋    | 8590/15000 [17:17<14:34,  7.33it/s]


 epoch: 8589 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

input:       over the past few thousand years present day extinction rates are times greater than the background rate and up

target:      over the past few thousand years present day extinction rates are times greater than the background rate and up to

prediction:  over the past few thousand years present day extinction rates are times greater than the background rate and up the

 epoch: 8590 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 57%|█████▋    | 8591/15000 [17:17<14:22,  7.43it/s]


 epoch: 8591 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%


 57%|█████▋    | 8593/15000 [17:17<20:33,  5.19it/s]


 epoch: 8592 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8593 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 57%|█████▋    | 8595/15000 [17:17<15:59,  6.68it/s]


 epoch: 8594 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8595 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8596 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 57%|█████▋    | 8599/15000 [17:18<11:37,  9.18it/s]


 epoch: 8597 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8598 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 8599 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 57%|█████▋    | 8600/15000 [17:18<12:42,  8.39it/s]


input:       december he was then forced to abdicate by the legitimate augustus constantius the usurper magnentius would continue to rule

target:      december he was then forced to abdicate by the legitimate augustus constantius the usurper magnentius would continue to rule the

prediction:  december he was then forced to abdicate by the legitimate augustus constantius the usurper magnentius would continue to rule the

 epoch: 8600 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 8601 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 57%|█████▋    | 8604/15000 [17:18<10:42,  9.95it/s]


 epoch: 8602 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8603 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 8604 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 57%|█████▋    | 8606/15000 [17:19<11:56,  8.92it/s]


 epoch: 8605 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 8606 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 57%|█████▋    | 8609/15000 [17:19<10:51,  9.82it/s]


 epoch: 8607 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8608 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 8609 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 57%|█████▋    | 8611/15000 [17:19<11:12,  9.51it/s]


input:       end of the permian the climate became drier and hotter over much of gondwana and the glossopterid forest ecosystems

target:      end of the permian the climate became drier and hotter over much of gondwana and the glossopterid forest ecosystems collapsed

prediction:  end of the permian the climate became drier and hotter over much of gondwana and the glossopterid forest ecosystems the

 epoch: 8610 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8611 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 57%|█████▋    | 8613/15000 [17:19<10:37, 10.03it/s]


 epoch: 8612 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8613 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%

 epoch: 8614 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.25, test_acc: 97.0%


 57%|█████▋    | 8617/15000 [17:20<09:54, 10.73it/s]


 epoch: 8615 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8616 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8617 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 57%|█████▋    | 8619/15000 [17:20<10:02, 10.59it/s]


 epoch: 8618 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8619 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

input:       cm in black chalk on coloured paper of the virgin and child with saint anne and saint john the

target:      cm in black chalk on coloured paper of the virgin and child with saint anne and saint john the baptist

prediction:  cm in black chalk on coloured paper of the virgin and child with saint anne and saint john the the

 epoch: 8620 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 57%|█████▋    | 8623/15000 [17:21<15:18,  6.94it/s]


 epoch: 8621 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.8%

 epoch: 8622 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 8623 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 58%|█████▊    | 8626/15000 [17:21<12:37,  8.42it/s]


 epoch: 8624 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8625 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.20, test_acc: 97.3%

 epoch: 8626 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 58%|█████▊    | 8628/15000 [17:21<11:33,  9.19it/s]


 epoch: 8627 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8628 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.4%

 epoch: 8629 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 58%|█████▊    | 8630/15000 [17:21<11:32,  9.20it/s]


input:       are linked to academic success further mental factors include self efficacy self esteem and metacognitive abilities unlike psychological factors

target:      are linked to academic success further mental factors include self efficacy self esteem and metacognitive abilities unlike psychological factors sociological

prediction:  are linked to academic success further mental factors include self efficacy self esteem and metacognitive abilities unlike psychological factors the

 epoch: 8630 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 8631 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 58%|█████▊    | 8634/15000 [17:22<10:33, 10.05it/s]


 epoch: 8632 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8633 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8634 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 58%|█████▊    | 8636/15000 [17:22<17:24,  6.09it/s]


 epoch: 8635 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8636 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8637 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 58%|█████▊    | 8638/15000 [17:23<14:55,  7.10it/s]


 epoch: 8638 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8639 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

input:       common internet harassment tool colloquially known as doxxing via the site those who break the rule are subject to

target:      common internet harassment tool colloquially known as doxxing via the site those who break the rule are subject to site

prediction:  common internet harassment tool colloquially known as doxxing via the site those who break the rule are subject to the


 58%|█████▊    | 8641/15000 [17:23<13:31,  7.84it/s]


 epoch: 8640 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 8641 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 58%|█████▊    | 8643/15000 [17:23<12:45,  8.30it/s]


 epoch: 8642 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 8643 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.2%

 epoch: 8644 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 58%|█████▊    | 8647/15000 [17:23<10:34, 10.02it/s]


 epoch: 8645 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8646 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8647 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.0%


 58%|█████▊    | 8649/15000 [17:24<11:06,  9.52it/s]


 epoch: 8648 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8649 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 58%|█████▊    | 8650/15000 [17:24<12:06,  8.74it/s]


input:       offensive with eleague esports csgo and eleague cs go premier docu series on the tbs network counter strike global

target:      offensive with eleague esports csgo and eleague cs go premier docu series on the tbs network counter strike global offensive

prediction:  offensive with eleague esports csgo and eleague cs go premier docu series on the tbs network counter strike global the

 epoch: 8650 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8651 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 58%|█████▊    | 8654/15000 [17:24<10:19, 10.24it/s]


 epoch: 8652 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 8653 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8654 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 58%|█████▊    | 8656/15000 [17:24<10:12, 10.36it/s]


 epoch: 8655 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8656 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8657 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 58%|█████▊    | 8658/15000 [17:24<09:52, 10.71it/s]


 epoch: 8658 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8659 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%

input:       biases which recur frequently in human thought the availability heuristic for example is the tendency to overestimate the importance

target:      biases which recur frequently in human thought the availability heuristic for example is the tendency to overestimate the importance of

prediction:  biases which recur frequently in human thought the availability heuristic for example is the tendency to overestimate the importance the


 58%|█████▊    | 8662/15000 [17:25<10:21, 10.21it/s]


 epoch: 8660 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 8661 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8662 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 58%|█████▊    | 8664/15000 [17:26<17:13,  6.13it/s]


 epoch: 8663 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 8664 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8665 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 58%|█████▊    | 8668/15000 [17:26<12:57,  8.14it/s]


 epoch: 8666 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8667 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8668 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 58%|█████▊    | 8670/15000 [17:26<12:20,  8.55it/s]


 epoch: 8669 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

input:       the second least populated after antarctica oceania has diverse mix of economies from the highly developed and globally competitive

target:      the second least populated after antarctica oceania has diverse mix of economies from the highly developed and globally competitive financial

prediction:  the second least populated after antarctica oceania has diverse mix of economies from the highly developed and globally competitive the

 epoch: 8670 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8671 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 58%|█████▊    | 8672/15000 [17:26<11:17,  9.34it/s]


 epoch: 8672 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8673 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 58%|█████▊    | 8676/15000 [17:27<10:25, 10.12it/s]


 epoch: 8674 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8675 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8676 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.5%


 58%|█████▊    | 8678/15000 [17:27<17:05,  6.17it/s]


 epoch: 8677 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8678 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8679 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 58%|█████▊    | 8681/15000 [17:28<14:54,  7.06it/s]


input:       natural selection and population genetics based on mendelian inheritance into unified theory that included random genetic drift mutation and

target:      natural selection and population genetics based on mendelian inheritance into unified theory that included random genetic drift mutation and gene

prediction:  natural selection and population genetics based on mendelian inheritance into unified theory that included random genetic drift mutation and the

 epoch: 8680 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.0%

 epoch: 8681 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 96.9%


 58%|█████▊    | 8683/15000 [17:28<13:40,  7.70it/s]


 epoch: 8682 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8683 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 58%|█████▊    | 8685/15000 [17:28<13:08,  8.01it/s]


 epoch: 8684 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8685 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%


 58%|█████▊    | 8687/15000 [17:28<12:36,  8.34it/s]


 epoch: 8686 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8687 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 58%|█████▊    | 8689/15000 [17:28<12:39,  8.31it/s]


 epoch: 8688 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%

 epoch: 8689 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 58%|█████▊    | 8690/15000 [17:29<14:37,  7.19it/s]


input:       both riograndense republic and juliana republic were reincorporated as provinces in the peru bolivian confederation short lived union of

target:      both riograndense republic and juliana republic were reincorporated as provinces in the peru bolivian confederation short lived union of peru

prediction:  both riograndense republic and juliana republic were reincorporated as provinces in the peru bolivian confederation short lived union of the

 epoch: 8690 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8691 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 58%|█████▊    | 8692/15000 [17:29<14:07,  7.45it/s]


 epoch: 8692 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 58%|█████▊    | 8694/15000 [17:29<12:59,  8.09it/s]


 epoch: 8693 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8694 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 58%|█████▊    | 8696/15000 [17:29<12:25,  8.45it/s]


 epoch: 8695 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.4%

 epoch: 8696 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 58%|█████▊    | 8698/15000 [17:30<12:45,  8.23it/s]


 epoch: 8697 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 8698 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%


 58%|█████▊    | 8700/15000 [17:30<14:18,  7.34it/s]


 epoch: 8699 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       pliny the history of asia can be seen as the distinct histories of several peripheral coastal regions east asia

target:      pliny the history of asia can be seen as the distinct histories of several peripheral coastal regions east asia south

prediction:  pliny the history of asia can be seen as the distinct histories of several peripheral coastal regions east asia the

 epoch: 8700 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 58%|█████▊    | 8702/15000 [17:30<13:59,  7.50it/s]


 epoch: 8701 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.4%

 epoch: 8702 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.0%


 58%|█████▊    | 8704/15000 [17:30<12:51,  8.16it/s]


 epoch: 8703 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8704 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.2%


 58%|█████▊    | 8705/15000 [17:31<12:41,  8.27it/s]


 epoch: 8705 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 58%|█████▊    | 8707/15000 [17:31<22:16,  4.71it/s]


 epoch: 8706 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8707 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.0%


 58%|█████▊    | 8708/15000 [17:31<18:57,  5.53it/s]


 epoch: 8708 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.3%

 epoch: 8709 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

input:       git uses sha hashes internally linus torvalds has responded that the hash was mostly to guard against accidental corruption

target:      git uses sha hashes internally linus torvalds has responded that the hash was mostly to guard against accidental corruption and


 58%|█████▊    | 8711/15000 [17:32<14:47,  7.09it/s]


prediction:  git uses sha hashes internally linus torvalds has responded that the hash was mostly to guard against accidental corruption the

 epoch: 8710 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8711 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.3%


 58%|█████▊    | 8714/15000 [17:32<12:01,  8.71it/s]


 epoch: 8712 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8713 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 8714 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 58%|█████▊    | 8717/15000 [17:32<11:15,  9.30it/s]


 epoch: 8715 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 8716 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 8717 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 58%|█████▊    | 8719/15000 [17:32<10:44,  9.75it/s]


 epoch: 8718 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 8719 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

input:       was considered hero by pagan sources of his time and villain by christian ones gibbon wrote quite favourably about

target:      was considered hero by pagan sources of his time and villain by christian ones gibbon wrote quite favourably about julian

prediction:  was considered hero by pagan sources of his time and villain by christian ones gibbon wrote quite favourably about the


 58%|█████▊    | 8722/15000 [17:33<17:18,  6.05it/s]


 epoch: 8720 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.1%

 epoch: 8721 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8722 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 58%|█████▊    | 8725/15000 [17:34<13:13,  7.91it/s]


 epoch: 8723 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8724 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 8725 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 58%|█████▊    | 8728/15000 [17:34<11:07,  9.39it/s]


 epoch: 8726 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8727 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8728 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%


 58%|█████▊    | 8730/15000 [17:34<11:13,  9.31it/s]


 epoch: 8729 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

input:       release was the arms deal update released on august the update added cosmetic weapon finishes or skins to the

target:      release was the arms deal update released on august the update added cosmetic weapon finishes or skins to the game

prediction:  release was the arms deal update released on august the update added cosmetic weapon finishes or skins to the the

 epoch: 8730 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8731 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 58%|█████▊    | 8734/15000 [17:34<10:11, 10.25it/s]


 epoch: 8732 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8733 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8734 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 58%|█████▊    | 8737/15000 [17:35<14:37,  7.14it/s]


 epoch: 8735 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8736 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 8737 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 58%|█████▊    | 8738/15000 [17:35<13:57,  7.48it/s]


 epoch: 8738 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8739 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       the african union president and head of state who is also the president of the pan african parliament person

target:      the african union president and head of state who is also the president of the pan african parliament person becomes

prediction:  the african union president and head of state who is also the president of the pan african parliament person the


 58%|█████▊    | 8742/15000 [17:35<11:35,  8.99it/s]


 epoch: 8740 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8741 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8742 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 58%|█████▊    | 8745/15000 [17:36<10:32,  9.89it/s]


 epoch: 8743 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8744 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 8745 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 58%|█████▊    | 8747/15000 [17:36<10:23, 10.03it/s]


 epoch: 8746 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 8747 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8748 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 58%|█████▊    | 8750/15000 [17:37<17:02,  6.12it/s]


 epoch: 8749 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

input:       pylos and sphakteria and sparta sued for peace but the athenians rejected the proposal the athenian failure to regain

target:      pylos and sphakteria and sparta sued for peace but the athenians rejected the proposal the athenian failure to regain control

prediction:  pylos and sphakteria and sparta sued for peace but the athenians rejected the proposal the athenian failure to regain the

 epoch: 8750 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 58%|█████▊    | 8753/15000 [17:37<13:07,  7.94it/s]


 epoch: 8751 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 8752 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8753 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%


 58%|█████▊    | 8755/15000 [17:37<11:44,  8.87it/s]


 epoch: 8754 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8755 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8756 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 58%|█████▊    | 8759/15000 [17:37<10:15, 10.14it/s]


 epoch: 8757 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8758 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8759 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 58%|█████▊    | 8761/15000 [17:38<10:52,  9.56it/s]


input:       between person genotype and sunlight thus suntans are not passed on to people children the phenotype is the ability

target:      between person genotype and sunlight thus suntans are not passed on to people children the phenotype is the ability of

prediction:  between person genotype and sunlight thus suntans are not passed on to people children the phenotype is the ability the

 epoch: 8760 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8761 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 58%|█████▊    | 8763/15000 [17:38<10:36,  9.80it/s]


 epoch: 8762 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

 epoch: 8763 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 58%|█████▊    | 8765/15000 [17:39<17:02,  6.10it/s]


 epoch: 8764 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.0%

 epoch: 8765 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8766 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 58%|█████▊    | 8769/15000 [17:39<12:57,  8.02it/s]


 epoch: 8767 | train_loss: 0.20, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8768 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8769 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 58%|█████▊    | 8770/15000 [17:39<13:21,  7.78it/s]


input:       add on release candidate of powershell version was released on september with final release to the web on november

target:      add on release candidate of powershell version was released on september with final release to the web on november powershell

prediction:  add on release candidate of powershell version was released on september with final release to the web on november the

 epoch: 8770 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8771 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 58%|█████▊    | 8774/15000 [17:39<11:12,  9.26it/s]


 epoch: 8772 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8773 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8774 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 59%|█████▊    | 8776/15000 [17:40<10:37,  9.77it/s]


 epoch: 8775 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.5%

 epoch: 8776 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8777 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 59%|█████▊    | 8780/15000 [17:40<12:49,  8.08it/s]


 epoch: 8778 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8779 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

input:       later as these schools gained popularity more were started and some gained prestige these schools marked the beginning of

target:      later as these schools gained popularity more were started and some gained prestige these schools marked the beginning of modern

prediction:  later as these schools gained popularity more were started and some gained prestige these schools marked the beginning of the

 epoch: 8780 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 59%|█████▊    | 8783/15000 [17:40<11:23,  9.10it/s]


 epoch: 8781 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8782 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8783 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 59%|█████▊    | 8786/15000 [17:41<10:42,  9.67it/s]


 epoch: 8784 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8785 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 8786 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 59%|█████▊    | 8788/15000 [17:41<10:35,  9.77it/s]


 epoch: 8787 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%

 epoch: 8788 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8789 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 59%|█████▊    | 8790/15000 [17:41<10:59,  9.42it/s]


input:       practice in behavioral and cognitive behavioral therapy is exposing patients to things they fear based on the premise that

target:      practice in behavioral and cognitive behavioral therapy is exposing patients to things they fear based on the premise that their

prediction:  practice in behavioral and cognitive behavioral therapy is exposing patients to things they fear based on the premise that the

 epoch: 8790 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8791 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 59%|█████▊    | 8793/15000 [17:42<16:57,  6.10it/s]


 epoch: 8792 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 8793 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 59%|█████▊    | 8795/15000 [17:42<15:11,  6.81it/s]


 epoch: 8794 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8795 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 59%|█████▊    | 8797/15000 [17:42<13:50,  7.47it/s]


 epoch: 8796 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.1%

 epoch: 8797 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 59%|█████▊    | 8799/15000 [17:43<13:07,  7.87it/s]


 epoch: 8798 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8799 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 59%|█████▊    | 8800/15000 [17:43<14:55,  6.92it/s]


input:       were created by independent artists who post work on other subreddits and who receive portion of the profits they

target:      were created by independent artists who post work on other subreddits and who receive portion of the profits they use

prediction:  were created by independent artists who post work on other subreddits and who receive portion of the profits they the

 epoch: 8800 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 59%|█████▊    | 8802/15000 [17:43<14:02,  7.36it/s]


 epoch: 8801 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8802 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 59%|█████▊    | 8804/15000 [17:43<13:15,  7.79it/s]


 epoch: 8803 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 8804 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 59%|█████▊    | 8806/15000 [17:44<13:29,  7.65it/s]


 epoch: 8805 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8806 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.19, test_acc: 97.4%


 59%|█████▊    | 8808/15000 [17:44<17:18,  5.96it/s]


 epoch: 8807 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8808 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 59%|█████▊    | 8810/15000 [17:44<16:21,  6.31it/s]


 epoch: 8809 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

input:       the reign of heraclius would the roman army push so far to the east and roman territory never again

target:      the reign of heraclius would the roman army push so far to the east and roman territory never again reached

prediction:  the reign of heraclius would the roman army push so far to the east and roman territory never again the

 epoch: 8810 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 59%|█████▊    | 8812/15000 [17:45<13:55,  7.41it/s]


 epoch: 8811 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8812 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 59%|█████▉    | 8814/15000 [17:45<12:27,  8.28it/s]


 epoch: 8813 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8814 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8815 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 59%|█████▉    | 8817/15000 [17:45<10:55,  9.43it/s]


 epoch: 8816 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8817 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 8818 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%


 59%|█████▉    | 8820/15000 [17:45<11:35,  8.88it/s]


 epoch: 8819 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

input:       in during the reign of caracalla roman citizenship was granted to all freeborn inhabitants of the empire the severan

target:      in during the reign of caracalla roman citizenship was granted to all freeborn inhabitants of the empire the severan dynasty

prediction:  in during the reign of caracalla roman citizenship was granted to all freeborn inhabitants of the empire the severan the

 epoch: 8820 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%


 59%|█████▉    | 8823/15000 [17:46<11:02,  9.32it/s]


 epoch: 8821 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8822 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8823 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 59%|█████▉    | 8825/15000 [17:46<10:24,  9.90it/s]


 epoch: 8824 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8825 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.8%

 epoch: 8826 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%


 59%|█████▉    | 8829/15000 [17:46<09:23, 10.96it/s]


 epoch: 8827 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8828 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8829 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%


 59%|█████▉    | 8831/15000 [17:46<10:06, 10.17it/s]


input:       first star catalog in which he proposed the modern system of apparent magnitudes the antikythera mechanism device for calculating

target:      first star catalog in which he proposed the modern system of apparent magnitudes the antikythera mechanism device for calculating the

prediction:  first star catalog in which he proposed the modern system of apparent magnitudes the antikythera mechanism device for calculating the

 epoch: 8830 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 8831 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 59%|█████▉    | 8833/15000 [17:47<10:05, 10.19it/s]


 epoch: 8832 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8833 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8834 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 59%|█████▉    | 8837/15000 [17:47<14:15,  7.20it/s]


 epoch: 8835 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8836 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8837 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 59%|█████▉    | 8840/15000 [17:48<12:20,  8.32it/s]


 epoch: 8838 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8839 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

input:       independence from the british monarchy then governed by king george iii and detailed the factors that contributed to their

target:      independence from the british monarchy then governed by king george iii and detailed the factors that contributed to their decision

prediction:  independence from the british monarchy then governed by king george iii and detailed the factors that contributed to their the

 epoch: 8840 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%


 59%|█████▉    | 8842/15000 [17:48<11:15,  9.12it/s]


 epoch: 8841 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8842 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8843 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 59%|█████▉    | 8846/15000 [17:48<10:04, 10.17it/s]


 epoch: 8844 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8845 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8846 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 59%|█████▉    | 8848/15000 [17:48<10:05, 10.17it/s]


 epoch: 8847 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8848 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 59%|█████▉    | 8850/15000 [17:49<15:31,  6.60it/s]


 epoch: 8849 | train_loss: 0.23, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.4%

input:       the first form of the modern technical drawing including perfected exploded view technique to represent internal components those studies

target:      the first form of the modern technical drawing including perfected exploded view technique to represent internal components those studies and

prediction:  the first form of the modern technical drawing including perfected exploded view technique to represent internal components those studies the

 epoch: 8850 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 59%|█████▉    | 8853/15000 [17:49<12:42,  8.06it/s]


 epoch: 8851 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8852 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8853 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.3%


 59%|█████▉    | 8856/15000 [17:50<11:01,  9.28it/s]


 epoch: 8854 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8855 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8856 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 59%|█████▉    | 8858/15000 [17:50<10:42,  9.57it/s]


 epoch: 8857 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.2%

 epoch: 8858 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8859 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 59%|█████▉    | 8860/15000 [17:50<10:43,  9.54it/s]


input:       of psychoanalysis albert bandura helped along the transition in psychology from behaviorism to cognitive psychology bandura and other social

target:      of psychoanalysis albert bandura helped along the transition in psychology from behaviorism to cognitive psychology bandura and other social learning

prediction:  of psychoanalysis albert bandura helped along the transition in psychology from behaviorism to cognitive psychology bandura and other social the

 epoch: 8860 | train_loss: 0.21, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8861 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%


 59%|█████▉    | 8862/15000 [17:50<10:19,  9.91it/s]


 epoch: 8862 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8863 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 59%|█████▉    | 8866/15000 [17:51<13:26,  7.61it/s]


 epoch: 8864 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8865 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.8%

 epoch: 8866 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 59%|█████▉    | 8869/15000 [17:51<11:30,  8.87it/s]


 epoch: 8867 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8868 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 8869 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 59%|█████▉    | 8871/15000 [17:51<11:25,  8.94it/s]


input:       are not restricted to those nations the largest cities in south america by far are paulo rio de

target:      are not restricted to those nations the largest cities in south america by far are paulo rio de janeiro

prediction:  are not restricted to those nations the largest cities in south america by far are paulo rio de the

 epoch: 8870 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 8871 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 59%|█████▉    | 8873/15000 [17:52<10:42,  9.53it/s]


 epoch: 8872 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8873 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8874 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.3%


 59%|█████▉    | 8877/15000 [17:52<10:06, 10.09it/s]


 epoch: 8875 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8876 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8877 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 59%|█████▉    | 8879/15000 [17:53<17:25,  5.86it/s]


 epoch: 8878 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8879 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

input:       lack of state support contributed to low literacy primary education in reading writing and arithmetic might take place at

target:      lack of state support contributed to low literacy primary education in reading writing and arithmetic might take place at home

prediction:  lack of state support contributed to low literacy primary education in reading writing and arithmetic might take place at the


 59%|█████▉    | 8881/15000 [17:53<15:30,  6.57it/s]


 epoch: 8880 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8881 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 8882 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 59%|█████▉    | 8885/15000 [17:53<11:29,  8.87it/s]


 epoch: 8883 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8884 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8885 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 59%|█████▉    | 8887/15000 [17:53<10:54,  9.34it/s]


 epoch: 8886 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8887 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.4%

 epoch: 8888 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 59%|█████▉    | 8889/15000 [17:54<10:25,  9.78it/s]


 epoch: 8889 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       athens in since the defeat was largely blamed on democratic politicians such as cleon and cleophon there was brief

target:      athens in since the defeat was largely blamed on democratic politicians such as cleon and cleophon there was brief reaction

prediction:  athens in since the defeat was largely blamed on democratic politicians such as cleon and cleophon there was brief the

 epoch: 8890 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 59%|█████▉    | 8893/15000 [17:54<10:16,  9.91it/s]


 epoch: 8891 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8892 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8893 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 59%|█████▉    | 8895/15000 [17:54<09:53, 10.28it/s]


 epoch: 8894 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 8895 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8896 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 59%|█████▉    | 8899/15000 [17:54<09:13, 11.03it/s]


 epoch: 8897 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8898 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8899 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.24, test_acc: 96.9%


 59%|█████▉    | 8901/15000 [17:55<09:46, 10.39it/s]


input:       production production and division of labor enabled the mass production of goods the contemporary concept of the economy wasn

target:      production production and division of labor enabled the mass production of goods the contemporary concept of the economy wasn popularly

prediction:  production production and division of labor enabled the mass production of goods the contemporary concept of the economy wasn the

 epoch: 8900 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8901 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.7%


 59%|█████▉    | 8903/15000 [17:55<10:24,  9.77it/s]


 epoch: 8902 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8903 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%


 59%|█████▉    | 8905/15000 [17:55<10:44,  9.45it/s]


 epoch: 8904 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.3%

 epoch: 8905 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 59%|█████▉    | 8907/15000 [17:55<12:06,  8.38it/s]


 epoch: 8906 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8907 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 59%|█████▉    | 8909/15000 [17:56<12:04,  8.41it/s]


 epoch: 8908 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.5%

 epoch: 8909 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 59%|█████▉    | 8910/15000 [17:56<13:34,  7.47it/s]


input:       development this can include factors such as learning new skills developing talents fostering creativity and increasing self knowledge as

target:      development this can include factors such as learning new skills developing talents fostering creativity and increasing self knowledge as well

prediction:  development this can include factors such as learning new skills developing talents fostering creativity and increasing self knowledge as the

 epoch: 8910 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8911 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 59%|█████▉    | 8913/15000 [17:56<12:38,  8.02it/s]


 epoch: 8912 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8913 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 59%|█████▉    | 8915/15000 [17:56<12:24,  8.17it/s]


 epoch: 8914 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8915 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 59%|█████▉    | 8917/15000 [17:57<12:14,  8.28it/s]


 epoch: 8916 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8917 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 59%|█████▉    | 8919/15000 [17:57<12:27,  8.13it/s]


 epoch: 8918 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8919 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 59%|█████▉    | 8920/15000 [17:57<14:40,  6.91it/s]


input:       the great he had daughter galla placidia his son gratian did not survive infancy galla placidia having grown up

target:      the great he had daughter galla placidia his son gratian did not survive infancy galla placidia having grown up at

prediction:  the great he had daughter galla placidia his son gratian did not survive infancy galla placidia having grown up the

 epoch: 8920 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%


 59%|█████▉    | 8922/15000 [17:58<23:58,  4.22it/s]


 epoch: 8921 | train_loss: 0.22, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8922 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 59%|█████▉    | 8924/15000 [17:58<17:45,  5.70it/s]


 epoch: 8923 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8924 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 60%|█████▉    | 8926/15000 [17:58<14:54,  6.79it/s]


 epoch: 8925 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 8926 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 60%|█████▉    | 8928/15000 [17:59<13:25,  7.53it/s]


 epoch: 8927 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8928 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 60%|█████▉    | 8930/15000 [17:59<12:53,  7.85it/s]


 epoch: 8929 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

input:       confederation congress northwest ordinance established the precedent by which the national government would be sovereign and expand westward with

target:      confederation congress northwest ordinance established the precedent by which the national government would be sovereign and expand westward with the

prediction:  confederation congress northwest ordinance established the precedent by which the national government would be sovereign and expand westward with the

 epoch: 8930 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 60%|█████▉    | 8932/15000 [17:59<11:45,  8.60it/s]


 epoch: 8931 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8932 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8933 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.2%

 60%|█████▉    | 8934/15000 [17:59<10:56,  9.25it/s]



 epoch: 8934 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

 epoch: 8935 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 60%|█████▉    | 8938/15000 [18:00<12:08,  8.32it/s]


 epoch: 8936 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%

 epoch: 8937 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 8938 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 60%|█████▉    | 8940/15000 [18:00<11:34,  8.72it/s]


 epoch: 8939 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

input:       of north america other islands often associated with geopolitical south america are the chilo archipelago and robinson crusoe island

target:      of north america other islands often associated with geopolitical south america are the chilo archipelago and robinson crusoe island both

prediction:  of north america other islands often associated with geopolitical south america are the chilo archipelago and robinson crusoe island the

 epoch: 8940 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8941 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.1%


 60%|█████▉    | 8944/15000 [18:00<09:49, 10.27it/s]


 epoch: 8942 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%

 epoch: 8943 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 8944 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 60%|█████▉    | 8946/15000 [18:00<09:50, 10.25it/s]


 epoch: 8945 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 8946 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8947 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 60%|█████▉    | 8948/15000 [18:01<09:30, 10.61it/s]


 epoch: 8948 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8949 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

input:       basic conjugation is he hears if the subject is noun suffixes are not added to the verb m

target:      basic conjugation is he hears if the subject is noun suffixes are not added to the verb mt

prediction:  basic conjugation is he hears if the subject is noun suffixes are not added to the verb m the


 60%|█████▉    | 8951/15000 [18:01<12:59,  7.76it/s]


 epoch: 8950 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%

 epoch: 8951 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8952 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 60%|█████▉    | 8955/15000 [18:02<10:26,  9.65it/s]


 epoch: 8953 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8954 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 8955 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.0%


 60%|█████▉    | 8957/15000 [18:02<09:59, 10.08it/s]


 epoch: 8956 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 8957 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8958 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 60%|█████▉    | 8959/15000 [18:02<09:32, 10.56it/s]


 epoch: 8959 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

input:       reach the geographic south pole using route from the bay of whales and up the axel heiberg glacier one

target:      reach the geographic south pole using route from the bay of whales and up the axel heiberg glacier one month

prediction:  reach the geographic south pole using route from the bay of whales and up the axel heiberg glacier one the

 epoch: 8960 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 96.9%


 60%|█████▉    | 8963/15000 [18:02<09:52, 10.18it/s]


 epoch: 8961 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8962 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8963 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 60%|█████▉    | 8965/15000 [18:03<16:23,  6.14it/s]


 epoch: 8964 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8965 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 8966 | train_loss: 0.23, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.0%


 60%|█████▉    | 8969/15000 [18:03<12:26,  8.08it/s]


 epoch: 8967 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8968 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.5%

 epoch: 8969 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 60%|█████▉    | 8971/15000 [18:04<12:04,  8.32it/s]


input:       credits and the end poem roughly word work written by irish novelist julian gough which takes about nine minutes

target:      credits and the end poem roughly word work written by irish novelist julian gough which takes about nine minutes to

prediction:  credits and the end poem roughly word work written by irish novelist julian gough which takes about nine minutes the

 epoch: 8970 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8971 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 60%|█████▉    | 8974/15000 [18:04<10:56,  9.18it/s]


 epoch: 8972 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8973 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8974 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 60%|█████▉    | 8976/15000 [18:04<10:13,  9.81it/s]


 epoch: 8975 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 8976 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8977 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 60%|█████▉    | 8978/15000 [18:04<11:06,  9.04it/s]


 epoch: 8978 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 8979 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

input:       by seth meyers corpsing or breaking when the comedian laughs unintentionally during portion of the show in which they

target:      by seth meyers corpsing or breaking when the comedian laughs unintentionally during portion of the show in which they are

prediction:  by seth meyers corpsing or breaking when the comedian laughs unintentionally during portion of the show in which they the


 60%|█████▉    | 8981/15000 [18:05<10:44,  9.33it/s]


 epoch: 8980 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8981 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 8982 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 60%|█████▉    | 8985/15000 [18:05<09:33, 10.48it/s]


 epoch: 8983 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 8984 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 8985 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 60%|█████▉    | 8987/15000 [18:05<09:40, 10.37it/s]


 epoch: 8986 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 8987 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 8988 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 60%|█████▉    | 8989/15000 [18:05<09:38, 10.39it/s]


 epoch: 8989 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

input:       in hittite records recounting how confederation of assuwan states including troy unsuccessfully rebelled against the hittite king tudhaliya around

target:      in hittite records recounting how confederation of assuwan states including troy unsuccessfully rebelled against the hittite king tudhaliya around bce

prediction:  in hittite records recounting how confederation of assuwan states including troy unsuccessfully rebelled against the hittite king tudhaliya around the

 epoch: 8990 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.20, test_acc: 97.3%


 60%|█████▉    | 8991/15000 [18:06<10:28,  9.56it/s]


 epoch: 8991 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 8992 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 60%|█████▉    | 8994/15000 [18:06<14:02,  7.13it/s]


 epoch: 8993 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8994 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%

 epoch: 8995 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 60%|█████▉    | 8998/15000 [18:06<11:04,  9.03it/s]


 epoch: 8996 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 8997 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 8998 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%


 60%|██████    | 9000/15000 [18:07<10:43,  9.33it/s]


 epoch: 8999 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

input:       major figures in contemporary linguistics of these times include ferdinand de saussure and noam chomsky language is thought to

target:      major figures in contemporary linguistics of these times include ferdinand de saussure and noam chomsky language is thought to have

prediction:  major figures in contemporary linguistics of these times include ferdinand de saussure and noam chomsky language is thought to the

 epoch: 9000 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9001 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 60%|██████    | 9004/15000 [18:07<09:37, 10.39it/s]


 epoch: 9002 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9003 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9004 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 60%|██████    | 9006/15000 [18:07<09:47, 10.20it/s]


 epoch: 9005 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9006 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 60%|██████    | 9008/15000 [18:08<16:16,  6.14it/s]


 epoch: 9007 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9008 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%

 epoch: 9009 | train_loss: 0.23, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.0%


 60%|██████    | 9010/15000 [18:08<14:38,  6.82it/s]


input:       stipulating that the slave could not be employed for prostitution as prostitutes in ancient rome were often slaves the

target:      stipulating that the slave could not be employed for prostitution as prostitutes in ancient rome were often slaves the burgeoning

prediction:  stipulating that the slave could not be employed for prostitution as prostitutes in ancient rome were often slaves the the

 epoch: 9010 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9011 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 60%|██████    | 9014/15000 [18:08<11:32,  8.65it/s]


 epoch: 9012 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9013 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9014 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.1%


 60%|██████    | 9016/15000 [18:09<11:10,  8.93it/s]


 epoch: 9015 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9016 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%


 60%|██████    | 9018/15000 [18:09<11:15,  8.86it/s]


 epoch: 9017 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 9018 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%


 60%|██████    | 9020/15000 [18:09<12:03,  8.27it/s]


 epoch: 9019 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

input:       and philosophy at rome and gave secondary teachers special exemptions from taxes and legal penalties in the eastern empire

target:      and philosophy at rome and gave secondary teachers special exemptions from taxes and legal penalties in the eastern empire berytus

prediction:  and philosophy at rome and gave secondary teachers special exemptions from taxes and legal penalties in the eastern empire the

 epoch: 9020 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 60%|██████    | 9022/15000 [18:09<12:35,  7.91it/s]


 epoch: 9021 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9022 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%


 60%|██████    | 9024/15000 [18:10<12:22,  8.05it/s]


 epoch: 9023 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9024 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 60%|██████    | 9026/15000 [18:10<11:54,  8.36it/s]


 epoch: 9025 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

 epoch: 9026 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 60%|██████    | 9028/15000 [18:10<11:53,  8.37it/s]


 epoch: 9027 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.19, test_acc: 97.4%

 epoch: 9028 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 60%|██████    | 9029/15000 [18:10<12:05,  8.23it/s]


 epoch: 9029 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

input:       with new functions exon shuffling when new genes are assembled from shuffling pre existing parts domains act as modules

target:      with new functions exon shuffling when new genes are assembled from shuffling pre existing parts domains act as modules with

prediction:  with new functions exon shuffling when new genes are assembled from shuffling pre existing parts domains act as modules the


 60%|██████    | 9031/15000 [18:11<13:58,  7.11it/s]


 epoch: 9030 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9031 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 60%|██████    | 9033/15000 [18:11<13:00,  7.65it/s]


 epoch: 9032 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 9033 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%


 60%|██████    | 9035/15000 [18:11<12:11,  8.15it/s]


 epoch: 9034 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9035 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 60%|██████    | 9037/15000 [18:12<22:14,  4.47it/s]


 epoch: 9036 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9037 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 60%|██████    | 9039/15000 [18:12<17:42,  5.61it/s]


 epoch: 9038 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 9039 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.2%


 60%|██████    | 9040/15000 [18:12<18:10,  5.47it/s]


input:       as they provide the primary record of the majority of the geological history of the earth there are three

target:      as they provide the primary record of the majority of the geological history of the earth there are three major

prediction:  as they provide the primary record of the majority of the geological history of the earth there are three the

 epoch: 9040 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 60%|██████    | 9042/15000 [18:12<14:29,  6.85it/s]


 epoch: 9041 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9042 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 60%|██████    | 9045/15000 [18:13<11:38,  8.53it/s]


 epoch: 9043 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9044 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9045 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 60%|██████    | 9047/15000 [18:13<11:20,  8.74it/s]


 epoch: 9046 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9047 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 60%|██████    | 9049/15000 [18:13<11:03,  8.97it/s]


 epoch: 9048 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 9049 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

input:       commandery the final expansion in this period began during the reign of ying zheng the king of qin his

target:      commandery the final expansion in this period began during the reign of ying zheng the king of qin his unification

prediction:  commandery the final expansion in this period began during the reign of ying zheng the king of qin his the


 60%|██████    | 9052/15000 [18:14<17:43,  5.59it/s]


 epoch: 9050 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9051 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9052 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 60%|██████    | 9054/15000 [18:14<14:17,  6.93it/s]


 epoch: 9053 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 9054 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9055 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%


 60%|██████    | 9058/15000 [18:14<10:52,  9.11it/s]


 epoch: 9056 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9057 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9058 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 60%|██████    | 9060/15000 [18:15<10:51,  9.11it/s]


 epoch: 9059 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%

input:       reported less anxiety in both groups while subjects which rated lower on sense of humour reported less anxiety in

target:      reported less anxiety in both groups while subjects which rated lower on sense of humour reported less anxiety in the

prediction:  reported less anxiety in both groups while subjects which rated lower on sense of humour reported less anxiety in the

 epoch: 9060 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 60%|██████    | 9062/15000 [18:15<10:26,  9.47it/s]


 epoch: 9061 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9062 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 9063 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 60%|██████    | 9066/15000 [18:16<14:06,  7.01it/s]


 epoch: 9064 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9065 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9066 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 60%|██████    | 9069/15000 [18:16<11:34,  8.54it/s]


 epoch: 9067 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9068 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9069 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 60%|██████    | 9071/15000 [18:16<11:20,  8.71it/s]


input:       permanent limit of the empire expansion in that direction in he resumed the roman conquest of britannia that julius

target:      permanent limit of the empire expansion in that direction in he resumed the roman conquest of britannia that julius caesar

prediction:  permanent limit of the empire expansion in that direction in he resumed the roman conquest of britannia that julius the

 epoch: 9070 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9071 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 60%|██████    | 9073/15000 [18:16<10:31,  9.38it/s]


 epoch: 9072 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 9073 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9074 | train_loss: 0.22, train_acc: 97.5% | test_loss: 0.23, test_acc: 97.1%


 61%|██████    | 9077/15000 [18:17<09:45, 10.11it/s]


 epoch: 9075 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 9076 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9077 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.3%


 61%|██████    | 9080/15000 [18:17<15:01,  6.56it/s]


 epoch: 9078 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9079 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

input:       kinds of mutation transition transversion bias gc at bias deletion insertion bias this is related to the idea

target:      kinds of mutation transition transversion bias gc at bias deletion insertion bias this is related to the idea of

prediction:  kinds of mutation transition transversion bias gc at bias deletion insertion bias this is related to the idea the

 epoch: 9080 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.1%


 61%|██████    | 9083/15000 [18:18<11:56,  8.26it/s]


 epoch: 9081 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9082 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9083 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 61%|██████    | 9085/15000 [18:18<10:48,  9.12it/s]


 epoch: 9084 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.4%

 epoch: 9085 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9086 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 61%|██████    | 9089/15000 [18:18<09:33, 10.32it/s]


 epoch: 9087 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9088 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%

 epoch: 9089 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 61%|██████    | 9091/15000 [18:18<10:09,  9.69it/s]


input:       precious metal rather than true money but in the following centuries international traders came to rely on coinage egyptian

target:      precious metal rather than true money but in the following centuries international traders came to rely on coinage egyptian society

prediction:  precious metal rather than true money but in the following centuries international traders came to rely on coinage egyptian the

 epoch: 9090 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.4%

 epoch: 9091 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9092 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.4%


 61%|██████    | 9095/15000 [18:19<14:15,  6.90it/s]


 epoch: 9093 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9094 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9095 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 61%|██████    | 9097/15000 [18:19<12:39,  7.78it/s]


 epoch: 9096 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9097 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9098 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%


 61%|██████    | 9100/15000 [18:20<11:40,  8.42it/s]


 epoch: 9099 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

input:       valve also participated in the event updating team fortress with reddit related cosmetics when the event ended team orangered

target:      valve also participated in the event updating team fortress with reddit related cosmetics when the event ended team orangered was

prediction:  valve also participated in the event updating team fortress with reddit related cosmetics when the event ended team orangered the

 epoch: 9100 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%


 61%|██████    | 9103/15000 [18:20<10:27,  9.40it/s]


 epoch: 9101 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9102 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9103 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 61%|██████    | 9105/15000 [18:20<10:03,  9.76it/s]


 epoch: 9104 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 9105 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.4%

 epoch: 9106 | train_loss: 0.19, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 61%|██████    | 9109/15000 [18:21<14:01,  7.00it/s]


 epoch: 9107 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9108 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 9109 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.3%


 61%|██████    | 9110/15000 [18:21<13:56,  7.05it/s]


input:       los angeles the nation second most populous city is the leader in motion picture production and the most recognizable

target:      los angeles the nation second most populous city is the leader in motion picture production and the most recognizable movie

prediction:  los angeles the nation second most populous city is the leader in motion picture production and the most recognizable the

 epoch: 9110 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9111 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 61%|██████    | 9114/15000 [18:22<10:53,  9.01it/s]


 epoch: 9112 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.26, test_acc: 97.0%

 epoch: 9113 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9114 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 61%|██████    | 9116/15000 [18:22<10:19,  9.49it/s]


 epoch: 9115 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9116 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9117 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%


 61%|██████    | 9118/15000 [18:22<10:00,  9.79it/s]


 epoch: 9118 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9119 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%

input:       on his death granted it to ferdinand and isabella and their successors who never used it when the ottomans

target:      on his death granted it to ferdinand and isabella and their successors who never used it when the ottomans who

prediction:  on his death granted it to ferdinand and isabella and their successors who never used it when the ottomans the


 61%|██████    | 9120/15000 [18:22<10:04,  9.73it/s]


 epoch: 9120 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.4%


 61%|██████    | 9122/15000 [18:23<17:02,  5.75it/s]


 epoch: 9121 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9122 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 61%|██████    | 9124/15000 [18:23<14:49,  6.61it/s]


 epoch: 9123 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 9124 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%


 61%|██████    | 9126/15000 [18:23<13:26,  7.29it/s]


 epoch: 9125 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9126 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 61%|██████    | 9128/15000 [18:23<12:57,  7.55it/s]


 epoch: 9127 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 9128 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 96.9%


 61%|██████    | 9129/15000 [18:24<13:05,  7.48it/s]


 epoch: 9129 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

input:       kumi tanioka the wild update soundtrack with samuel berg and trails and tales soundtrack with the addition of aaron

target:      kumi tanioka the wild update soundtrack with samuel berg and trails and tales soundtrack with the addition of aaron cherof

prediction:  kumi tanioka the wild update soundtrack with samuel berg and trails and tales soundtrack with the addition of aaron the


 61%|██████    | 9131/15000 [18:24<15:13,  6.42it/s]


 epoch: 9130 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9131 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.2%


 61%|██████    | 9133/15000 [18:24<14:23,  6.80it/s]


 epoch: 9132 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9133 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.23, test_acc: 96.9%


 61%|██████    | 9135/15000 [18:25<14:40,  6.66it/s]


 epoch: 9134 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9135 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.2%


 61%|██████    | 9137/15000 [18:25<13:20,  7.33it/s]


 epoch: 9136 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9137 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 61%|██████    | 9139/15000 [18:25<12:28,  7.83it/s]


 epoch: 9138 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9139 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 61%|██████    | 9140/15000 [18:25<13:55,  7.01it/s]


input:       often difficult to see where the initial problem happened in those cases memory debugger tools may be needed in

target:      often difficult to see where the initial problem happened in those cases memory debugger tools may be needed in certain

prediction:  often difficult to see where the initial problem happened in those cases memory debugger tools may be needed in the

 epoch: 9140 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 61%|██████    | 9142/15000 [18:25<12:41,  7.69it/s]


 epoch: 9141 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9142 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 61%|██████    | 9144/15000 [18:26<12:06,  8.06it/s]


 epoch: 9143 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9144 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 61%|██████    | 9146/15000 [18:26<11:49,  8.25it/s]


 epoch: 9145 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9146 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 61%|██████    | 9148/15000 [18:26<11:20,  8.60it/s]


 epoch: 9147 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9148 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%


 61%|██████    | 9149/15000 [18:26<11:25,  8.53it/s]


 epoch: 9149 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

input:       by the end of the fifth century ad india would remain fragmented into smaller states until the rise of

target:      by the end of the fifth century ad india would remain fragmented into smaller states until the rise of the

prediction:  by the end of the fifth century ad india would remain fragmented into smaller states until the rise of the


 61%|██████    | 9151/15000 [18:27<22:11,  4.39it/s]


 epoch: 9150 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.3%

 epoch: 9151 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 61%|██████    | 9153/15000 [18:27<16:52,  5.78it/s]


 epoch: 9152 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9153 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 61%|██████    | 9155/15000 [18:28<14:37,  6.66it/s]


 epoch: 9154 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.3%

 epoch: 9155 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 61%|██████    | 9157/15000 [18:28<12:24,  7.85it/s]


 epoch: 9156 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 9157 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 9158 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.4%


 61%|██████    | 9160/15000 [18:28<11:56,  8.15it/s]


 epoch: 9159 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

input:       became increasingly specialized resulting in true multicellular organisms with the ozone layer absorbing harmful ultraviolet radiation life colonized the

target:      became increasingly specialized resulting in true multicellular organisms with the ozone layer absorbing harmful ultraviolet radiation life colonized the surface

prediction:  became increasingly specialized resulting in true multicellular organisms with the ozone layer absorbing harmful ultraviolet radiation life colonized the the

 epoch: 9160 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 61%|██████    | 9163/15000 [18:28<10:35,  9.19it/s]


 epoch: 9161 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9162 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9163 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 61%|██████    | 9166/15000 [18:29<14:29,  6.71it/s]


 epoch: 9164 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9165 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9166 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%


 61%|██████    | 9169/15000 [18:29<11:25,  8.51it/s]


 epoch: 9167 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 9168 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9169 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 61%|██████    | 9170/15000 [18:29<12:05,  8.03it/s]


input:       binding the argument value to the parameter is done by powershell itself but for external executables arguments are parsed

target:      binding the argument value to the parameter is done by powershell itself but for external executables arguments are parsed by

prediction:  binding the argument value to the parameter is done by powershell itself but for external executables arguments are parsed the

 epoch: 9170 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9171 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 61%|██████    | 9174/15000 [18:30<09:57,  9.76it/s]


 epoch: 9172 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9173 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9174 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 61%|██████    | 9176/15000 [18:30<09:45,  9.95it/s]


 epoch: 9175 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9176 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9177 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%


 61%|██████    | 9178/15000 [18:31<16:21,  5.93it/s]


 epoch: 9178 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9179 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

input:       global offensive with eleague esports csgo and eleague cs go premier docu series on the tbs network counter strike

target:      global offensive with eleague esports csgo and eleague cs go premier docu series on the tbs network counter strike global

prediction:  global offensive with eleague esports csgo and eleague cs go premier docu series on the tbs network counter strike the


 61%|██████    | 9181/15000 [18:31<13:31,  7.17it/s]


 epoch: 9180 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9181 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9182 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 61%|██████    | 9185/15000 [18:31<10:37,  9.12it/s]


 epoch: 9183 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9184 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9185 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 61%|██████    | 9187/15000 [18:31<10:00,  9.67it/s]


 epoch: 9186 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.0%

 epoch: 9187 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9188 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 61%|██████▏   | 9189/15000 [18:32<09:45,  9.93it/s]


 epoch: 9189 | train_loss: 0.21, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

input:       could explain the diversity of plants and animals from common ancestry through the working of natural laws in the

target:      could explain the diversity of plants and animals from common ancestry through the working of natural laws in the same

prediction:  could explain the diversity of plants and animals from common ancestry through the working of natural laws in the the

 epoch: 9190 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 61%|██████▏   | 9191/15000 [18:32<09:58,  9.70it/s]


 epoch: 9191 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.0%


 61%|██████▏   | 9193/15000 [18:33<16:42,  5.79it/s]


 epoch: 9192 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9193 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 9194 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 61%|██████▏   | 9197/15000 [18:33<12:21,  7.83it/s]


 epoch: 9195 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9196 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9197 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 61%|██████▏   | 9199/15000 [18:33<11:31,  8.39it/s]


 epoch: 9198 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.3%

 epoch: 9199 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

input:       worth the latter occupying more primary role in students hierarchy of needs this article incorporates text from free content

target:      worth the latter occupying more primary role in students hierarchy of needs this article incorporates text from free content work

prediction:  worth the latter occupying more primary role in students hierarchy of needs this article incorporates text from free content the


 61%|██████▏   | 9202/15000 [18:33<10:20,  9.34it/s]


 epoch: 9200 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9201 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9202 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 61%|██████▏   | 9204/15000 [18:34<09:39, 10.01it/s]


 epoch: 9203 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9204 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9205 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 61%|██████▏   | 9206/15000 [18:34<09:26, 10.23it/s]


 epoch: 9206 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 61%|██████▏   | 9208/15000 [18:34<15:14,  6.34it/s]


 epoch: 9207 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9208 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 96.9%

 epoch: 9209 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 61%|██████▏   | 9210/15000 [18:35<13:50,  6.97it/s]


input:       how from different critical perspectives these perspectives include futurism or techno utopianism technological determinism social constructionism postmodernism poststructuralism and

target:      how from different critical perspectives these perspectives include futurism or techno utopianism technological determinism social constructionism postmodernism poststructuralism and feminist

prediction:  how from different critical perspectives these perspectives include futurism or techno formations technological determinism social tedious postmodernism unanimously and the

 epoch: 9210 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.8%

 epoch: 9211 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 61%|██████▏   | 9214/15000 [18:35<11:03,  8.72it/s]


 epoch: 9212 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9213 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 9214 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 61%|██████▏   | 9216/15000 [18:35<10:21,  9.31it/s]


 epoch: 9215 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9216 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 61%|██████▏   | 9218/15000 [18:35<10:24,  9.26it/s]


 epoch: 9217 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9218 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9219 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 61%|██████▏   | 9220/15000 [18:36<10:31,  9.16it/s]


input:       of phenomena and usually frame their understanding in mathematical terms they work across wide range of research fields spanning

target:      of phenomena and usually frame their understanding in mathematical terms they work across wide range of research fields spanning all

prediction:  of phenomena and usually frame their understanding in mathematical terms they work across wide range of research fields spanning the

 epoch: 9220 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 61%|██████▏   | 9223/15000 [18:36<14:46,  6.52it/s]


 epoch: 9221 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 9222 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9223 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 62%|██████▏   | 9225/15000 [18:36<12:47,  7.52it/s]


 epoch: 9224 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9225 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9226 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 62%|██████▏   | 9229/15000 [18:37<10:17,  9.35it/s]


 epoch: 9227 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.26, test_acc: 96.8%

 epoch: 9228 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 9229 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 62%|██████▏   | 9231/15000 [18:37<10:28,  9.19it/s]


input:       to write down on piece of paper the meanings of the ten commandments despite the subjects own expectations of

target:      to write down on piece of paper the meanings of the ten commandments despite the subjects own expectations of consensus

prediction:  to write down on piece of paper the meanings of the ten commandments despite the subjects own expectations of the

 epoch: 9230 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9231 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 62%|██████▏   | 9233/15000 [18:37<10:10,  9.44it/s]


 epoch: 9232 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 9233 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9234 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 62%|██████▏   | 9236/15000 [18:38<16:00,  6.00it/s]


 epoch: 9235 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9236 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 62%|██████▏   | 9238/15000 [18:38<14:13,  6.75it/s]


 epoch: 9237 | train_loss: 0.19, train_acc: 97.4% | test_loss: 0.25, test_acc: 97.2%

 epoch: 9238 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 62%|██████▏   | 9240/15000 [18:38<14:17,  6.72it/s]


 epoch: 9239 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

input:       g voluntary nature of participation in the research privacy and well being minimizing distress of research participants university

target:      g voluntary nature of participation in the research privacy and well being minimizing distress of research participants university ethics

prediction:  is voluntary nature of participation in the research privacy and well being minimizing distress of research participants university the

 epoch: 9240 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 62%|██████▏   | 9242/15000 [18:39<12:48,  7.49it/s]


 epoch: 9241 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9242 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 62%|██████▏   | 9244/15000 [18:39<12:21,  7.76it/s]


 epoch: 9243 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9244 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 62%|██████▏   | 9246/15000 [18:39<12:49,  7.48it/s]


 epoch: 9245 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9246 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 62%|██████▏   | 9248/15000 [18:40<12:45,  7.51it/s]


 epoch: 9247 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9248 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 62%|██████▏   | 9250/15000 [18:40<14:07,  6.79it/s]


 epoch: 9249 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

input:       copies gamespot announced in december that minecraft sold over million copies since the game debuted on xbox live arcade

target:      copies gamespot announced in december that minecraft sold over million copies since the game debuted on xbox live arcade in

prediction:  copies gamespot announced in december that minecraft sold over million copies since the game debuted on xbox live arcade the

 epoch: 9250 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 62%|██████▏   | 9252/15000 [18:40<13:06,  7.31it/s]


 epoch: 9251 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9252 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 62%|██████▏   | 9254/15000 [18:40<12:14,  7.82it/s]


 epoch: 9253 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9254 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 62%|██████▏   | 9256/15000 [18:41<11:56,  8.02it/s]


 epoch: 9255 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 9256 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 62%|██████▏   | 9258/15000 [18:41<11:22,  8.41it/s]


 epoch: 9257 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.1%

 epoch: 9258 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 62%|██████▏   | 9260/15000 [18:41<12:30,  7.65it/s]


 epoch: 9259 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

input:       of an effort to be more data driven reddit built data analytics pipeline on top of apache kafka and

target:      of an effort to be more data driven reddit built data analytics pipeline on top of apache kafka and hive

prediction:  of an effort to be more data driven reddit built data analytics pipeline on top of apache kafka and the

 epoch: 9260 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 62%|██████▏   | 9262/15000 [18:41<11:35,  8.25it/s]


 epoch: 9261 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9262 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 62%|██████▏   | 9265/15000 [18:42<17:15,  5.54it/s]


 epoch: 9263 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9264 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.4%

 epoch: 9265 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 62%|██████▏   | 9268/15000 [18:42<12:23,  7.71it/s]


 epoch: 9266 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 9267 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 96.9%

 epoch: 9268 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.3%


 62%|██████▏   | 9270/15000 [18:43<11:21,  8.41it/s]


 epoch: 9269 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

input:       was held for the first time in africa antarctica asia australia europe north america south america afro eurasia america

target:      was held for the first time in africa antarctica asia australia europe north america south america afro eurasia america eurasia

prediction:  was held for the first time in africa antarctica asia australia europe north america south america afro eurasia america the

 epoch: 9270 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 9271 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 62%|██████▏   | 9274/15000 [18:43<09:27, 10.08it/s]


 epoch: 9272 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9273 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 9274 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 62%|██████▏   | 9276/15000 [18:43<09:17, 10.26it/s]


 epoch: 9275 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 9276 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 62%|██████▏   | 9278/15000 [18:43<11:56,  7.98it/s]


 epoch: 9277 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9278 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9279 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 62%|██████▏   | 9280/15000 [18:44<11:22,  8.38it/s]


input:       have acceded to the treaty countries can participate in decision making if they can demonstrate that they do significant

target:      have acceded to the treaty countries can participate in decision making if they can demonstrate that they do significant research

prediction:  have acceded to the treaty countries can participate in decision making if they can demonstrate that they do significant the

 epoch: 9280 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9281 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 62%|██████▏   | 9284/15000 [18:44<09:55,  9.60it/s]


 epoch: 9282 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9283 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%

 epoch: 9284 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 62%|██████▏   | 9286/15000 [18:44<09:50,  9.67it/s]


 epoch: 9285 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9286 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9287 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 62%|██████▏   | 9290/15000 [18:45<09:33,  9.96it/s]


 epoch: 9288 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9289 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.0%

input:       individual city senator was not itself an elected office in ancient rome an individual gained admission to the senate

target:      individual city senator was not itself an elected office in ancient rome an individual gained admission to the senate after

prediction:  individual city senator was not itself an elected office in ancient rome an individual gained admission to the senate the

 epoch: 9290 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 62%|██████▏   | 9292/15000 [18:45<10:15,  9.28it/s]


 epoch: 9291 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9292 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 62%|██████▏   | 9295/15000 [18:45<09:27, 10.05it/s]


 epoch: 9293 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9294 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 9295 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 62%|██████▏   | 9297/15000 [18:45<09:11, 10.34it/s]


 epoch: 9296 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9297 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 9298 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%


 62%|██████▏   | 9299/15000 [18:45<09:10, 10.36it/s]


 epoch: 9299 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.1%

input:       which is the most recent period of geologic time magma is the original unlithified source of all igneous rocks

target:      which is the most recent period of geologic time magma is the original unlithified source of all igneous rocks the

prediction:  which is the most recent period of geologic time magma is the original unlithified source of all igneous rocks the

 epoch: 9300 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.3%


 62%|██████▏   | 9303/15000 [18:46<09:12, 10.31it/s]


 epoch: 9301 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9302 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9303 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.2%


 62%|██████▏   | 9305/15000 [18:46<08:56, 10.61it/s]


 epoch: 9304 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9305 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%


 62%|██████▏   | 9307/15000 [18:46<10:19,  9.20it/s]


 epoch: 9306 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9307 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9308 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 62%|██████▏   | 9311/15000 [18:47<09:40,  9.81it/s]


 epoch: 9309 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

input:       say spectacles could quickly become sites of social and political protest and emperors sometimes had to deploy force to

target:      say spectacles could quickly become sites of social and political protest and emperors sometimes had to deploy force to put

prediction:  say spectacles could quickly become sites of social and political protest and emperors sometimes had to deploy force to the

 epoch: 9310 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9311 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 62%|██████▏   | 9313/15000 [18:47<09:23, 10.09it/s]


 epoch: 9312 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9313 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9314 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 62%|██████▏   | 9317/15000 [18:47<08:53, 10.66it/s]


 epoch: 9315 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9316 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9317 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9318 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 62%|██████▏   | 9320/15000 [18:48<15:12,  6.22it/s]


 epoch: 9319 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

input:       information about the man who attacked alt right figure richard spencer the forum users and moderators accused reddit administrators

target:      information about the man who attacked alt right figure richard spencer the forum users and moderators accused reddit administrators of

prediction:  information about the man who attacked alt right figure richard spencer the forum users and moderators accused reddit administrators the

 epoch: 9320 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 62%|██████▏   | 9323/15000 [18:48<11:58,  7.91it/s]


 epoch: 9321 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9322 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9323 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 62%|██████▏   | 9325/15000 [18:49<11:18,  8.36it/s]


 epoch: 9324 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9325 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9326 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 62%|██████▏   | 9329/15000 [18:49<09:18, 10.15it/s]


 epoch: 9327 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.8%

 epoch: 9328 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9329 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 62%|██████▏   | 9331/15000 [18:49<09:59,  9.46it/s]


input:       presumed homosexuality and its role in his art particularly in the androgyny and eroticism manifested in saint john the

target:      presumed homosexuality and its role in his art particularly in the androgyny and eroticism manifested in saint john the baptist

prediction:  presumed homosexuality and its role in his art particularly in the androgyny and eroticism manifested in saint john the the

 epoch: 9330 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 9331 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 62%|██████▏   | 9333/15000 [18:49<10:02,  9.41it/s]


 epoch: 9332 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

 epoch: 9333 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 62%|██████▏   | 9335/15000 [18:50<09:28,  9.96it/s]


 epoch: 9334 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9335 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 9336 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 62%|██████▏   | 9339/15000 [18:50<08:39, 10.89it/s]


 epoch: 9337 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.19, test_acc: 97.4%

 epoch: 9338 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9339 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.0%


 62%|██████▏   | 9341/15000 [18:50<09:15, 10.19it/s]


input:       when athens sent force to aid sparta in overcoming helot revolt but this aid was rejected by the spartans

target:      when athens sent force to aid sparta in overcoming helot revolt but this aid was rejected by the spartans in

prediction:  when athens sent force to aid sparta in overcoming helot revolt but this aid was rejected by the spartans the

 epoch: 9340 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9341 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 62%|██████▏   | 9343/15000 [18:50<09:14, 10.21it/s]


 epoch: 9342 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.5%

 epoch: 9343 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9344 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 62%|██████▏   | 9345/15000 [18:50<09:05, 10.37it/s]


 epoch: 9345 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 9346 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%


 62%|██████▏   | 9348/15000 [18:51<14:41,  6.41it/s]


 epoch: 9347 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9348 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.0%


 62%|██████▏   | 9350/15000 [18:52<14:54,  6.32it/s]


 epoch: 9349 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

input:       lessening the heavy and persistent rains in central and eastern africa since this time dry conditions have prevailed in

target:      lessening the heavy and persistent rains in central and eastern africa since this time dry conditions have prevailed in eastern

prediction:  lessening the heavy and persistent rains in central and eastern africa since this time dry conditions have prevailed in the

 epoch: 9350 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 62%|██████▏   | 9352/15000 [18:52<13:14,  7.11it/s]


 epoch: 9351 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9352 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 62%|██████▏   | 9354/15000 [18:52<12:13,  7.70it/s]


 epoch: 9353 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9354 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.4%


 62%|██████▏   | 9356/15000 [18:52<11:58,  7.85it/s]


 epoch: 9355 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9356 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 62%|██████▏   | 9358/15000 [18:53<12:25,  7.57it/s]


 epoch: 9357 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9358 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 62%|██████▏   | 9359/15000 [18:53<12:21,  7.61it/s]


 epoch: 9359 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

input:       sheriff departments in their municipal or county jurisdictions the state police departments have authority in their respective state and

target:      sheriff departments in their municipal or county jurisdictions the state police departments have authority in their respective state and federal

prediction:  sheriff departments in their municipal or county jurisdictions the state police departments have authority in their respective state and the


 62%|██████▏   | 9360/15000 [18:53<14:39,  6.41it/s]


 epoch: 9360 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.4%


 62%|██████▏   | 9362/15000 [18:54<22:17,  4.21it/s]


 epoch: 9361 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9362 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 62%|██████▏   | 9364/15000 [18:54<16:35,  5.66it/s]


 epoch: 9363 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9364 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.4%


 62%|██████▏   | 9366/15000 [18:54<13:52,  6.77it/s]


 epoch: 9365 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9366 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 62%|██████▏   | 9368/15000 [18:54<13:06,  7.16it/s]


 epoch: 9367 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9368 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 62%|██████▏   | 9370/15000 [18:55<12:14,  7.67it/s]


 epoch: 9369 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

input:       the originator of the term scientist in his book the philosophy of the inductive sciences standard undergraduate physics curriculum

target:      the originator of the term scientist in his book the philosophy of the inductive sciences standard undergraduate physics curriculum consists

prediction:  the originator of the term scientist in his book the philosophy of the inductive sciences standard undergraduate physics curriculum the

 epoch: 9370 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 62%|██████▏   | 9372/15000 [18:55<10:58,  8.55it/s]


 epoch: 9371 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9372 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9373 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 62%|██████▏   | 9374/15000 [18:55<10:19,  9.09it/s]


 epoch: 9374 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9375 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 63%|██████▎   | 9378/15000 [18:56<11:08,  8.41it/s]


 epoch: 9376 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.4%

 epoch: 9377 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9378 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 63%|██████▎   | 9380/15000 [18:56<11:22,  8.24it/s]


 epoch: 9379 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

input:       innards are discouraging to any but the pure scientific mind counter to this argument protests against offensive cartoons invite

target:      innards are discouraging to any but the pure scientific mind counter to this argument protests against offensive cartoons invite the

prediction:  innards are discouraging to any but the pure scientific mind counter to this argument protests against offensive cartoons invite the

 epoch: 9380 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 63%|██████▎   | 9382/15000 [18:56<10:15,  9.12it/s]


 epoch: 9381 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9382 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9383 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 63%|██████▎   | 9386/15000 [18:56<09:05, 10.30it/s]


 epoch: 9384 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 9385 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9386 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.4%


 63%|██████▎   | 9388/15000 [18:56<09:08, 10.23it/s]


 epoch: 9387 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 9388 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9389 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

input:       adoration of the magi commission from the monks of san donato scopeto it is complex composition of about centimetres

target:      adoration of the magi commission from the monks of san donato scopeto it is complex composition of about centimetres leonardo

prediction:  adoration of the magi commission from the monks of san donato scopeto it is complex composition of about centimetres the


 63%|██████▎   | 9392/15000 [18:57<13:39,  6.84it/s]


 epoch: 9390 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9391 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9392 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 63%|██████▎   | 9395/15000 [18:58<11:08,  8.39it/s]


 epoch: 9393 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 9394 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.1%

 epoch: 9395 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 63%|██████▎   | 9397/15000 [18:58<10:12,  9.14it/s]


 epoch: 9396 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.6%

 epoch: 9397 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 9398 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 63%|██████▎   | 9399/15000 [18:58<09:36,  9.72it/s]


 epoch: 9399 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%

input:       serious concern un chief ant nio guterres for instance has described it as an existential threat to humanity furthermore

target:      serious concern un chief ant nio guterres for instance has described it as an existential threat to humanity furthermore the

prediction:  serious concern un chief ant nio guterres for instance has described it as an existential threat to humanity furthermore the

 epoch: 9400 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 63%|██████▎   | 9403/15000 [18:58<09:20,  9.98it/s]


 epoch: 9401 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9402 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 9403 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 63%|██████▎   | 9405/15000 [18:59<15:13,  6.13it/s]


 epoch: 9404 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.4%

 epoch: 9405 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9406 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 63%|██████▎   | 9409/15000 [18:59<11:32,  8.07it/s]


 epoch: 9407 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 9408 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9409 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%


 63%|██████▎   | 9410/15000 [18:59<11:47,  7.90it/s]


input:       peninsula as the province of achaea in bc greece was key eastern province of the roman empire as the

target:      peninsula as the province of achaea in bc greece was key eastern province of the roman empire as the roman

prediction:  peninsula as the province of achaea in bc greece was key eastern province of the roman empire as the the

 epoch: 9410 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9411 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 63%|██████▎   | 9414/15000 [19:00<09:51,  9.45it/s]


 epoch: 9412 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9413 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9414 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.27, test_acc: 96.8%


 63%|██████▎   | 9416/15000 [19:00<09:19,  9.99it/s]


 epoch: 9415 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9416 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%

 epoch: 9417 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.0%


 63%|██████▎   | 9420/15000 [19:01<11:33,  8.05it/s]


 epoch: 9418 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9419 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

input:       enable continual evolution and adaptation in response to coevolution with other species in an ever changing environment another hypothesis

target:      enable continual evolution and adaptation in response to coevolution with other species in an ever changing environment another hypothesis is

prediction:  enable continual evolution and adaptation in response to coevolution with other species in an ever changing environment another hypothesis the

 epoch: 9420 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 63%|██████▎   | 9422/15000 [19:01<10:28,  8.88it/s]


 epoch: 9421 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 9422 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9423 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%


 63%|██████▎   | 9426/15000 [19:01<09:01, 10.30it/s]


 epoch: 9424 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9425 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.1%

 epoch: 9426 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 63%|██████▎   | 9428/15000 [19:01<08:53, 10.45it/s]


 epoch: 9427 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.2%

 epoch: 9428 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 96.8%

 epoch: 9429 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 63%|██████▎   | 9430/15000 [19:02<09:26,  9.84it/s]


input:       interaction of malay or the colonial languages with indigenous languages such as tok pisin bislama chavacano various malay trade

target:      interaction of malay or the colonial languages with indigenous languages such as tok pisin bislama chavacano various malay trade and

prediction:  interaction of malay or the colonial languages with indigenous languages such as tok pisin bislama chavacano various malay trade the

 epoch: 9430 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.2%

 epoch: 9431 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.1%


 63%|██████▎   | 9434/15000 [19:02<13:31,  6.86it/s]


 epoch: 9432 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 9433 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.23, test_acc: 96.9%

 epoch: 9434 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 63%|██████▎   | 9437/15000 [19:03<11:07,  8.34it/s]


 epoch: 9435 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9436 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9437 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 63%|██████▎   | 9439/15000 [19:03<10:11,  9.09it/s]


 epoch: 9438 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9439 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

input:       of the northern plains are the celtic sea the north sea the baltic sea complex and barents sea the

target:      of the northern plains are the celtic sea the north sea the baltic sea complex and barents sea the northern

prediction:  of the northern plains are the celtic sea the north sea the baltic sea complex and barents sea the the


 63%|██████▎   | 9441/15000 [19:03<10:06,  9.17it/s]


 epoch: 9440 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9441 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9442 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.25, test_acc: 96.8%


 63%|██████▎   | 9445/15000 [19:03<09:13, 10.04it/s]


 epoch: 9443 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9444 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9445 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 63%|██████▎   | 9447/15000 [19:04<15:44,  5.88it/s]


 epoch: 9446 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 9447 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 9448 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 63%|██████▎   | 9450/15000 [19:04<13:08,  7.04it/s]


 epoch: 9449 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

input:       which were created specially for custom maps in java edition data packs introduced in version of the java edition

target:      which were created specially for custom maps in java edition data packs introduced in version of the java edition allow

prediction:  which were created specially for custom maps in java edition data packs introduced in version of the java edition the

 epoch: 9450 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 63%|██████▎   | 9452/15000 [19:05<12:13,  7.56it/s]


 epoch: 9451 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9452 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 63%|██████▎   | 9454/15000 [19:05<10:59,  8.41it/s]


 epoch: 9453 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 9454 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9455 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 63%|██████▎   | 9457/15000 [19:05<10:31,  8.78it/s]


 epoch: 9456 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.20, test_acc: 97.1%

 epoch: 9457 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.26, test_acc: 96.9%


 63%|██████▎   | 9459/15000 [19:05<10:17,  8.97it/s]


 epoch: 9458 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9459 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

input:       atlantic ridge the arabian plate and somali plate to the east the eurasian plate aegean sea plate and anatolian


 63%|██████▎   | 9460/15000 [19:05<11:24,  8.10it/s]


target:      atlantic ridge the arabian plate and somali plate to the east the eurasian plate aegean sea plate and anatolian plate

prediction:  atlantic ridge the arabian plate and somali plate to the east the eurasian plate aegean sea plate and anatolian the

 epoch: 9460 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.0%


 63%|██████▎   | 9462/15000 [19:06<17:51,  5.17it/s]


 epoch: 9461 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 9462 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 63%|██████▎   | 9464/15000 [19:06<14:25,  6.39it/s]


 epoch: 9463 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 9464 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.2%


 63%|██████▎   | 9466/15000 [19:07<12:22,  7.45it/s]


 epoch: 9465 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9466 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 63%|██████▎   | 9468/15000 [19:07<11:31,  8.00it/s]


 epoch: 9467 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9468 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.9%


 63%|██████▎   | 9470/15000 [19:07<11:19,  8.14it/s]


 epoch: 9469 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

input:       leader of germany and began to work towards his goal of building greater germany germany re expanded and took

target:      leader of germany and began to work towards his goal of building greater germany germany re expanded and took back

prediction:  leader of germany and began to work towards his goal of building greater germany germany re expanded and took the

 epoch: 9470 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 63%|██████▎   | 9472/15000 [19:07<11:20,  8.12it/s]


 epoch: 9471 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 9472 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%


 63%|██████▎   | 9473/15000 [19:07<10:45,  8.56it/s]


 epoch: 9473 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9474 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%


 63%|██████▎   | 9476/15000 [19:08<17:07,  5.38it/s]


 epoch: 9475 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9476 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 63%|██████▎   | 9478/15000 [19:08<13:44,  6.70it/s]


 epoch: 9477 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9478 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.0%

 epoch: 9479 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%


 63%|██████▎   | 9480/15000 [19:09<12:36,  7.30it/s]


input:       habit of purchasing caged birds and releasing them leonardo had many friends who are now notable either in their

target:      habit of purchasing caged birds and releasing them leonardo had many friends who are now notable either in their fields

prediction:  habit of purchasing caged birds and releasing them leonardo had many friends who are now notable either in their the

 epoch: 9480 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 63%|██████▎   | 9482/15000 [19:09<10:59,  8.36it/s]


 epoch: 9481 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9482 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9483 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 63%|██████▎   | 9486/15000 [19:09<09:17,  9.89it/s]


 epoch: 9484 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 9485 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.25, test_acc: 97.0%

 epoch: 9486 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.1%


 63%|██████▎   | 9488/15000 [19:09<09:17,  9.89it/s]


 epoch: 9487 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9488 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9489 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.0%


 63%|██████▎   | 9490/15000 [19:10<09:38,  9.53it/s]


input:       describe the invasion annexation division and colonization of most of africa by seven western european powers during an era

target:      describe the invasion annexation division and colonization of most of africa by seven western european powers during an era known

prediction:  describe the invasion annexation division and colonization of most of africa by seven western european powers during an era the

 epoch: 9490 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9491 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%


 63%|██████▎   | 9494/15000 [19:10<08:53, 10.33it/s]


 epoch: 9492 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9493 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9494 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 63%|██████▎   | 9496/15000 [19:10<08:51, 10.36it/s]


 epoch: 9495 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9496 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9497 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 63%|██████▎   | 9498/15000 [19:10<08:51, 10.36it/s]


 epoch: 9498 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9499 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

input:       on his death granted it to ferdinand and isabella and their successors who never used it when the ottomans

target:      on his death granted it to ferdinand and isabella and their successors who never used it when the ottomans who

prediction:  on his death granted it to ferdinand and isabella and their successors who never used it when the ottomans the


 63%|██████▎   | 9501/15000 [19:11<09:20,  9.81it/s]


 epoch: 9500 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 9501 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.1%


 63%|██████▎   | 9503/15000 [19:11<09:36,  9.53it/s]


 epoch: 9502 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9503 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 63%|██████▎   | 9506/15000 [19:12<14:52,  6.16it/s]


 epoch: 9504 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.8%

 epoch: 9505 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.1%

 epoch: 9506 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 63%|██████▎   | 9508/15000 [19:12<12:25,  7.37it/s]


 epoch: 9507 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9508 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.27, test_acc: 96.9%

 epoch: 9509 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 63%|██████▎   | 9510/15000 [19:12<11:36,  7.88it/s]


input:       deposed orestes usurper son romulus augustus this event has been traditionally considered the fall of the roman empire in

target:      deposed orestes usurper son romulus augustus this event has been traditionally considered the fall of the roman empire in the

prediction:  deposed orestes usurper son romulus augustus this event has been traditionally considered the fall of the roman empire in the

 epoch: 9510 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9511 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 63%|██████▎   | 9514/15000 [19:12<09:46,  9.35it/s]


 epoch: 9512 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%

 epoch: 9513 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9514 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 63%|██████▎   | 9516/15000 [19:13<09:29,  9.64it/s]


 epoch: 9515 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 96.9%

 epoch: 9516 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 9517 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 63%|██████▎   | 9520/15000 [19:13<13:54,  6.57it/s]


 epoch: 9518 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%

 epoch: 9519 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

input:       colombia and ecuador and the second group in peru and chile south america is also home to one of

target:      colombia and ecuador and the second group in peru and chile south america is also home to one of the

prediction:  colombia and ecuador and the second group in peru and chile south america is also home to one of the

 epoch: 9520 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 63%|██████▎   | 9523/15000 [19:14<11:21,  8.04it/s]


 epoch: 9521 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%

 epoch: 9522 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9523 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 64%|██████▎   | 9525/15000 [19:14<10:21,  8.81it/s]


 epoch: 9524 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9525 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9526 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 64%|██████▎   | 9529/15000 [19:14<09:07,  9.99it/s]


 epoch: 9527 | train_loss: 0.23, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9528 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9529 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%


 64%|██████▎   | 9531/15000 [19:14<09:45,  9.34it/s]


input:       unit that includes all of the organisms ie the community in given area interacting with the physical environment so

target:      unit that includes all of the organisms ie the community in given area interacting with the physical environment so that

prediction:  unit that includes all of the organisms ie the community in given area interacting with the physical environment so the

 epoch: 9530 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%

 epoch: 9531 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 64%|██████▎   | 9533/15000 [19:15<09:34,  9.52it/s]


 epoch: 9532 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9533 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9534 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 64%|██████▎   | 9537/15000 [19:15<08:55, 10.21it/s]


 epoch: 9535 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9536 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.8%

 epoch: 9537 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 64%|██████▎   | 9539/15000 [19:15<08:56, 10.18it/s]


 epoch: 9538 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9539 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

input:       widely improved iron working and light chariots with spoked wheels in warfare the hittites introduced the casting of iron

target:      widely improved iron working and light chariots with spoked wheels in warfare the hittites introduced the casting of iron with

prediction:  widely improved iron working and light chariots with spoked wheels in warfare the hittites introduced the casting of iron the


 64%|██████▎   | 9541/15000 [19:15<09:36,  9.47it/s]


 epoch: 9540 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9541 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9542 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 64%|██████▎   | 9545/15000 [19:16<08:45, 10.39it/s]


 epoch: 9543 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 9544 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.3%

 epoch: 9545 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9546 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 64%|██████▎   | 9548/15000 [19:16<10:12,  8.90it/s]


 epoch: 9547 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9548 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.7%

 epoch: 9549 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%


 64%|██████▎   | 9550/15000 [19:16<10:19,  8.80it/s]


input:       source needed private schools usually rely on fees from families whose children attend the school for funding however sometimes

target:      source needed private schools usually rely on fees from families whose children attend the school for funding however sometimes such

prediction:  source needed private schools usually rely on fees from families whose children attend the school for funding however sometimes the

 epoch: 9550 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 9551 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 64%|██████▎   | 9553/15000 [19:17<09:36,  9.46it/s]


 epoch: 9552 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 9553 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9554 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 64%|██████▎   | 9557/15000 [19:17<08:39, 10.49it/s]


 epoch: 9555 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.9%

 epoch: 9556 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9557 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 64%|██████▎   | 9559/15000 [19:17<08:46, 10.34it/s]


 epoch: 9558 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 9559 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

input:       discovered by europeans during the th century approximately half of the population on these islands are european australian mainlanders

target:      discovered by europeans during the th century approximately half of the population on these islands are european australian mainlanders with

prediction:  discovered by europeans during the th century approximately half of the population on these islands are european australian mainlanders the


 64%|██████▎   | 9561/15000 [19:18<09:27,  9.59it/s]


 epoch: 9560 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9561 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%


 64%|██████▍   | 9564/15000 [19:18<13:34,  6.67it/s]


 epoch: 9562 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 9563 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9564 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 64%|██████▍   | 9566/15000 [19:18<11:45,  7.70it/s]


 epoch: 9565 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 9566 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 64%|██████▍   | 9568/15000 [19:19<11:05,  8.17it/s]


 epoch: 9567 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9568 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.25, test_acc: 97.2%


 64%|██████▍   | 9570/15000 [19:19<11:36,  7.80it/s]


 epoch: 9569 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%

input:       in the united states launched total of spaceflights the had active satellites in space in december the highest

target:      in the united states launched total of spaceflights the had active satellites in space in december the highest number

prediction:  in the united states launched total of spaceflights the had active satellites in space in december the highest the

 epoch: 9570 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 64%|██████▍   | 9572/15000 [19:19<11:10,  8.10it/s]


 epoch: 9571 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9572 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 64%|██████▍   | 9574/15000 [19:19<11:22,  7.95it/s]


 epoch: 9573 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9574 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 64%|██████▍   | 9575/15000 [19:20<11:19,  7.99it/s]


 epoch: 9575 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9576 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 64%|██████▍   | 9577/15000 [19:20<12:36,  7.17it/s]


 epoch: 9577 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%


 64%|██████▍   | 9579/15000 [19:20<11:27,  7.88it/s]


 epoch: 9578 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9579 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

input:       melded psychology with the nazi theory of biology and racial origins criticizing psychoanalysis as study of the weak and


 64%|██████▍   | 9580/15000 [19:20<12:19,  7.33it/s]


target:      melded psychology with the nazi theory of biology and racial origins criticizing psychoanalysis as study of the weak and deformed

prediction:  melded psychology with the nazi theory of biology and racial origins criticizing psychoanalysis as study of the weak and the

 epoch: 9580 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9581 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.8%


 64%|██████▍   | 9583/15000 [19:21<10:47,  8.37it/s]


 epoch: 9582 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.23, test_acc: 96.9%

 epoch: 9583 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 64%|██████▍   | 9585/15000 [19:21<10:21,  8.71it/s]


 epoch: 9584 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9585 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%


 64%|██████▍   | 9587/15000 [19:21<10:42,  8.42it/s]


 epoch: 9586 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9587 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 96.9%


 64%|██████▍   | 9589/15000 [19:21<11:03,  8.16it/s]


 epoch: 9588 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9589 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.4%

input:       in using psychoanalysis and other forms of psychodynamic psychotherapy to understand and treat the mentally ill psychotherapy as conducted

target:      in using psychoanalysis and other forms of psychodynamic psychotherapy to understand and treat the mentally ill psychotherapy as conducted by

prediction:  in using psychoanalysis and other forms of psychodynamic psychotherapy to understand and treat the mentally ill psychotherapy as conducted the


 64%|██████▍   | 9591/15000 [19:22<21:11,  4.25it/s]


 epoch: 9590 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9591 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 64%|██████▍   | 9593/15000 [19:22<16:21,  5.51it/s]


 epoch: 9592 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9593 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%


 64%|██████▍   | 9595/15000 [19:23<13:37,  6.61it/s]


 epoch: 9594 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9595 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.4%


 64%|██████▍   | 9597/15000 [19:23<11:55,  7.55it/s]


 epoch: 9596 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 9597 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 64%|██████▍   | 9598/15000 [19:23<11:03,  8.14it/s]


 epoch: 9598 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9599 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

input:       aristotle divided all living things between plants which generally do not move fast enough for humans to notice and

target:      aristotle divided all living things between plants which generally do not move fast enough for humans to notice and animals

prediction:  aristotle divided all living things between plants which generally do not move fast enough for humans to notice and the


 64%|██████▍   | 9601/15000 [19:23<10:40,  8.43it/s]


 epoch: 9600 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 9601 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 64%|██████▍   | 9603/15000 [19:23<10:07,  8.88it/s]


 epoch: 9602 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9603 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 64%|██████▍   | 9604/15000 [19:24<09:56,  9.05it/s]


 epoch: 9604 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 64%|██████▍   | 9607/15000 [19:24<14:35,  6.16it/s]


 epoch: 9605 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9606 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9607 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 64%|██████▍   | 9608/15000 [19:24<13:32,  6.63it/s]


 epoch: 9608 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9609 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

input:       development and developmental processes produce novel features these studies have shown that evolution can alter development to produce new

target:      development and developmental processes produce novel features these studies have shown that evolution can alter development to produce new structures

prediction:  development and developmental processes produce novel features these studies have shown that evolution can alter development to produce new the


 64%|██████▍   | 9612/15000 [19:25<10:28,  8.57it/s]


 epoch: 9610 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 9611 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9612 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 64%|██████▍   | 9615/15000 [19:25<09:27,  9.50it/s]


 epoch: 9613 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 9614 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9615 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 64%|██████▍   | 9617/15000 [19:25<09:20,  9.61it/s]


 epoch: 9616 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9617 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9618 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 64%|██████▍   | 9620/15000 [19:26<12:24,  7.23it/s]


 epoch: 9619 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

input:       local split screen console only and servers player hosted and business hosted players can run their own servers use

target:      local split screen console only and servers player hosted and business hosted players can run their own servers use hosting

prediction:  local split screen console only and servers player hosted and business hosted players can run their own servers use the

 epoch: 9620 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 64%|██████▍   | 9622/15000 [19:26<10:43,  8.35it/s]


 epoch: 9621 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.0%

 epoch: 9622 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 9623 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 64%|██████▍   | 9626/15000 [19:26<09:00,  9.94it/s]


 epoch: 9624 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9625 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

 epoch: 9626 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 64%|██████▍   | 9628/15000 [19:27<08:42, 10.28it/s]


 epoch: 9627 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 9628 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.9%

 epoch: 9629 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%


 64%|██████▍   | 9630/15000 [19:27<09:12,  9.71it/s]


input:       expressing that he couldn help but feel that had been there and done that dozen times before although reviewers

target:      expressing that he couldn help but feel that had been there and done that dozen times before although reviewers liked

prediction:  expressing that he couldn help but feel that had been there and done that dozen times before although reviewers the

 epoch: 9630 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

 epoch: 9631 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 64%|██████▍   | 9632/15000 [19:27<09:09,  9.77it/s]


 epoch: 9632 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 64%|██████▍   | 9634/15000 [19:28<14:51,  6.02it/s]


 epoch: 9633 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 9634 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9635 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%


 64%|██████▍   | 9638/15000 [19:28<10:35,  8.44it/s]


 epoch: 9636 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9637 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 9638 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 64%|██████▍   | 9640/15000 [19:28<10:15,  8.70it/s]


 epoch: 9639 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%

input:       selling vita game in the country minecraft helped improve microsoft total first party revenue by million for the second

target:      selling vita game in the country minecraft helped improve microsoft total first party revenue by million for the second quarter

prediction:  selling vita game in the country minecraft helped improve microsoft total first party revenue by million for the second the

 epoch: 9640 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 64%|██████▍   | 9642/15000 [19:28<09:30,  9.40it/s]


 epoch: 9641 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9642 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9643 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 64%|██████▍   | 9646/15000 [19:29<08:50, 10.09it/s]


 epoch: 9644 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9645 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9646 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 64%|██████▍   | 9648/15000 [19:29<14:41,  6.07it/s]


 epoch: 9647 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9648 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9649 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 64%|██████▍   | 9650/15000 [19:30<13:08,  6.79it/s]


input:       on the territory of georgia in the caucasus convention still followed by herodotus in the th century bce herodotus

target:      on the territory of georgia in the caucasus convention still followed by herodotus in the th century bce herodotus mentioned

prediction:  on the territory of georgia in the caucasus convention still followed by herodotus in the th century bce herodotus the

 epoch: 9650 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9651 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 64%|██████▍   | 9654/15000 [19:30<10:36,  8.41it/s]


 epoch: 9652 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9653 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.2%

 epoch: 9654 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.3%


 64%|██████▍   | 9657/15000 [19:30<09:33,  9.31it/s]


 epoch: 9655 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9656 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 9657 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 64%|██████▍   | 9659/15000 [19:30<09:15,  9.62it/s]


 epoch: 9658 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9659 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

input:       to critical imports such as bronze and wood the new kingdom pharaohs began large scale building campaign to promote

target:      to critical imports such as bronze and wood the new kingdom pharaohs began large scale building campaign to promote the

prediction:  to critical imports such as bronze and wood the new kingdom pharaohs began large scale building campaign to promote the


 64%|██████▍   | 9661/15000 [19:31<10:29,  8.47it/s]


 epoch: 9660 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9661 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%


 64%|██████▍   | 9663/15000 [19:31<09:42,  9.16it/s]


 epoch: 9662 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9663 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9664 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 64%|██████▍   | 9667/15000 [19:31<08:39, 10.27it/s]


 epoch: 9665 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 9666 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 9667 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 64%|██████▍   | 9669/15000 [19:31<08:32, 10.41it/s]


 epoch: 9668 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9669 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       the conquests of alexander the great spread hellenistic civilization from the western mediterranean to central asia the hellenistic period

target:      the conquests of alexander the great spread hellenistic civilization from the western mediterranean to central asia the hellenistic period ended

prediction:  the conquests of alexander the great spread hellenistic civilization from the western mediterranean to central asia the hellenistic period the


 64%|██████▍   | 9671/15000 [19:32<08:56,  9.94it/s]


 epoch: 9670 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9671 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9672 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%


 64%|██████▍   | 9673/15000 [19:32<08:49, 10.06it/s]


 epoch: 9673 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9674 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 65%|██████▍   | 9677/15000 [19:33<13:00,  6.82it/s]


 epoch: 9675 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%

 epoch: 9676 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 9677 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 65%|██████▍   | 9679/15000 [19:33<12:01,  7.38it/s]


 epoch: 9678 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.1%

 epoch: 9679 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

input:       for how to take rubydung forward infiniminer heavily influenced the visual style of gameplay including bringing back the first


 65%|██████▍   | 9681/15000 [19:33<11:56,  7.42it/s]


target:      for how to take rubydung forward infiniminer heavily influenced the visual style of gameplay including bringing back the first person

prediction:  for how to take rubydung forward infiniminer heavily influenced the visual style of gameplay including bringing back the first the

 epoch: 9680 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9681 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 65%|██████▍   | 9684/15000 [19:33<10:01,  8.84it/s]


 epoch: 9682 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9683 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.5%

 epoch: 9684 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 65%|██████▍   | 9687/15000 [19:34<09:15,  9.57it/s]


 epoch: 9685 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9686 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%

 epoch: 9687 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%


 65%|██████▍   | 9689/15000 [19:34<09:11,  9.63it/s]


 epoch: 9688 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9689 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

input:       many classifications of education it depends on the institutional framework whether education is formal non formal or informal levels

target:      many classifications of education it depends on the institutional framework whether education is formal non formal or informal levels of

prediction:  many classifications of education it depends on the institutional framework whether education is formal non formal or informal levels the


 65%|██████▍   | 9691/15000 [19:35<18:15,  4.85it/s]


 epoch: 9690 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9691 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.4%


 65%|██████▍   | 9693/15000 [19:35<14:19,  6.17it/s]


 epoch: 9692 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9693 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 65%|██████▍   | 9695/15000 [19:35<12:35,  7.02it/s]


 epoch: 9694 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9695 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 65%|██████▍   | 9697/15000 [19:35<11:34,  7.63it/s]


 epoch: 9696 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9697 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 65%|██████▍   | 9699/15000 [19:36<11:01,  8.01it/s]


 epoch: 9698 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9699 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

input:       regular censuses and the size of herd reflected the prestige and importance of the estate or temple that owned


 65%|██████▍   | 9700/15000 [19:36<11:55,  7.41it/s]


target:      regular censuses and the size of herd reflected the prestige and importance of the estate or temple that owned them

prediction:  regular censuses and the size of herd reflected the prestige and importance of the estate or temple that owned the

 epoch: 9700 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9701 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 65%|██████▍   | 9703/15000 [19:36<10:17,  8.58it/s]


 epoch: 9702 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 9703 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 65%|██████▍   | 9705/15000 [19:37<14:18,  6.17it/s]


 epoch: 9704 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9705 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 65%|██████▍   | 9707/15000 [19:37<12:20,  7.15it/s]


 epoch: 9706 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.4%

 epoch: 9707 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 65%|██████▍   | 9709/15000 [19:37<11:23,  7.75it/s]


 epoch: 9708 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 9709 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%

input:       akhenaten revolutionary religious ideas this style known as amarna art was quickly abandoned after akhenaten death and replaced by


 65%|██████▍   | 9710/15000 [19:37<12:08,  7.27it/s]


target:      akhenaten revolutionary religious ideas this style known as amarna art was quickly abandoned after akhenaten death and replaced by the

prediction:  akhenaten revolutionary religious ideas this style known as amarna art was quickly abandoned after akhenaten death and replaced by the

 epoch: 9710 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9711 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%


 65%|██████▍   | 9713/15000 [19:38<10:06,  8.71it/s]


 epoch: 9712 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9713 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 65%|██████▍   | 9715/15000 [19:38<09:57,  8.85it/s]


 epoch: 9714 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9715 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 65%|██████▍   | 9717/15000 [19:38<09:26,  9.32it/s]


 epoch: 9716 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9717 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 65%|██████▍   | 9718/15000 [19:39<21:33,  4.08it/s]


 epoch: 9718 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

 epoch: 9719 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

input:       modern internet technology originates had been an early locus of radio engineering alongside the original mandate for robustness and

target:      modern internet technology originates had been an early locus of radio engineering alongside the original mandate for robustness and resiliency

prediction:  modern internet technology originates had been an early locus of radio engineering alongside the original mandate for robustness and the


 65%|██████▍   | 9721/15000 [19:39<14:25,  6.10it/s]


 epoch: 9720 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9721 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9722 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 65%|██████▍   | 9725/15000 [19:39<10:14,  8.58it/s]


 epoch: 9723 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9724 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 9725 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 65%|██████▍   | 9727/15000 [19:39<09:22,  9.37it/s]


 epoch: 9726 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 9727 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9728 | train_loss: 0.26, train_acc: 96.6% | test_loss: 0.25, test_acc: 97.2%


 65%|██████▍   | 9729/15000 [19:40<09:01,  9.74it/s]


 epoch: 9729 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

input:       finite age and light travels at finite speed there may be events in the past whose light has not

target:      finite age and light travels at finite speed there may be events in the past whose light has not yet

prediction:  finite age and light travels at finite speed there may be events in the past whose light has not the

 epoch: 9730 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 65%|██████▍   | 9731/15000 [19:40<09:27,  9.28it/s]


 epoch: 9731 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9732 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 65%|██████▍   | 9735/15000 [19:40<09:16,  9.45it/s]


 epoch: 9733 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.2%

 epoch: 9734 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 9735 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 65%|██████▍   | 9737/15000 [19:40<09:07,  9.61it/s]


 epoch: 9736 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9737 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9738 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 65%|██████▍   | 9739/15000 [19:41<08:46, 10.00it/s]


 epoch: 9739 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

input:       been used as term in aeronautics before entering the world of computers in an interview grace hopper remarked that

target:      been used as term in aeronautics before entering the world of computers in an interview grace hopper remarked that she

prediction:  been used as term in aeronautics before entering the world of computers in an interview grace hopper remarked that the

 epoch: 9740 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 65%|██████▍   | 9742/15000 [19:41<09:18,  9.41it/s]


 epoch: 9741 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9742 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 65%|██████▍   | 9744/15000 [19:41<09:17,  9.43it/s]


 epoch: 9743 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9744 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 65%|██████▍   | 9745/15000 [19:41<09:32,  9.18it/s]


 epoch: 9745 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 9746 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 65%|██████▍   | 9748/15000 [19:42<10:52,  8.05it/s]


 epoch: 9747 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 9748 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9749 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 65%|██████▌   | 9750/15000 [19:42<10:26,  8.38it/s]


input:       study from an experimental standpoint such as identifying the causes of depression they conduct longitudinal studies large group of

target:      study from an experimental standpoint such as identifying the causes of depression they conduct longitudinal studies large group of depression

prediction:  study from an experimental standpoint such as identifying the causes of depression they conduct longitudinal studies large group of the

 epoch: 9750 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9751 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 65%|██████▌   | 9754/15000 [19:42<09:00,  9.71it/s]


 epoch: 9752 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 9753 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 9754 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 65%|██████▌   | 9756/15000 [19:43<08:44,  9.99it/s]


 epoch: 9755 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9756 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9757 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 65%|██████▌   | 9758/15000 [19:43<08:37, 10.12it/s]


 epoch: 9758 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9759 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

input:       ages estimated to have been around bce the sahara had again become green fertile valley and its african populations

target:      ages estimated to have been around bce the sahara had again become green fertile valley and its african populations returned

prediction:  ages estimated to have been around bce the sahara had again become green fertile valley and its african populations the


 65%|██████▌   | 9761/15000 [19:43<08:56,  9.77it/s]


 epoch: 9760 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9761 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%


 65%|██████▌   | 9763/15000 [19:44<14:44,  5.92it/s]


 epoch: 9762 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9763 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 9764 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 65%|██████▌   | 9767/15000 [19:44<10:32,  8.27it/s]


 epoch: 9765 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9766 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9767 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 65%|██████▌   | 9768/15000 [19:44<10:10,  8.57it/s]


 epoch: 9768 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9769 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

input:       mart argentina the two most important libertadores bol var led great uprising in the north then led his army

target:      mart argentina the two most important libertadores bol var led great uprising in the north then led his army southward

prediction:  mart argentina the two most important libertadores bol var led great uprising in the north then led his army the


 65%|██████▌   | 9771/15000 [19:44<09:42,  8.97it/s]


 epoch: 9770 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9771 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 65%|██████▌   | 9773/15000 [19:45<09:33,  9.11it/s]


 epoch: 9772 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9773 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.1%


 65%|██████▌   | 9774/15000 [19:45<09:21,  9.31it/s]


 epoch: 9774 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9775 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 65%|██████▌   | 9777/15000 [19:45<09:45,  8.92it/s]


 epoch: 9776 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9777 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9778 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 65%|██████▌   | 9780/15000 [19:45<09:27,  9.19it/s]


 epoch: 9779 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

input:       the previous operations and introduced new character models and battle pass system in april source code for versions of

target:      the previous operations and introduced new character models and battle pass system in april source code for versions of counter

prediction:  the previous operations and introduced new character models and battle pass system in april source code for versions of the

 epoch: 9780 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 65%|██████▌   | 9782/15000 [19:46<08:56,  9.73it/s]


 epoch: 9781 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9782 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9783 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 65%|██████▌   | 9786/15000 [19:46<08:07, 10.69it/s]


 epoch: 9784 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9785 | train_loss: 0.21, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9786 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 65%|██████▌   | 9788/15000 [19:46<08:13, 10.56it/s]


 epoch: 9787 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9788 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9789 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

input:       to have created internal strife between rich and poor in many city states in sparta the messenian wars resulted

target:      to have created internal strife between rich and poor in many city states in sparta the messenian wars resulted in

prediction:  to have created internal strife between rich and poor in many city states in sparta the messenian wars resulted the


 65%|██████▌   | 9792/15000 [19:47<10:36,  8.18it/s]


 epoch: 9790 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9791 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9792 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 65%|██████▌   | 9794/15000 [19:47<10:07,  8.57it/s]


 epoch: 9793 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.4%

 epoch: 9794 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 65%|██████▌   | 9797/15000 [19:47<10:22,  8.36it/s]


 epoch: 9795 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9796 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9797 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%


 65%|██████▌   | 9799/15000 [19:48<10:33,  8.21it/s]


 epoch: 9798 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.23, test_acc: 96.9%

 epoch: 9799 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.4%


 65%|██████▌   | 9800/15000 [19:48<12:11,  7.11it/s]


input:       greek influence throughout europe and also aided in the establishment of long distance trading networks between the greek city

target:      greek influence throughout europe and also aided in the establishment of long distance trading networks between the greek city states

prediction:  greek influence throughout europe and also aided in the establishment of long distance trading networks between the greek city the

 epoch: 9800 | train_loss: 0.20, train_acc: 97.5% | test_loss: 0.24, test_acc: 97.1%


 65%|██████▌   | 9802/15000 [19:48<11:32,  7.51it/s]


 epoch: 9801 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9802 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.4%


 65%|██████▌   | 9803/15000 [19:48<11:26,  7.57it/s]


 epoch: 9803 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 65%|██████▌   | 9805/15000 [19:49<15:55,  5.44it/s]


 epoch: 9804 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9805 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 65%|██████▌   | 9807/15000 [19:49<13:47,  6.28it/s]


 epoch: 9806 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 9807 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 65%|██████▌   | 9809/15000 [19:49<12:14,  7.07it/s]


 epoch: 9808 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9809 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%


 65%|██████▌   | 9810/15000 [19:49<13:14,  6.54it/s]


input:       domestically the united states experienced economic growth urbanization and rapid population growth following world war ii the construction of

target:      domestically the united states experienced economic growth urbanization and rapid population growth following world war ii the construction of an

prediction:  domestically the united states experienced economic growth urbanization and rapid population growth following world war ii the construction of the

 epoch: 9810 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.8%


 65%|██████▌   | 9812/15000 [19:50<13:05,  6.61it/s]


 epoch: 9811 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9812 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.2%


 65%|██████▌   | 9814/15000 [19:50<12:20,  7.00it/s]


 epoch: 9813 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9814 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 65%|██████▌   | 9816/15000 [19:50<11:38,  7.43it/s]


 epoch: 9815 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9816 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 65%|██████▌   | 9818/15000 [19:50<11:04,  7.80it/s]


 epoch: 9817 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.27, test_acc: 97.0%

 epoch: 9818 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 65%|██████▌   | 9819/15000 [19:51<13:01,  6.63it/s]


 epoch: 9819 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

input:       distance east and north from the mediterranean sea without the inclusion of any mountain ranges cartographer herman moll suggested

target:      distance east and north from the mediterranean sea without the inclusion of any mountain ranges cartographer herman moll suggested in

prediction:  distance east and north from the mediterranean sea without the inclusion of any mountain ranges cartographer herman moll suggested the

 epoch: 9820 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 65%|██████▌   | 9821/15000 [19:51<13:09,  6.56it/s]



 epoch: 9821 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 65%|██████▌   | 9823/15000 [19:51<11:48,  7.31it/s]


 epoch: 9822 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 9823 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%


 66%|██████▌   | 9825/15000 [19:51<10:23,  8.29it/s]


 epoch: 9824 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9825 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 66%|██████▌   | 9827/15000 [19:52<09:37,  8.96it/s]


 epoch: 9826 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9827 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 66%|██████▌   | 9829/15000 [19:52<09:27,  9.11it/s]


 epoch: 9828 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9829 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

input:       of comedy include screwball comedy which derives its humor largely from bizarre surprising and improbable situations or characters and

target:     

 66%|██████▌   | 9831/15000 [19:52<10:24,  8.27it/s]

 of comedy include screwball comedy which derives its humor largely from bizarre surprising and improbable situations or characters and black

prediction:  of comedy include screwball comedy which derives its humor largely from bizarre surprising and improbable situations or characters and the

 epoch: 9830 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9831 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 66%|██████▌   | 9832/15000 [19:52<10:23,  8.29it/s]


 epoch: 9832 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%


 66%|██████▌   | 9835/15000 [19:53<14:11,  6.06it/s]


 epoch: 9833 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 9834 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9835 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 66%|██████▌   | 9838/15000 [19:53<10:51,  7.92it/s]


 epoch: 9836 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 9837 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9838 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.0%


 66%|██████▌   | 9840/15000 [19:53<10:44,  8.00it/s]


 epoch: 9839 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       if the connection was unencrypted as they could redirect the user to url of their choice recursive clones were

target:      if the connection was unencrypted as they could redirect the user to url of their choice recursive clones were also

prediction:  if the connection was unencrypted as they could redirect the user to url of their choice recursive clones were the

 epoch: 9840 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 66%|██████▌   | 9843/15000 [19:54<09:27,  9.08it/s]


 epoch: 9841 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%

 epoch: 9842 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.4%

 epoch: 9843 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 66%|██████▌   | 9845/15000 [19:54<09:21,  9.18it/s]


 epoch: 9844 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9845 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9846 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.20, test_acc: 97.3%


 66%|██████▌   | 9848/15000 [19:55<15:02,  5.71it/s]


 epoch: 9847 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9848 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.4%

 epoch: 9849 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 66%|██████▌   | 9850/15000 [19:55<12:50,  6.69it/s]


input:       the monkeys in process by which species become more numerous the new science of the th century rejected the

target:      the monkeys in process by which species become more numerous the new science of the th century rejected the aristotelian

prediction:  the monkeys in process by which species become more numerous the new science of the th century rejected the the

 epoch: 9850 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9851 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 66%|██████▌   | 9853/15000 [19:55<10:41,  8.02it/s]


 epoch: 9852 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9853 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 9854 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.9%


 66%|██████▌   | 9857/15000 [19:56<08:47,  9.75it/s]


 epoch: 9855 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9856 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9857 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 66%|██████▌   | 9859/15000 [19:56<08:53,  9.65it/s]


 epoch: 9858 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9859 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

input:       suffered land and agrarian crisis in the late th century bc again resulting in civil strife the archon chief

target:      suffered land and agrarian crisis in the late th century bc again resulting in civil strife the archon chief magistrate

prediction:  suffered land and agrarian crisis in the late th century bc again resulting in civil strife the archon chief the

 epoch: 9860 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 66%|██████▌   | 9863/15000 [19:57<13:08,  6.51it/s]


 epoch: 9861 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 9862 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%

 epoch: 9863 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 66%|██████▌   | 9866/15000 [19:57<10:46,  7.94it/s]


 epoch: 9864 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9865 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 9866 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.25, test_acc: 96.8%


 66%|██████▌   | 9868/15000 [19:57<09:38,  8.87it/s]


 epoch: 9867 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9868 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9869 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 66%|██████▌   | 9870/15000 [19:57<09:40,  8.83it/s]


input:       expansion of the universe and today corresponds to approximately this tipped the balance of evidence in favor of the

target:      expansion of the universe and today corresponds to approximately this tipped the balance of evidence in favor of the big

prediction:  expansion of the universe and today corresponds to approximately this tipped the balance of evidence in favor of the the

 epoch: 9870 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.19, test_acc: 97.1%

 epoch: 9871 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.4%


 66%|██████▌   | 9874/15000 [19:58<08:40,  9.85it/s]


 epoch: 9872 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9873 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9874 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%


 66%|██████▌   | 9876/15000 [19:58<12:02,  7.10it/s]


 epoch: 9875 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9876 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9877 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 66%|██████▌   | 9880/15000 [19:59<10:03,  8.49it/s]


 epoch: 9878 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9879 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

input:       as having low levels of both actual and perceived corruption and human rights rankings place the united states highly

target:      as having low levels of both actual and perceived corruption and human rights rankings place the united states highly the

prediction:  as having low levels of both actual and perceived corruption and human rights rankings place the united states highly the

 epoch: 9880 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 66%|██████▌   | 9883/15000 [19:59<08:59,  9.49it/s]


 epoch: 9881 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9882 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.4%

 epoch: 9883 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 66%|██████▌   | 9885/15000 [19:59<08:37,  9.88it/s]


 epoch: 9884 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9885 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 9886 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 66%|██████▌   | 9889/15000 [19:59<08:14, 10.35it/s]


 epoch: 9887 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 9888 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 9889 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.4%

input:       for segregated schools and economic functions including the preferential training of black americans for manual labor these practices were

target:      for segregated schools and economic functions including the preferential training of black americans for manual labor these practices were criticized

prediction:  for segregated schools and economic functions including the preferential training of black americans for manual labor these practices were the


 66%|██████▌   | 9891/15000 [20:00<15:05,  5.64it/s]


 epoch: 9890 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9891 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9892 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 66%|██████▌   | 9895/15000 [20:00<11:08,  7.63it/s]


 epoch: 9893 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9894 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9895 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 66%|██████▌   | 9897/15000 [20:01<10:15,  8.29it/s]


 epoch: 9896 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9897 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9898 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 66%|██████▌   | 9899/15000 [20:01<09:26,  9.00it/s]


 epoch: 9899 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

input:       are taken into consideration when user trust factor is developed this was done in an attempt to let the

target:      are taken into consideration when user trust factor is developed this was done in an attempt to let the community

prediction:  are taken into consideration when user trust factor is developed this was done in an attempt to let the the

 epoch: 9900 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 66%|██████▌   | 9902/15000 [20:01<09:16,  9.16it/s]


 epoch: 9901 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 9902 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.2%

 epoch: 9903 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 66%|██████▌   | 9905/15000 [20:02<14:58,  5.67it/s]


 epoch: 9904 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9905 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 66%|██████▌   | 9907/15000 [20:02<12:34,  6.75it/s]


 epoch: 9906 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.0%

 epoch: 9907 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.2%


 66%|██████▌   | 9909/15000 [20:02<10:54,  7.78it/s]


 epoch: 9908 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.8%

 epoch: 9909 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

input:       be verified to have entered into regime where the laws of physics as we understand them specifically general relativity

target:     

 66%|██████▌   | 9911/15000 [20:03<11:03,  7.67it/s]

 be verified to have entered into regime where the laws of physics as we understand them specifically general relativity and

prediction:  be verified to have entered into regime where the laws of physics as we understand them specifically general relativity the

 epoch: 9910 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9911 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.4%


 66%|██████▌   | 9913/15000 [20:03<09:54,  8.56it/s]


 epoch: 9912 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9913 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 66%|██████▌   | 9915/15000 [20:03<09:33,  8.87it/s]


 epoch: 9914 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9915 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 66%|██████▌   | 9917/15000 [20:03<09:28,  8.94it/s]


 epoch: 9916 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 9917 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9918 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.0%


 66%|██████▌   | 9920/15000 [20:04<15:21,  5.51it/s]


 epoch: 9919 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

input:       they chose funny men as potential relationship partners even though they rated them as being less honest and intelligent

target:      they chose funny men as potential relationship partners even though they rated them as being less honest and intelligent post

prediction:  they chose funny men as potential relationship partners even though they rated them as being less honest and intelligent the

 epoch: 9920 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 66%|██████▌   | 9922/15000 [20:04<12:55,  6.54it/s]


 epoch: 9921 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9922 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 66%|██████▌   | 9924/15000 [20:04<11:34,  7.31it/s]


 epoch: 9923 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9924 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 66%|██████▌   | 9926/15000 [20:05<11:08,  7.59it/s]


 epoch: 9925 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9926 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 66%|██████▌   | 9927/15000 [20:05<11:07,  7.60it/s]


 epoch: 9927 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9928 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 66%|██████▌   | 9930/15000 [20:05<10:53,  7.76it/s]


 epoch: 9929 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

input:       cases use almost identical compositions geometric patterns and mythological scenes occur throughout the empire in north africa particularly rich

target:      cases use almost identical compositions geometric patterns and mythological scenes occur throughout the empire in north africa particularly rich source

prediction:  cases use almost identical compositions geometric patterns and mythological scenes occur throughout the empire in north africa particularly rich the

 epoch: 9930 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 66%|██████▌   | 9932/15000 [20:05<09:51,  8.56it/s]


 epoch: 9931 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9932 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 66%|██████▌   | 9934/15000 [20:06<11:57,  7.06it/s]


 epoch: 9933 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 9934 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.8%


 66%|██████▌   | 9936/15000 [20:06<10:27,  8.07it/s]


 epoch: 9935 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 9936 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 66%|██████▋   | 9939/15000 [20:06<09:07,  9.24it/s]


 epoch: 9937 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9938 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9939 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 66%|██████▋   | 9940/15000 [20:06<09:56,  8.48it/s]


input:       used continental boundaries key blue states which straddle the border between europe and asia green countries not geographically in

target:      used continental boundaries key blue states which straddle the border between europe and asia green countries not geographically in europe

prediction:  used continental boundaries key blue states which straddle the border between europe and asia green countries not geographically in the

 epoch: 9940 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.24, test_acc: 96.9%

 epoch: 9941 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 66%|██████▋   | 9944/15000 [20:07<08:30,  9.90it/s]


 epoch: 9942 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 9943 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9944 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 66%|██████▋   | 9946/15000 [20:07<08:37,  9.77it/s]


 epoch: 9945 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.2%

 epoch: 9946 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 66%|██████▋   | 9949/15000 [20:08<13:47,  6.10it/s]


 epoch: 9947 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9948 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9949 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 66%|██████▋   | 9950/15000 [20:08<13:23,  6.28it/s]


input:       conditions starting in the united kingdom then subsequently spreading throughout europe north america and eventually the world the onset

target:      conditions starting in the united kingdom then subsequently spreading throughout europe north america and eventually the world the onset of

prediction:  conditions starting in the united kingdom then subsequently spreading throughout europe north america and eventually the world the onset the

 epoch: 9950 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 9951 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 66%|██████▋   | 9953/15000 [20:08<10:34,  7.95it/s]


 epoch: 9952 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.3%

 epoch: 9953 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9954 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 66%|██████▋   | 9957/15000 [20:09<08:31,  9.86it/s]


 epoch: 9955 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9956 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9957 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 66%|██████▋   | 9959/15000 [20:09<08:31,  9.85it/s]


 epoch: 9958 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.0%

 epoch: 9959 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

input:       tribes of siberia synthesized new russian empire extending to the ural mountains and beyond founded in citation needed in

target:      tribes of siberia synthesized new russian empire extending to the ural mountains and beyond founded in citation needed in sweden

prediction:  tribes of siberia synthesized new russian empire extending to the ural mountains and beyond founded in citation needed in the

 epoch: 9960 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 66%|██████▋   | 9962/15000 [20:09<13:25,  6.25it/s]


 epoch: 9961 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9962 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%

 epoch: 9963 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 66%|██████▋   | 9966/15000 [20:10<10:02,  8.35it/s]


 epoch: 9964 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9965 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9966 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 66%|██████▋   | 9968/15000 [20:10<09:19,  9.00it/s]


 epoch: 9967 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9968 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 9969 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 66%|██████▋   | 9970/15000 [20:10<09:17,  9.02it/s]


input:       win the short struggle that followed by defeating macrinus at battle just outside antioch elagabalus was then accepted by

target:      win the short struggle that followed by defeating macrinus at battle just outside antioch elagabalus was then accepted by the

prediction:  win the short struggle that followed by defeating macrinus at battle just outside antioch elagabalus was then accepted by the

 epoch: 9970 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9971 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 66%|██████▋   | 9974/15000 [20:11<08:33,  9.80it/s]


 epoch: 9972 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 9973 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 9974 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 67%|██████▋   | 9977/15000 [20:11<12:16,  6.82it/s]


 epoch: 9975 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9976 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 9977 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 67%|██████▋   | 9978/15000 [20:11<11:35,  7.22it/s]


 epoch: 9978 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 9979 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

input:       somewhat different boundaries mainland greece to the north nowadays known as central greece consisted of aetolia and acarnania in

target:      somewhat different boundaries mainland greece to the north nowadays known as central greece consisted of aetolia and acarnania in the

prediction:  somewhat different boundaries mainland greece to the north nowadays known as central greece consisted of aetolia and acarnania in the


 67%|██████▋   | 9981/15000 [20:12<10:05,  8.28it/s]


 epoch: 9980 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%

 epoch: 9981 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 9982 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 67%|██████▋   | 9985/15000 [20:12<08:36,  9.71it/s]


 epoch: 9983 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 9984 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9985 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 67%|██████▋   | 9987/15000 [20:12<08:28,  9.85it/s]


 epoch: 9986 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.8%

 epoch: 9987 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 9988 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 67%|██████▋   | 9990/15000 [20:13<13:40,  6.11it/s]


 epoch: 9989 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

input:       permanently moved the capital of the empire from rome to the city of byzantium modern day istanbul which was

target:      permanently moved the capital of the empire from rome to the city of byzantium modern day istanbul which was renamed

prediction:  permanently moved the capital of the empire from rome to the city of byzantium modern day istanbul which was the

 epoch: 9990 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%


 67%|██████▋   | 9992/15000 [20:13<11:51,  7.04it/s]


 epoch: 9991 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 9992 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.3%


 67%|██████▋   | 9995/15000 [20:14<09:44,  8.56it/s]


 epoch: 9993 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 9994 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 9995 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 67%|██████▋   | 9998/15000 [20:14<08:35,  9.70it/s]


 epoch: 9996 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 9997 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 9998 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 67%|██████▋   | 10000/15000 [20:14<08:49,  9.44it/s]


 epoch: 9999 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

input:       include aspects like socioeconomic status ethnicity and gender the sociology of education studies how these factors together with the

target:      include aspects like socioeconomic status ethnicity and gender the sociology of education studies how these factors together with the dominant

prediction:  include aspects like socioeconomic status ethnicity and gender the sociology of education studies how these factors together with the the

 epoch: 10000 | train_loss: 0.22, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.2%


 67%|██████▋   | 10003/15000 [20:14<08:36,  9.67it/s]


 epoch: 10001 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10002 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.2%

 epoch: 10003 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 67%|██████▋   | 10005/15000 [20:15<14:45,  5.64it/s]


 epoch: 10004 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10005 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%


 67%|██████▋   | 10007/15000 [20:15<12:23,  6.71it/s]


 epoch: 10006 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10007 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 67%|██████▋   | 10009/15000 [20:15<11:06,  7.49it/s]


 epoch: 10008 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10009 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

input:       currency exchange rates was almost as large as that of the rest of asia combined in japan economy nearly


 67%|██████▋   | 10011/15000 [20:16<11:11,  7.43it/s]


target:      currency exchange rates was almost as large as that of the rest of asia combined in japan economy nearly equaled

prediction:  currency exchange rates was almost as large as that of the rest of asia combined in japan economy nearly the

 epoch: 10010 | train_loss: 0.21, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10011 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.1%


 67%|██████▋   | 10013/15000 [20:16<10:12,  8.14it/s]


 epoch: 10012 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10013 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 67%|██████▋   | 10015/15000 [20:16<09:55,  8.37it/s]


 epoch: 10014 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10015 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 67%|██████▋   | 10017/15000 [20:16<09:40,  8.59it/s]


 epoch: 10016 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.3%

 epoch: 10017 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 67%|██████▋   | 10019/15000 [20:17<14:10,  5.86it/s]


 epoch: 10018 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10019 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 67%|██████▋   | 10020/15000 [20:17<14:26,  5.75it/s]


input:       system where the player can give them gold ingots and receive items in return the player can also build

target:      system where the player can give them gold ingots and receive items in return the player can also build an

prediction:  system where the player can give them gold ingots and receive items in return the player can also build the

 epoch: 10020 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 10021 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 67%|██████▋   | 10022/15000 [20:17<11:47,  7.04it/s]



 epoch: 10022 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 67%|██████▋   | 10024/15000 [20:18<10:45,  7.71it/s]


 epoch: 10023 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 10024 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 67%|██████▋   | 10026/15000 [20:18<10:27,  7.92it/s]


 epoch: 10025 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10026 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%


 67%|██████▋   | 10028/15000 [20:18<10:32,  7.86it/s]


 epoch: 10027 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.3%

 epoch: 10028 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 67%|██████▋   | 10030/15000 [20:18<12:11,  6.79it/s]


 epoch: 10029 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%

input:       largely abandoned due to lack of grazing land cats dogs and monkeys were common family pets while more exotic

target:      largely abandoned due to lack of grazing land cats dogs and monkeys were common family pets while more exotic pets

prediction:  largely abandoned due to lack of grazing land cats dogs and monkeys were common family pets while more exotic the

 epoch: 10030 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 67%|██████▋   | 10031/15000 [20:19<11:37,  7.12it/s]


 epoch: 10031 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 67%|██████▋   | 10033/15000 [20:19<18:50,  4.39it/s]


 epoch: 10032 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 10033 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%


 67%|██████▋   | 10036/15000 [20:20<12:12,  6.78it/s]


 epoch: 10034 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.2%

 epoch: 10035 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.8%

 epoch: 10036 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 67%|██████▋   | 10038/15000 [20:20<10:39,  7.76it/s]


 epoch: 10037 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10038 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.8%


 67%|██████▋   | 10040/15000 [20:20<11:01,  7.50it/s]


 epoch: 10039 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

input:       many have eventually fallen into disuse although there have been attempts to design one universal programming language that serves

target:      many have eventually fallen into disuse although there have been attempts to design one universal programming language that serves all

prediction:  many have eventually fallen into disuse although there have been attempts to design one universal programming language that serves the

 epoch: 10040 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10041 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 67%|██████▋   | 10044/15000 [20:20<08:44,  9.44it/s]


 epoch: 10042 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.25, test_acc: 97.0%

 epoch: 10043 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10044 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%


 67%|██████▋   | 10045/15000 [20:20<08:45,  9.43it/s]


 epoch: 10045 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 67%|██████▋   | 10047/15000 [20:21<11:08,  7.41it/s]


 epoch: 10046 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.2%

 epoch: 10047 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 67%|██████▋   | 10049/15000 [20:21<09:42,  8.50it/s]


 epoch: 10048 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 10049 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

input:       accusative type or patients ergative type or even making each of the three roles differently which is called the

target:      accusative type or patients ergative type or even making each of the three roles differently which is called the tripartite

prediction:  accusative type or patients ergative type or even making each of the three roles differently which is called the the


 67%|██████▋   | 10051/15000 [20:21<09:30,  8.68it/s]


 epoch: 10050 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10051 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10052 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 67%|██████▋   | 10055/15000 [20:22<08:19,  9.90it/s]


 epoch: 10053 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10054 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10055 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 67%|██████▋   | 10057/15000 [20:22<08:28,  9.72it/s]


 epoch: 10056 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10057 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 67%|██████▋   | 10059/15000 [20:22<08:27,  9.74it/s]


 epoch: 10058 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 10059 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

input:       on april gathered in philadelphia following the war outbreak delegates from the thirteen colonies established the continental army from

target:      on april gathered in philadelphia following the war outbreak delegates from the thirteen colonies established the continental army from various


 67%|██████▋   | 10060/15000 [20:22<09:31,  8.64it/s]


prediction:  on april gathered in philadelphia following the war outbreak delegates from the thirteen colonies established the continental army from the

 epoch: 10060 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 67%|██████▋   | 10063/15000 [20:23<13:59,  5.88it/s]


 epoch: 10061 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10062 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10063 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 67%|██████▋   | 10066/15000 [20:23<10:24,  7.90it/s]


 epoch: 10064 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10065 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.8%

 epoch: 10066 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 67%|██████▋   | 10068/15000 [20:23<09:19,  8.81it/s]


 epoch: 10067 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 10068 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 10069 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 67%|██████▋   | 10071/15000 [20:24<09:02,  9.08it/s]


input:       to govern outside italy among these officials were the roman governors magistrates elected at rome who in the name

target:      to govern outside italy among these officials were the roman governors magistrates elected at rome who in the name of

prediction:  to govern outside italy among these officials were the roman governors magistrates elected at rome who in the name the

 epoch: 10070 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10071 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 67%|██████▋   | 10074/15000 [20:24<08:26,  9.73it/s]


 epoch: 10072 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10073 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10074 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 67%|██████▋   | 10077/15000 [20:25<12:42,  6.46it/s]


 epoch: 10075 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10076 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

 epoch: 10077 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 67%|██████▋   | 10079/15000 [20:25<11:05,  7.39it/s]


 epoch: 10078 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10079 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.21, test_acc: 96.9%

input:       temples and the reinstitution of alienated temple properties and more problematically for the christian church the recalling of previously

target:      temples and the reinstitution of alienated temple properties and more problematically for the christian church the recalling of previously exiled


 67%|██████▋   | 10080/15000 [20:25<11:30,  7.13it/s]


prediction:  temples and the reinstitution of alienated temple properties and more problematically for the christian church the recalling of previously the

 epoch: 10080 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10081 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 67%|██████▋   | 10083/15000 [20:25<09:39,  8.49it/s]


 epoch: 10082 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10083 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.5%


 67%|██████▋   | 10085/15000 [20:26<09:03,  9.05it/s]


 epoch: 10084 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 10085 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.2%

 epoch: 10086 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 67%|██████▋   | 10087/15000 [20:26<08:46,  9.33it/s]



 epoch: 10087 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.22, test_acc: 96.9%


 67%|██████▋   | 10088/15000 [20:26<08:44,  9.37it/s]


 epoch: 10088 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%


 67%|██████▋   | 10090/15000 [20:26<14:08,  5.78it/s]


 epoch: 10089 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

input:       for most of his regime problems military coup drove nero into hiding facing execution at the hands of the

target:      for most of his regime problems military coup drove nero into hiding facing execution at the hands of the roman

prediction:  for most of his regime problems military coup drove nero into hiding facing execution at the hands of the the

 epoch: 10090 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 67%|██████▋   | 10093/15000 [20:27<10:06,  8.09it/s]


 epoch: 10091 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 10092 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.1%

 epoch: 10093 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%


 67%|██████▋   | 10095/15000 [20:27<09:13,  8.86it/s]


 epoch: 10094 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10095 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10096 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 67%|██████▋   | 10099/15000 [20:27<08:12,  9.95it/s]


 epoch: 10097 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10098 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10099 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 67%|██████▋   | 10101/15000 [20:27<08:41,  9.39it/s]


input:       those forces wintered in syria where they became attracted to the young elagabalus after months of mild rebellion by

target:      those forces wintered in syria where they became attracted to the young elagabalus after months of mild rebellion by the

prediction:  those forces wintered in syria where they became attracted to the young elagabalus after months of mild rebellion by the

 epoch: 10100 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 10101 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 67%|██████▋   | 10102/15000 [20:28<08:37,  9.46it/s]


 epoch: 10102 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 67%|██████▋   | 10104/15000 [20:28<14:56,  5.46it/s]


 epoch: 10103 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 10104 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10105 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.3%


 67%|██████▋   | 10108/15000 [20:29<10:11,  8.00it/s]


 epoch: 10106 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.3%

 epoch: 10107 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10108 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 67%|██████▋   | 10110/15000 [20:29<09:45,  8.35it/s]


 epoch: 10109 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

input:       more than countries with audiovisual traditions there is no one single african cinema both historically and culturally there are

target:      more than countries with audiovisual traditions there is no one single african cinema both historically and culturally there are major

prediction:  more than countries with audiovisual traditions there is no one single african cinema both historically and culturally there are the

 epoch: 10110 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 67%|██████▋   | 10113/15000 [20:29<08:55,  9.12it/s]


 epoch: 10111 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10112 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10113 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 67%|██████▋   | 10115/15000 [20:29<09:24,  8.66it/s]


 epoch: 10114 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10115 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.2%


 67%|██████▋   | 10117/15000 [20:30<10:05,  8.07it/s]


 epoch: 10116 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10117 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 67%|██████▋   | 10119/15000 [20:30<15:58,  5.09it/s]


 epoch: 10118 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10119 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 67%|██████▋   | 10121/15000 [20:31<13:28,  6.03it/s]


input:       and north atlantic drift warm the continent southern europe has warm but mild climate there are frequent summer droughts

target:      and north atlantic drift warm the continent southern europe has warm but mild climate there are frequent summer droughts in

prediction:  and north atlantic drift warm the continent southern europe has warm but mild climate there are frequent summer droughts the

 epoch: 10120 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 10121 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 67%|██████▋   | 10123/15000 [20:31<11:39,  6.97it/s]


 epoch: 10122 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%

 epoch: 10123 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 68%|██████▊   | 10125/15000 [20:31<10:50,  7.49it/s]


 epoch: 10124 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.3%

 epoch: 10125 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 68%|██████▊   | 10127/15000 [20:31<10:03,  8.08it/s]


 epoch: 10126 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10127 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 68%|██████▊   | 10129/15000 [20:31<09:30,  8.53it/s]


 epoch: 10128 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10129 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

input:       in england and estyn reports on performance in wales in the united kingdom most schools are publicly funded and


 68%|██████▊   | 10131/15000 [20:32<10:44,  7.55it/s]


target:      in england and estyn reports on performance in wales in the united kingdom most schools are publicly funded and known

prediction:  in england and estyn reports on performance in wales in the united kingdom most schools are publicly funded and the

 epoch: 10130 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10131 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 68%|██████▊   | 10133/15000 [20:33<18:18,  4.43it/s]


 epoch: 10132 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10133 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 68%|██████▊   | 10135/15000 [20:33<13:46,  5.88it/s]


 epoch: 10134 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10135 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 68%|██████▊   | 10137/15000 [20:33<11:13,  7.22it/s]


 epoch: 10136 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 10137 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 68%|██████▊   | 10138/15000 [20:33<10:18,  7.87it/s]


 epoch: 10138 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10139 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

input:       the game very highly and saying that global offensive is polished and better looking version of the game gamespot

target:      the game very highly and saying that global offensive is polished and better looking version of the game gamespot writer

prediction:  the game very highly and saying that global offensive is polished and better looking version of the game gamespot the


 68%|██████▊   | 10141/15000 [20:33<09:25,  8.60it/s]


 epoch: 10140 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10141 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%


 68%|██████▊   | 10143/15000 [20:34<08:53,  9.10it/s]


 epoch: 10142 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10143 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 68%|██████▊   | 10145/15000 [20:34<08:35,  9.41it/s]


 epoch: 10144 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10145 | train_loss: 0.20, train_acc: 97.5% | test_loss: 0.25, test_acc: 97.0%


 68%|██████▊   | 10148/15000 [20:34<13:03,  6.19it/s]


 epoch: 10146 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10147 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10148 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 68%|██████▊   | 10150/15000 [20:35<11:39,  6.93it/s]


 epoch: 10149 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%

input:       his murder in orchestrated by his enemies in the senate stephanus the steward of the deceased julia flavia members

target:      his murder in orchestrated by his enemies in the senate stephanus the steward of the deceased julia flavia members of

prediction:  his murder in orchestrated by his enemies in the senate stephanus the steward of the deceased julia flavia members the

 epoch: 10150 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 96.9%


 68%|██████▊   | 10153/15000 [20:35<09:14,  8.75it/s]


 epoch: 10151 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10152 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 10153 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 68%|██████▊   | 10156/15000 [20:35<08:34,  9.42it/s]


 epoch: 10154 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10155 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10156 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 68%|██████▊   | 10159/15000 [20:36<08:06,  9.95it/s]


 epoch: 10157 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10158 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10159 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 68%|██████▊   | 10160/15000 [20:36<10:42,  7.53it/s]


input:       joined the eurozone replacing their national currencies by the euro figures released by eurostat in confirmed that the eurozone

target:      joined the eurozone replacing their national currencies by the euro figures released by eurostat in confirmed that the eurozone had

prediction:  joined the eurozone replacing their national currencies by the euro figures released by eurostat in confirmed that the eurozone the

 epoch: 10160 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.3%


 68%|██████▊   | 10162/15000 [20:36<09:48,  8.22it/s]


 epoch: 10161 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10162 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 68%|██████▊   | 10164/15000 [20:36<09:11,  8.76it/s]


 epoch: 10163 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 10164 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10165 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 68%|██████▊   | 10168/15000 [20:37<07:55, 10.16it/s]


 epoch: 10166 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10167 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 10168 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 68%|██████▊   | 10170/15000 [20:37<08:17,  9.70it/s]


 epoch: 10169 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

input:       east west schism in split the former roman empire religiously with the eastern orthodox church in the byzantine empire

target:      east west schism in split the former roman empire religiously with the eastern orthodox church in the byzantine empire and

prediction:  east west schism in split the former roman empire religiously with the eastern orthodox church in the byzantine empire the

 epoch: 10170 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 68%|██████▊   | 10172/15000 [20:37<08:38,  9.31it/s]


 epoch: 10171 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10172 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 68%|██████▊   | 10173/15000 [20:37<08:40,  9.27it/s]


 epoch: 10173 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 68%|██████▊   | 10175/15000 [20:38<15:18,  5.25it/s]


 epoch: 10174 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 10175 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 68%|██████▊   | 10178/15000 [20:38<11:04,  7.26it/s]


 epoch: 10176 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 10177 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10178 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 68%|██████▊   | 10180/15000 [20:38<10:57,  7.33it/s]


 epoch: 10179 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

input:       whom the evening meal cena had important social functions guests were entertained in finely decorated dining room triclinium furnished

target:      whom the evening meal cena had important social functions guests were entertained in finely decorated dining room triclinium furnished with

prediction:  whom the evening meal cena had important social functions guests were entertained in finely decorated dining room triclinium furnished the

 epoch: 10180 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 68%|██████▊   | 10183/15000 [20:39<08:50,  9.07it/s]


 epoch: 10181 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10182 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10183 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%


 68%|██████▊   | 10185/15000 [20:39<08:38,  9.28it/s]


 epoch: 10184 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10185 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 68%|██████▊   | 10187/15000 [20:39<08:30,  9.43it/s]


 epoch: 10186 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10187 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 68%|██████▊   | 10188/15000 [20:40<18:48,  4.26it/s]


 epoch: 10188 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 10189 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.9%

input:       the southwest of the region in the australian landmass the climate is mostly desert or semi arid with the

target:      the southwest of the region in the australian landmass the climate is mostly desert or semi arid with the southern

prediction:  the southwest of the region in the australian landmass the climate is mostly desert or semi arid with the the


 68%|██████▊   | 10192/15000 [20:40<11:33,  6.93it/s]


 epoch: 10190 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10191 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 10192 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 68%|██████▊   | 10195/15000 [20:40<09:44,  8.22it/s]


 epoch: 10193 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 10194 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10195 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 68%|██████▊   | 10197/15000 [20:41<09:09,  8.74it/s]


 epoch: 10196 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.0%

 epoch: 10197 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.4%

 epoch: 10198 | train_loss: 0.21, train_acc: 97.5% | test_loss: 0.24, test_acc: 97.2%


 68%|██████▊   | 10200/15000 [20:41<08:46,  9.11it/s]


 epoch: 10199 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       related to mineral growth under stress this can remove signs of the original textures of the rocks such as

target:      related to mineral growth under stress this can remove signs of the original textures of the rocks such as bedding

prediction:  related to mineral growth under stress this can remove signs of the original textures of the rocks such as the

 epoch: 10200 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 68%|██████▊   | 10201/15000 [20:41<08:45,  9.13it/s]


 epoch: 10201 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 68%|██████▊   | 10204/15000 [20:41<10:39,  7.50it/s]


 epoch: 10202 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%

 epoch: 10203 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10204 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 68%|██████▊   | 10206/15000 [20:42<09:22,  8.53it/s]


 epoch: 10205 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 10206 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.2%

 epoch: 10207 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 68%|██████▊   | 10209/15000 [20:42<08:35,  9.29it/s]


 epoch: 10208 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.8%

 epoch: 10209 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

input:       of medicine this intellectual school revolutionized medicine in ancient greece establishing it as discipline distinct from other fields that

target:      of medicine this intellectual school revolutionized medicine in ancient greece establishing it as discipline distinct from other fields that it

prediction:  of medicine this intellectual school revolutionized medicine in ancient greece establishing it as discipline distinct from other fields that the


 68%|██████▊   | 10211/15000 [20:42<09:06,  8.76it/s]


 epoch: 10210 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10211 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 68%|██████▊   | 10213/15000 [20:42<08:44,  9.12it/s]


 epoch: 10212 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10213 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 68%|██████▊   | 10215/15000 [20:43<08:33,  9.31it/s]


 epoch: 10214 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 10215 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 68%|██████▊   | 10217/15000 [20:43<16:57,  4.70it/s]


 epoch: 10216 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 10217 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 96.9%


 68%|██████▊   | 10219/15000 [20:44<13:32,  5.88it/s]


 epoch: 10218 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10219 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%


 68%|██████▊   | 10220/15000 [20:44<13:42,  5.81it/s]


input:       for some early features and the differences between the console and pc versions since its release it has drawn

target:      for some early features and the differences between the console and pc versions since its release it has drawn in

prediction:  for some early features and the differences between the console and pc versions since its release it has drawn the

 epoch: 10220 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 68%|██████▊   | 10222/15000 [20:44<11:23,  6.99it/s]


 epoch: 10221 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 10222 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 68%|██████▊   | 10224/15000 [20:44<10:40,  7.46it/s]


 epoch: 10223 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10224 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 68%|██████▊   | 10226/15000 [20:45<10:03,  7.91it/s]


 epoch: 10225 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.0%

 epoch: 10226 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.1%


 68%|██████▊   | 10228/15000 [20:45<10:16,  7.74it/s]


 epoch: 10227 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 10228 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%


 68%|██████▊   | 10229/15000 [20:45<10:27,  7.60it/s]


 epoch: 10229 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       unsuccessfully to invade spain allowing philip ii of spain to maintain his dominant war capacity in europe this english

target:      unsuccessfully to invade spain allowing philip ii of spain to maintain his dominant war capacity in europe this english disaster

prediction:  unsuccessfully to invade spain allowing philip ii of spain to maintain his dominant war capacity in europe this english the


 68%|██████▊   | 10231/15000 [20:46<17:43,  4.48it/s]


 epoch: 10230 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10231 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 68%|██████▊   | 10233/15000 [20:46<13:34,  5.85it/s]


 epoch: 10232 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10233 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 68%|██████▊   | 10235/15000 [20:46<11:49,  6.71it/s]


 epoch: 10234 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10235 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 68%|██████▊   | 10238/15000 [20:46<09:28,  8.37it/s]


 epoch: 10236 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 10237 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 10238 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 68%|██████▊   | 10240/15000 [20:47<09:57,  7.96it/s]


 epoch: 10239 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

input:       most people the exchange of goods occurred through social relationships there were also traders who bartered in the marketplaces

target:      most people the exchange of goods occurred through social relationships there were also traders who bartered in the marketplaces in

prediction:  most people the exchange of goods occurred through social relationships there were also traders who bartered in the marketplaces the

 epoch: 10240 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 68%|██████▊   | 10242/15000 [20:47<09:30,  8.34it/s]


 epoch: 10241 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10242 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 68%|██████▊   | 10244/15000 [20:47<09:04,  8.74it/s]


 epoch: 10243 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10244 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 68%|██████▊   | 10246/15000 [20:48<15:50,  5.00it/s]


 epoch: 10245 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%

 epoch: 10246 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10247 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 68%|██████▊   | 10250/15000 [20:48<10:12,  7.76it/s]


 epoch: 10248 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 10249 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

input:       heavily on economic indicators like the gdp and gdp per capita while often useful gdp only includes economic activity

target:      heavily on economic indicators like the gdp and gdp per capita while often useful gdp only includes economic activity for

prediction:  heavily on economic indicators like the gdp and gdp per capita while often useful gdp only includes economic activity the

 epoch: 10250 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 68%|██████▊   | 10253/15000 [20:48<08:44,  9.04it/s]


 epoch: 10251 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 10252 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10253 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.0%


 68%|██████▊   | 10256/15000 [20:49<08:15,  9.58it/s]


 epoch: 10254 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10255 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10256 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 68%|██████▊   | 10257/15000 [20:49<08:19,  9.50it/s]


 epoch: 10257 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

 epoch: 10258 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 68%|██████▊   | 10260/15000 [20:50<14:37,  5.40it/s]


 epoch: 10259 | train_loss: 0.21, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.0%

input:       people who lived on these islands was often distinct from that of asia and pre columbian america hence lack

target:      people who lived on these islands was often distinct from that of asia and pre columbian america hence lack of

prediction:  people who lived on these islands was often distinct from that of asia and pre columbian america hence lack the

 epoch: 10260 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 68%|██████▊   | 10262/15000 [20:50<11:50,  6.67it/s]


 epoch: 10261 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.0%

 epoch: 10262 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10263 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.3%


 68%|██████▊   | 10266/15000 [20:50<08:56,  8.82it/s]


 epoch: 10264 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 10265 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10266 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 68%|██████▊   | 10268/15000 [20:50<08:28,  9.30it/s]


 epoch: 10267 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10268 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%

 epoch: 10269 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.2%


 68%|██████▊   | 10271/15000 [20:51<08:24,  9.38it/s]


input:       in connection with votives dedicated by ordinary people as well as magic spells eg the greek magical papyri books

target:      in connection with votives dedicated by ordinary people as well as magic spells eg the greek magical papyri books were

prediction:  in connection with votives dedicated by ordinary people as well as magic spells eg the greek magical papyri books the

 epoch: 10270 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10271 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%


 68%|██████▊   | 10272/15000 [20:51<08:18,  9.48it/s]


 epoch: 10272 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.3%


 68%|██████▊   | 10274/15000 [20:51<09:55,  7.93it/s]


 epoch: 10273 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10274 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

 epoch: 10275 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 69%|██████▊   | 10278/15000 [20:51<08:03,  9.76it/s]


 epoch: 10276 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 10277 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10278 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 69%|██████▊   | 10280/15000 [20:52<08:20,  9.43it/s]


 epoch: 10279 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

input:       consist of styles making styles in total the two types are adaptive versus maladaptive humour adaptive humour consist of

target:      consist of styles making styles in total the two types are adaptive versus maladaptive humour adaptive humour consist of facilitative

prediction:  consist of styles making styles in total the two types are adaptive versus maladaptive humour adaptive humour consist of the

 epoch: 10280 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 69%|██████▊   | 10282/15000 [20:52<07:48, 10.06it/s]


 epoch: 10281 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10282 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10283 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 69%|██████▊   | 10286/15000 [20:52<07:25, 10.58it/s]


 epoch: 10284 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10285 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 10286 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 69%|██████▊   | 10288/15000 [20:53<12:39,  6.20it/s]


 epoch: 10287 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 10288 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10289 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 69%|██████▊   | 10290/15000 [20:53<11:21,  6.91it/s]


input:       form and semantics meaning which are usually defined by formal language some languages are defined by specification document for

target:      form and semantics meaning which are usually defined by formal language some languages are defined by specification document for example

prediction:  form and semantics meaning which are usually defined by formal language some languages are defined by specification document for the

 epoch: 10290 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10291 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 69%|██████▊   | 10294/15000 [20:53<08:59,  8.72it/s]


 epoch: 10292 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 10293 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10294 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 69%|██████▊   | 10296/15000 [20:54<08:22,  9.35it/s]


 epoch: 10295 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 10296 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 10297 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 69%|██████▊   | 10298/15000 [20:54<08:10,  9.59it/s]


 epoch: 10298 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10299 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

input:       made public in may before being fully released in november with notch stepping down and jens jeb bergensten taking

target:      made public in may before being fully released in november with notch stepping down and jens jeb bergensten taking over

prediction:  made public in may before being fully released in november with notch stepping down and jens jeb bergensten taking the


 69%|██████▊   | 10300/15000 [20:54<08:34,  9.14it/s]


 epoch: 10300 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.0%


 69%|██████▊   | 10303/15000 [20:55<12:01,  6.51it/s]


 epoch: 10301 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10302 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10303 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 96.9%


 69%|██████▊   | 10305/15000 [20:55<10:24,  7.52it/s]


 epoch: 10304 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10305 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 10306 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 69%|██████▊   | 10309/15000 [20:55<08:31,  9.16it/s]


 epoch: 10307 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 10308 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%

 epoch: 10309 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 69%|██████▊   | 10311/15000 [20:56<08:37,  9.06it/s]


input:       country it also applies to secondary education with secondary schools often divided between gymnasiums and vocational schools which again

target:      country it also applies to secondary education with secondary schools often divided between gymnasiums and vocational schools which again depending

prediction:  country it also applies to secondary education with secondary schools often divided between gymnasiums and vocational schools which again the

 epoch: 10310 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10311 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 69%|██████▉   | 10313/15000 [20:56<08:06,  9.64it/s]


 epoch: 10312 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10313 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 10314 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%


 69%|██████▉   | 10317/15000 [20:56<08:28,  9.21it/s]


 epoch: 10315 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10316 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.24, test_acc: 96.9%

 epoch: 10317 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 69%|██████▉   | 10319/15000 [20:56<08:34,  9.10it/s]


 epoch: 10318 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.1%

 epoch: 10319 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 69%|██████▉   | 10320/15000 [20:57<09:38,  8.08it/s]


input:       dynamism particularly east asia as well as robust population growth during the th century but overall population growth has

target:      dynamism particularly east asia as well as robust population growth during the th century but overall population growth has since

prediction:  dynamism particularly east asia as well as robust population growth during the th century but overall population growth has the

 epoch: 10320 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 69%|██████▉   | 10323/15000 [20:57<09:04,  8.59it/s]


 epoch: 10321 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 10322 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10323 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%


 69%|██████▉   | 10325/15000 [20:57<09:24,  8.29it/s]


 epoch: 10324 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 10325 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 69%|██████▉   | 10327/15000 [20:57<09:52,  7.88it/s]


 epoch: 10326 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10327 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 69%|██████▉   | 10329/15000 [20:58<09:45,  7.98it/s]


 epoch: 10328 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10329 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

input:       s has the largest economy of all three countries and in the world in the had an estimated

target:      s has the largest economy of all three countries and in the world in the had an estimated per

prediction:  s has the largest economy of all three countries and in the world in the had an estimated the


 69%|██████▉   | 10331/15000 [20:58<16:07,  4.82it/s]


 epoch: 10330 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10331 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 69%|██████▉   | 10333/15000 [20:59<12:51,  6.05it/s]


 epoch: 10332 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.21, test_acc: 97.1%

 epoch: 10333 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 69%|██████▉   | 10335/15000 [20:59<10:53,  7.13it/s]


 epoch: 10334 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10335 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 69%|██████▉   | 10337/15000 [20:59<09:52,  7.88it/s]


 epoch: 10336 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10337 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 69%|██████▉   | 10339/15000 [20:59<09:20,  8.31it/s]


 epoch: 10338 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10339 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

input:       the united states government standardized ada systems programming language derived from pascal and intended for use by defense contractors


 69%|██████▉   | 10341/15000 [21:00<10:16,  7.55it/s]


target:      the united states government standardized ada systems programming language derived from pascal and intended for use by defense contractors in

prediction:  the united states government standardized ada systems programming language derived from pascal and intended for use by defense contractors the

 epoch: 10340 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10341 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 69%|██████▉   | 10343/15000 [21:00<10:18,  7.53it/s]


 epoch: 10342 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10343 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 96.8%


 69%|██████▉   | 10346/15000 [21:01<14:09,  5.48it/s]


 epoch: 10344 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10345 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10346 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%


 69%|██████▉   | 10348/15000 [21:01<11:16,  6.88it/s]


 epoch: 10347 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 10348 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%

 epoch: 10349 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 69%|██████▉   | 10350/15000 [21:01<10:18,  7.52it/s]


input:       needs and carl rogers who created and developed client centered therapy later positive psychology opened up humanistic themes to

target:      needs and carl rogers who created and developed client centered therapy later positive psychology opened up humanistic themes to scientific

prediction:  needs and carl rogers who created and developed client centered therapy later positive psychology opened up humanistic themes to the

 epoch: 10350 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10351 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 69%|██████▉   | 10354/15000 [21:01<08:29,  9.12it/s]


 epoch: 10352 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 10353 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 10354 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 69%|██████▉   | 10357/15000 [21:02<08:01,  9.64it/s]


 epoch: 10355 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10356 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 10357 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 69%|██████▉   | 10360/15000 [21:02<12:17,  6.29it/s]


 epoch: 10358 | train_loss: 0.21, train_acc: 97.5% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10359 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

input:       the southern cone located in the middle latitudes the continent cultural and ethnic outlook has its origin with the

target:      the southern cone located in the middle latitudes the continent cultural and ethnic outlook has its origin with the interaction

prediction:  the southern cone located in the middle latitudes the continent cultural and ethnic outlook has its origin with the the

 epoch: 10360 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 69%|██████▉   | 10362/15000 [21:03<10:31,  7.35it/s]


 epoch: 10361 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10362 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.27, test_acc: 96.9%

 epoch: 10363 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%


 69%|██████▉   | 10366/15000 [21:03<08:21,  9.24it/s]


 epoch: 10364 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10365 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 10366 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 69%|██████▉   | 10368/15000 [21:03<08:08,  9.48it/s]


 epoch: 10367 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10368 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10369 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 69%|██████▉   | 10370/15000 [21:03<08:25,  9.15it/s]


input:       took the lead with several victories until the triple alliance organized to repel the invaders and fight effectively this

target:      took the lead with several victories until the triple alliance organized to repel the invaders and fight effectively this was

prediction:  took the lead with several victories until the triple alliance organized to repel the invaders and fight effectively this the

 epoch: 10370 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10371 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 69%|██████▉   | 10374/15000 [21:04<11:17,  6.83it/s]


 epoch: 10372 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10373 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10374 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.2%


 69%|██████▉   | 10376/15000 [21:04<09:58,  7.72it/s]


 epoch: 10375 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 10376 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 10377 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 69%|██████▉   | 10380/15000 [21:05<08:36,  8.94it/s]


 epoch: 10378 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10379 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%

input:       definition of oceania since its foundation in which utilizes four of the five subregions from the th century australasia

target:      definition of oceania since its foundation in which utilizes four of the five subregions from the th century australasia melanesia

prediction:  definition of oceania since its foundation in which utilizes four of the five subregions from the th century australasia the

 epoch: 10380 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 69%|██████▉   | 10382/15000 [21:05<08:06,  9.50it/s]


 epoch: 10381 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10382 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10383 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 69%|██████▉   | 10386/15000 [21:05<07:42,  9.99it/s]


 epoch: 10384 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 10385 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%

 epoch: 10386 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 69%|██████▉   | 10388/15000 [21:06<12:31,  6.14it/s]


 epoch: 10387 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10388 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10389 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.8%


 69%|██████▉   | 10390/15000 [21:06<11:36,  6.62it/s]


input:       interpersonal lacanian and relational psychoanalysis psychologists such as hans eysenck and philosophers including karl popper sharply criticized psychoanalysis popper

target:      interpersonal lacanian and relational psychoanalysis psychologists such as hans eysenck and philosophers including karl popper sharply criticized psychoanalysis popper argued

prediction:  interpersonal lacanian and relational psychoanalysis psychologists such as hans eysenck and philosophers including karl popper sharply criticized psychoanalysis popper the

 epoch: 10390 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%


 69%|██████▉   | 10393/15000 [21:06<09:42,  7.91it/s]


 epoch: 10391 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10392 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10393 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%


 69%|██████▉   | 10395/15000 [21:07<08:47,  8.73it/s]


 epoch: 10394 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 10395 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10396 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.27, test_acc: 96.9%


 69%|██████▉   | 10398/15000 [21:07<08:13,  9.32it/s]


 epoch: 10397 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 10398 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10399 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 69%|██████▉   | 10400/15000 [21:07<08:27,  9.06it/s]


input:       sculptures of human heads and other subjects jade jewelry and other olmec objects are found throughout mesoamerica likely having

target:      sculptures of human heads and other subjects jade jewelry and other olmec objects are found throughout mesoamerica likely having travelled

prediction:  sculptures of human heads and other subjects jade jewelry and other olmec objects are found throughout mesoamerica likely having the

 epoch: 10400 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 69%|██████▉   | 10403/15000 [21:08<12:06,  6.32it/s]


 epoch: 10401 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 10402 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10403 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 69%|██████▉   | 10406/15000 [21:08<09:30,  8.05it/s]


 epoch: 10404 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.8%

 epoch: 10405 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10406 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.8%


 69%|██████▉   | 10409/15000 [21:08<08:18,  9.20it/s]


 epoch: 10407 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10408 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 10409 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 69%|██████▉   | 10411/15000 [21:09<08:31,  8.98it/s]


input:       the process as the shared experience of common world this shared experience involves discovery as well as posing and

target:      the process as the shared experience of common world this shared experience involves discovery as well as posing and solving

prediction:  the process as the shared experience of common world this shared experience involves discovery as well as posing and the

 epoch: 10410 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10411 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 69%|██████▉   | 10414/15000 [21:09<07:56,  9.62it/s]


 epoch: 10412 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 10413 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10414 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 69%|██████▉   | 10417/15000 [21:10<11:49,  6.46it/s]


 epoch: 10415 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10416 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10417 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.0%


 69%|██████▉   | 10418/15000 [21:10<10:57,  6.97it/s]


 epoch: 10418 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10419 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       later initially molten the outer layer of the earth cooled resulting in the solid crust outgassing and volcanic activity

target:      later initially molten the outer layer of the earth cooled resulting in the solid crust outgassing and volcanic activity produced

prediction:  later initially molten the outer layer of the earth cooled resulting in the solid crust outgassing and volcanic activity the


 69%|██████▉   | 10421/15000 [21:10<09:23,  8.13it/s]


 epoch: 10420 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10421 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 69%|██████▉   | 10423/15000 [21:10<09:01,  8.46it/s]


 epoch: 10422 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 10423 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 70%|██████▉   | 10425/15000 [21:11<08:53,  8.58it/s]


 epoch: 10424 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10425 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.1%


 70%|██████▉   | 10427/15000 [21:11<08:41,  8.76it/s]


 epoch: 10426 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10427 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 70%|██████▉   | 10429/15000 [21:11<09:18,  8.18it/s]


 epoch: 10428 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 10429 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 70%|██████▉   | 10430/15000 [21:11<10:22,  7.34it/s]


input:       internally linus torvalds has responded that the hash was mostly to guard against accidental corruption and the security cryptographically

target:      internally linus torvalds has responded that the hash was mostly to guard against accidental corruption and the security cryptographically secure

prediction:  internally linus torvalds has responded that the hash was mostly to guard against accidental corruption and the security cryptographically the

 epoch: 10430 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10431 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 70%|██████▉   | 10433/15000 [21:12<08:59,  8.46it/s]


 epoch: 10432 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10433 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 70%|██████▉   | 10435/15000 [21:12<09:14,  8.23it/s]


 epoch: 10434 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10435 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 70%|██████▉   | 10437/15000 [21:12<09:19,  8.15it/s]


 epoch: 10436 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10437 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 70%|██████▉   | 10439/15000 [21:12<09:10,  8.29it/s]


 epoch: 10438 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10439 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 70%|██████▉   | 10440/15000 [21:12<10:43,  7.09it/s]


input:       the year is generally accepted as the formal end of the western roman empire that year orestes having stolen

target:      the year is generally accepted as the formal end of the western roman empire that year orestes having stolen power

prediction:  the year is generally accepted as the formal end of the western roman empire that year orestes having stolen the

 epoch: 10440 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 70%|██████▉   | 10442/15000 [21:13<10:06,  7.52it/s]


 epoch: 10441 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10442 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.3%


 70%|██████▉   | 10443/15000 [21:13<10:07,  7.50it/s]


 epoch: 10443 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 70%|██████▉   | 10445/15000 [21:14<17:28,  4.35it/s]


 epoch: 10444 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10445 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%


 70%|██████▉   | 10447/15000 [21:14<13:12,  5.75it/s]


 epoch: 10446 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 10447 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 70%|██████▉   | 10449/15000 [21:14<11:04,  6.85it/s]


 epoch: 10448 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.2%

 epoch: 10449 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%


 70%|██████▉   | 10450/15000 [21:14<12:21,  6.14it/s]


input:       gesture he and colbert had already thought of the idea and the deposit for using the national mall was

target:      gesture he and colbert had already thought of the idea and the deposit for using the national mall was already

prediction:  gesture he and colbert had already thought of the idea and the deposit for using the national mall was the

 epoch: 10450 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 70%|██████▉   | 10452/15000 [21:14<10:53,  6.96it/s]


 epoch: 10451 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10452 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 70%|██████▉   | 10454/15000 [21:15<09:33,  7.93it/s]


 epoch: 10453 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.8%

 epoch: 10454 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 70%|██████▉   | 10456/15000 [21:15<08:39,  8.74it/s]


 epoch: 10455 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10456 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 70%|██████▉   | 10457/15000 [21:15<08:25,  8.99it/s]


 epoch: 10457 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 70%|██████▉   | 10458/15000 [21:16<17:30,  4.32it/s]


 epoch: 10458 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.25, test_acc: 96.8%

 epoch: 10459 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

input:       its public github repos as of november update reddit decommissioned its servers which it owned and migrated to amazon

target:      its public github repos as of november update reddit decommissioned its servers which it owned and migrated to amazon web

prediction:  its public github repos as of november update reddit decommissioned its servers which it owned and migrated to amazon the


 70%|██████▉   | 10461/15000 [21:16<12:00,  6.30it/s]


 epoch: 10460 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10461 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10462 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 70%|██████▉   | 10465/15000 [21:16<08:32,  8.85it/s]


 epoch: 10463 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10464 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10465 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 70%|██████▉   | 10467/15000 [21:16<07:53,  9.57it/s]


 epoch: 10466 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%

 epoch: 10467 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10468 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 70%|██████▉   | 10469/15000 [21:17<07:38,  9.88it/s]


 epoch: 10469 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

input:       and traumatic injuries from construction and warfare all took significant toll on the body the grit and sand from

target:      and traumatic injuries from construction and warfare all took significant toll on the body the grit and sand from stone

prediction:  and traumatic injuries from construction and warfare all took significant toll on the body the grit and sand from the

 epoch: 10470 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 70%|██████▉   | 10472/15000 [21:17<07:58,  9.46it/s]


 epoch: 10471 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10472 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 70%|██████▉   | 10474/15000 [21:18<13:11,  5.72it/s]


 epoch: 10473 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 10474 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 10475 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 70%|██████▉   | 10478/15000 [21:18<09:03,  8.32it/s]


 epoch: 10476 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10477 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 10478 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 70%|██████▉   | 10480/15000 [21:18<08:40,  8.68it/s]


 epoch: 10479 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.8%

input:       the russian empire stood to benefit from the decline whereas the habsburg empire and britain perceived the preservation of

target:      the russian empire stood to benefit from the decline whereas the habsburg empire and britain perceived the preservation of the

prediction:  the russian empire stood to benefit from the decline whereas the habsburg empire and britain perceived the preservation of the

 epoch: 10480 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 70%|██████▉   | 10482/15000 [21:18<08:02,  9.37it/s]


 epoch: 10481 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10482 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10483 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 70%|██████▉   | 10486/15000 [21:19<07:34,  9.94it/s]


 epoch: 10484 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10485 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 10486 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 70%|██████▉   | 10488/15000 [21:19<09:52,  7.61it/s]


 epoch: 10487 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10488 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10489 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%


 70%|██████▉   | 10490/15000 [21:19<09:26,  7.96it/s]


input:       was the most powerful economic cultural and military force in europe emperor justinian presided over constantinople first golden age

target:      was the most powerful economic cultural and military force in europe emperor justinian presided over constantinople first golden age he

prediction:  was the most powerful economic cultural and military force in europe emperor justinian presided over constantinople first golden age the

 epoch: 10490 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10491 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 70%|██████▉   | 10494/15000 [21:20<07:52,  9.53it/s]


 epoch: 10492 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%

 epoch: 10493 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10494 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%


 70%|██████▉   | 10496/15000 [21:20<07:38,  9.82it/s]


 epoch: 10495 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 10496 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10497 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 70%|██████▉   | 10498/15000 [21:20<07:38,  9.81it/s]


 epoch: 10498 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 10499 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

input:       feature has been missing from powershell through this version includes some general cmdlet updates and fixes testing for framework

target:      feature has been missing from powershell through this version includes some general cmdlet updates and fixes testing for framework dependent

prediction:  feature has been missing from powershell through this version includes some general cmdlet updates and fixes testing for framework the


 70%|███████   | 10500/15000 [21:20<07:44,  9.68it/s]


 epoch: 10500 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 70%|███████   | 10502/15000 [21:21<11:50,  6.33it/s]


 epoch: 10501 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10502 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 10503 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 70%|███████   | 10506/15000 [21:21<08:40,  8.64it/s]


 epoch: 10504 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.2%

 epoch: 10505 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10506 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 70%|███████   | 10508/15000 [21:21<08:05,  9.25it/s]


 epoch: 10507 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10508 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10509 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 70%|███████   | 10511/15000 [21:22<08:05,  9.25it/s]


input:       of the coldest places in the northern hemisphere and can act as source of arctic air masses for north

target:      of the coldest places in the northern hemisphere and can act as source of arctic air masses for north america

prediction:  of the coldest places in the northern hemisphere and can act as source of arctic air masses for north the

 epoch: 10510 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10511 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 70%|███████   | 10513/15000 [21:22<08:02,  9.30it/s]


 epoch: 10512 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 10513 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.8%

 epoch: 10514 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.2%


 70%|███████   | 10517/15000 [21:23<11:29,  6.50it/s]


 epoch: 10515 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10516 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.0%

 epoch: 10517 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 70%|███████   | 10519/15000 [21:23<09:52,  7.56it/s]


 epoch: 10518 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10519 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

input:       research center poll found that americans were the most supportive of free expression of any polity measured they are

target:      research center poll found that americans were the most supportive of free expression of any polity measured they are also

prediction:  research center poll found that americans were the most supportive of free expression of any polity measured they are the


 70%|███████   | 10521/15000 [21:23<09:30,  7.85it/s]


 epoch: 10520 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 10521 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10522 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 70%|███████   | 10525/15000 [21:24<07:49,  9.53it/s]


 epoch: 10523 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 10524 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10525 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 70%|███████   | 10527/15000 [21:24<07:43,  9.64it/s]


 epoch: 10526 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 10527 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10528 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 70%|███████   | 10530/15000 [21:24<12:15,  6.07it/s]


 epoch: 10529 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.0%

input:       to the capitol and even threw stones when news of the public anger in rome spread across the empire

target:      to the capitol and even threw stones when news of the public anger in rome spread across the empire the

prediction:  to the capitol and even threw stones when news of the public anger in rome spread across the empire the

 epoch: 10530 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%


 70%|███████   | 10532/15000 [21:25<10:25,  7.14it/s]


 epoch: 10531 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10532 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.5%


 70%|███████   | 10534/15000 [21:25<09:39,  7.71it/s]


 epoch: 10533 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 10534 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 70%|███████   | 10536/15000 [21:25<09:11,  8.10it/s]


 epoch: 10535 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10536 | train_loss: 0.18, train_acc: 97.4% | test_loss: 0.25, test_acc: 97.0%


 70%|███████   | 10538/15000 [21:25<09:16,  8.02it/s]


 epoch: 10537 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10538 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 70%|███████   | 10540/15000 [21:26<10:38,  6.99it/s]


 epoch: 10539 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

input:       several countries hindus form of the guyanese population and of suriname muslims account for of the guyanese population and

target:      several countries hindus form of the guyanese population and of suriname muslims account for of the guyanese population and of

prediction:  several countries hindus form of the guyanese population and of suriname muslims account for of the guyanese population and the

 epoch: 10540 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 70%|███████   | 10542/15000 [21:26<10:14,  7.25it/s]


 epoch: 10541 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10542 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 70%|███████   | 10544/15000 [21:26<12:50,  5.78it/s]


 epoch: 10543 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10544 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 70%|███████   | 10546/15000 [21:27<10:52,  6.83it/s]


 epoch: 10545 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10546 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 70%|███████   | 10548/15000 [21:27<09:59,  7.43it/s]


 epoch: 10547 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10548 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 70%|███████   | 10550/15000 [21:27<11:13,  6.61it/s]


 epoch: 10549 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

input:       the big bang model rests on two theoretical pillars albert einstein general relativity and the cosmological principle cosmologists have

target:      the big bang model rests on two theoretical pillars albert einstein general relativity and the cosmological principle cosmologists have recently

prediction:  the big bang model rests on two theoretical pillars albert einstein general relativity and the cosmological principle cosmologists have the

 epoch: 10550 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 70%|███████   | 10552/15000 [21:27<10:22,  7.15it/s]


 epoch: 10551 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10552 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 70%|███████   | 10554/15000 [21:28<09:43,  7.63it/s]


 epoch: 10553 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10554 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 70%|███████   | 10556/15000 [21:28<09:37,  7.70it/s]


 epoch: 10555 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 10556 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.1%


 70%|███████   | 10557/15000 [21:28<09:49,  7.54it/s]


 epoch: 10557 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%


 70%|███████   | 10559/15000 [21:29<15:41,  4.72it/s]


 epoch: 10558 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10559 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

input:       and played major role in the economy slavery was complex institution that supported traditional roman social structures as well

target:      and played major role in the economy slavery was complex institution that supported traditional roman social structures as well as


 70%|███████   | 10560/15000 [21:29<14:27,  5.12it/s]


prediction:  and played major role in the economy slavery was complex institution that supported traditional roman social structures as well the

 epoch: 10560 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10561 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 70%|███████   | 10564/15000 [21:29<09:24,  7.86it/s]


 epoch: 10562 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10563 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%

 epoch: 10564 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%


 70%|███████   | 10566/15000 [21:29<08:26,  8.75it/s]


 epoch: 10565 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 10566 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10567 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.2%


 70%|███████   | 10568/15000 [21:30<07:53,  9.35it/s]


 epoch: 10568 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10569 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

input:       was coined as internet slang for the endless intake of cultural newbies commercial use became established alongside academic and

target:      was coined as internet slang for the endless intake of cultural newbies commercial use became established alongside academic and professional

prediction:  was coined as internet slang for the endless intake of cultural newbies commercial use became established alongside academic and the


 70%|███████   | 10570/15000 [21:30<08:07,  9.09it/s]


 epoch: 10570 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 70%|███████   | 10573/15000 [21:31<10:42,  6.89it/s]


 epoch: 10571 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10572 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.3%

 epoch: 10573 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 71%|███████   | 10576/15000 [21:31<08:47,  8.39it/s]


 epoch: 10574 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

 epoch: 10575 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10576 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 71%|███████   | 10578/15000 [21:31<08:14,  8.95it/s]


 epoch: 10577 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10578 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10579 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 71%|███████   | 10580/15000 [21:31<08:04,  9.11it/s]


input:       to the th to th centuries bc and which included the golden age of athens the conquests of alexander

target:      to the th to th centuries bc and which included the golden age of athens the conquests of alexander the

prediction:  to the th to th centuries bc and which included the golden age of athens the conquests of alexander the

 epoch: 10580 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10581 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 71%|███████   | 10584/15000 [21:32<07:12, 10.22it/s]


 epoch: 10582 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10583 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10584 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10585 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 71%|███████   | 10588/15000 [21:32<08:40,  8.48it/s]


 epoch: 10586 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10587 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 10588 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.4%


 71%|███████   | 10590/15000 [21:32<08:46,  8.38it/s]


 epoch: 10589 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

input:       egyptian samples had little more maternal sub saharan component suggesting some degree of influx after the end of the

target:      egyptian samples had little more maternal sub saharan component suggesting some degree of influx after the end of the empire

prediction:  egyptian samples had little more maternal sub saharan component suggesting some degree of influx after the end of the the

 epoch: 10590 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 71%|███████   | 10592/15000 [21:33<08:00,  9.18it/s]


 epoch: 10591 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10592 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10593 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 71%|███████   | 10596/15000 [21:33<07:06, 10.32it/s]


 epoch: 10594 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.2%

 epoch: 10595 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10596 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 71%|███████   | 10598/15000 [21:33<07:09, 10.25it/s]


 epoch: 10597 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10598 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10599 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

input:       oligarchy cleisthenes disliked the spartan rule along with many other athenians and so made his own bid for power

target:      oligarchy cleisthenes disliked the spartan rule along with many other athenians and so made his own bid for power the

prediction:  oligarchy cleisthenes disliked the spartan rule along with many other athenians and so made his own bid for power the


 71%|███████   | 10601/15000 [21:34<11:53,  6.17it/s]


 epoch: 10600 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10601 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10602 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.3%


 71%|███████   | 10605/15000 [21:34<08:50,  8.28it/s]


 epoch: 10603 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10604 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10605 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 96.9%


 71%|███████   | 10607/15000 [21:34<08:03,  9.09it/s]


 epoch: 10606 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10607 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10608 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.8%


 71%|███████   | 10611/15000 [21:35<07:28,  9.78it/s]


 epoch: 10609 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.8%

input:       times larger worlds than the playstation edition and described as nearly identical to the xbox one edition the playstation

target:      times larger worlds than the playstation edition and described as nearly identical to the xbox one edition the playstation vita

prediction:  times larger worlds than the playstation edition and described as nearly identical to the xbox one edition the playstation the

 epoch: 10610 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10611 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.19, test_acc: 97.3%


 71%|███████   | 10613/15000 [21:35<07:10, 10.18it/s]


 epoch: 10612 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10613 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 71%|███████   | 10615/15000 [21:35<08:45,  8.34it/s]


 epoch: 10614 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10615 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10616 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%


 71%|███████   | 10619/15000 [21:36<07:22,  9.90it/s]


 epoch: 10617 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10618 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 10619 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%


 71%|███████   | 10621/15000 [21:36<07:50,  9.31it/s]


input:       text between the quotes is string and in many programming languages dividing number by string has no meaning and

target:      text between the quotes is string and in many programming languages dividing number by string has no meaning and will

prediction:  text between the quotes is string and in many programming languages dividing number by string has no meaning and the

 epoch: 10620 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 10621 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.9%


 71%|███████   | 10624/15000 [21:36<07:23,  9.87it/s]


 epoch: 10622 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.4%

 epoch: 10623 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10624 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 71%|███████   | 10626/15000 [21:36<07:15, 10.04it/s]


 epoch: 10625 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.1%

 epoch: 10626 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10627 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.20, test_acc: 97.3%


 71%|███████   | 10630/15000 [21:37<08:16,  8.80it/s]


 epoch: 10628 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10629 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

input:       howard rheingold the ethnography of cyberspace is an important aspect of cyberculture that does not reflect single unified culture

target:      howard rheingold the ethnography of cyberspace is an important aspect of cyberculture that does not reflect single unified culture it

prediction:  howard rheingold the ethnography of cyberspace is an important aspect of cyberculture that does not reflect single unified culture the

 epoch: 10630 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%


 71%|███████   | 10632/15000 [21:37<07:41,  9.46it/s]


 epoch: 10631 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10632 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 10633 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 71%|███████   | 10636/15000 [21:37<06:56, 10.49it/s]


 epoch: 10634 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10635 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10636 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.4%


 71%|███████   | 10638/15000 [21:38<06:55, 10.51it/s]


 epoch: 10637 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10638 | train_loss: 0.21, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10639 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 71%|███████   | 10640/15000 [21:38<07:17,  9.98it/s]


input:       army was considered the senior and more prestigious branch an annexed territory became roman province in three steps making

target:      army was considered the senior and more prestigious branch an annexed territory became roman province in three steps making register

prediction:  army was considered the senior and more prestigious branch an annexed territory became roman province in three steps making the

 epoch: 10640 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10641 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 71%|███████   | 10644/15000 [21:39<10:31,  6.90it/s]


 epoch: 10642 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10643 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10644 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%


 71%|███████   | 10646/15000 [21:39<09:36,  7.55it/s]


 epoch: 10645 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10646 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 71%|███████   | 10648/15000 [21:39<09:19,  7.78it/s]


 epoch: 10647 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 10648 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 71%|███████   | 10650/15000 [21:39<10:23,  6.98it/s]


 epoch: 10649 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

input:       services are produced and exchanged according to demand and supply between participants economic agents by barter or medium of

target:      services are produced and exchanged according to demand and supply between participants economic agents by barter or medium of exchange

prediction:  services are produced and exchanged according to demand and supply between participants economic agents by barter or medium of the

 epoch: 10650 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%


 71%|███████   | 10652/15000 [21:40<09:33,  7.58it/s]


 epoch: 10651 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10652 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 71%|███████   | 10654/15000 [21:40<09:22,  7.73it/s]


 epoch: 10653 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10654 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 71%|███████   | 10656/15000 [21:40<09:21,  7.74it/s]


 epoch: 10655 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 10656 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 71%|███████   | 10658/15000 [21:41<11:31,  6.28it/s]


 epoch: 10657 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10658 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 71%|███████   | 10659/15000 [21:41<10:41,  6.77it/s]


 epoch: 10659 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

input:       the largest percentage of any country the also ranks first in the number of dollar billionaires and millionaires

target:      the largest percentage of any country the also ranks first in the number of dollar billionaires and millionaires with

prediction:  the largest percentage of any country the also ranks first in the number of dollar billionaires and millionaires the


 71%|███████   | 10661/15000 [21:41<10:54,  6.63it/s]


 epoch: 10660 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10661 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%


 71%|███████   | 10663/15000 [21:41<10:33,  6.84it/s]


 epoch: 10662 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

 epoch: 10663 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 71%|███████   | 10665/15000 [21:42<09:13,  7.83it/s]


 epoch: 10664 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.4%

 epoch: 10665 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 71%|███████   | 10667/15000 [21:42<08:46,  8.24it/s]


 epoch: 10666 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 10667 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 71%|███████   | 10669/15000 [21:42<08:38,  8.36it/s]


 epoch: 10668 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10669 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 71%|███████   | 10670/15000 [21:42<09:56,  7.26it/s]


input:       formed themselves into two leagues the achaean league including thebes corinth and argos and the aetolian league including sparta

target:      formed themselves into two leagues the achaean league including thebes corinth and argos and the aetolian league including sparta and

prediction:  formed themselves into two leagues the achaean league including thebes corinth and argos and the aetolian league including sparta the

 epoch: 10670 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 71%|███████   | 10672/15000 [21:43<14:29,  4.98it/s]


 epoch: 10671 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 10672 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 71%|███████   | 10674/15000 [21:43<11:37,  6.20it/s]


 epoch: 10673 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10674 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 71%|███████   | 10676/15000 [21:43<09:52,  7.30it/s]


 epoch: 10675 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 10676 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 71%|███████   | 10678/15000 [21:44<08:48,  8.18it/s]


 epoch: 10677 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 10678 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 71%|███████   | 10680/15000 [21:44<08:50,  8.14it/s]


 epoch: 10679 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

input:       for its relatively large worlds the playstation edition also received generally favorable reviews being compared to the xbox edition

target:      for its relatively large worlds the playstation edition also received generally favorable reviews being compared to the xbox edition and

prediction:  for its relatively large worlds the playstation edition also received generally favorable reviews being compared to the xbox edition the

 epoch: 10680 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 71%|███████   | 10682/15000 [21:44<08:13,  8.75it/s]


 epoch: 10681 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10682 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 71%|███████   | 10684/15000 [21:44<07:44,  9.30it/s]


 epoch: 10683 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10684 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 71%|███████   | 10687/15000 [21:45<11:36,  6.19it/s]


 epoch: 10685 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10686 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10687 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 71%|███████▏  | 10688/15000 [21:45<10:40,  6.73it/s]


 epoch: 10688 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10689 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

input:       new zealand have large european derived populations africa has no countries with european derived majorities or with the exception

target:      new zealand have large european derived populations africa has no countries with european derived majorities or with the exception of

prediction:  new zealand have large european derived populations africa has no countries with european derived majorities or with the exception the


 71%|███████▏  | 10691/15000 [21:45<08:56,  8.02it/s]


 epoch: 10690 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

 epoch: 10691 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10692 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 71%|███████▏  | 10694/15000 [21:46<08:03,  8.91it/s]


 epoch: 10693 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 10694 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 71%|███████▏  | 10696/15000 [21:46<07:28,  9.60it/s]


 epoch: 10695 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10696 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10697 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 71%|███████▏  | 10698/15000 [21:46<07:13,  9.92it/s]


 epoch: 10698 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10699 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

input:       religious performance and learning activities however by the th century the madrassa was introduced school that was built independently

target:      religious performance and learning activities however by the th century the madrassa was introduced school that was built independently from

prediction:  religious performance and learning activities however by the th century the madrassa was introduced school that was built independently the


 71%|███████▏  | 10701/15000 [21:46<07:22,  9.72it/s]


 epoch: 10700 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10701 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.2%

 epoch: 10702 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 71%|███████▏  | 10705/15000 [21:47<06:46, 10.57it/s]


 epoch: 10703 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10704 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.2%

 epoch: 10705 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 71%|███████▏  | 10707/15000 [21:47<06:41, 10.70it/s]


 epoch: 10706 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10707 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10708 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 71%|███████▏  | 10709/15000 [21:47<06:48, 10.49it/s]


 epoch: 10709 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

input:       hawaii starting in in the first american missionaries arrived to preach christianity and teach the hawaiians western ways as

target:      hawaii starting in in the first american missionaries arrived to preach christianity and teach the hawaiians western ways as of

prediction:  hawaii starting in in the first american missionaries arrived to preach christianity and teach the hawaiians western ways as the

 epoch: 10710 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 71%|███████▏  | 10712/15000 [21:47<07:26,  9.60it/s]


 epoch: 10711 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10712 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10713 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 71%|███████▏  | 10716/15000 [21:48<10:38,  6.70it/s]


 epoch: 10714 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10715 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

 epoch: 10716 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%


 71%|███████▏  | 10718/15000 [21:48<09:13,  7.73it/s]


 epoch: 10717 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 10718 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 10719 | train_loss: 0.22, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.3%


 71%|███████▏  | 10720/15000 [21:49<09:02,  7.89it/s]


input:       founded june the spanish explorer alonso de salazar landed in the marshall islands in they were named by krusenstern

target:      founded june the spanish explorer alonso de salazar landed in the marshall islands in they were named by krusenstern after

prediction:  founded june the spanish explorer alonso de salazar landed in the marshall islands in they were named by krusenstern the

 epoch: 10720 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 10721 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%


 71%|███████▏  | 10724/15000 [21:49<07:32,  9.44it/s]


 epoch: 10722 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 10723 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%

 epoch: 10724 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 72%|███████▏  | 10726/15000 [21:49<07:18,  9.75it/s]


 epoch: 10725 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.2%

 epoch: 10726 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10727 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 72%|███████▏  | 10728/15000 [21:50<09:53,  7.20it/s]


 epoch: 10728 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10729 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

input:       the linguistic system but are an important part of how people use language as social tool for constructing groups

target:      the linguistic system but are an important part of how people use language as social tool for constructing groups however

prediction:  the linguistic system but are an important part of how people use language as social tool for constructing groups the

 epoch: 10730 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 72%|███████▏  | 10732/15000 [21:50<08:11,  8.68it/s]


 epoch: 10731 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10732 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 10733 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 72%|███████▏  | 10736/15000 [21:50<07:03, 10.07it/s]


 epoch: 10734 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10735 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10736 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.0%


 72%|███████▏  | 10738/15000 [21:50<06:54, 10.29it/s]


 epoch: 10737 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10738 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10739 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 72%|███████▏  | 10740/15000 [21:51<07:23,  9.61it/s]


input:       comedy however both humour and comic are often used when theorising about the subject the connotations of humour as

target:      comedy however both humour and comic are often used when theorising about the subject the connotations of humour as opposed

prediction:  comedy however both humour and comic are often used when theorising about the subject the connotations of humour as the

 epoch: 10740 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 10741 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 72%|███████▏  | 10744/15000 [21:52<10:01,  7.07it/s]


 epoch: 10742 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 10743 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 10744 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 72%|███████▏  | 10746/15000 [21:52<09:00,  7.87it/s]


 epoch: 10745 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 10746 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10747 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%


 72%|███████▏  | 10750/15000 [21:52<07:50,  9.03it/s]


 epoch: 10748 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%

 epoch: 10749 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

input:       had many iterations and currently uses lucidworks fusion to implementation in reddit released its first mobile web interface for

target:      had many iterations and currently uses lucidworks fusion to implementation in reddit released its first mobile web interface for easier

prediction:  had many iterations and currently uses lucidworks fusion to implementation in reddit released its first mobile web interface for the

 epoch: 10750 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%


 72%|███████▏  | 10752/15000 [21:52<07:25,  9.54it/s]


 epoch: 10751 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 10752 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10753 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 72%|███████▏  | 10754/15000 [21:52<07:03, 10.02it/s]


 epoch: 10754 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10755 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 72%|███████▏  | 10757/15000 [21:53<10:51,  6.51it/s]


 epoch: 10756 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10757 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10758 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 72%|███████▏  | 10760/15000 [21:54<09:58,  7.09it/s]


 epoch: 10759 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

input:       although other regions did not begin ironworking until the early centuries ce copper objects from egypt north africa nubia

target:      although other regions did not begin ironworking until the early centuries ce copper objects from egypt north africa nubia and

prediction:  although other regions did not begin ironworking until the early centuries ce copper objects from egypt north africa nubia the

 epoch: 10760 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 72%|███████▏  | 10761/15000 [21:54<09:23,  7.52it/s]


 epoch: 10761 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 10762 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10763 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 72%|███████▏  | 10764/15000 [21:54<08:19,  8.49it/s]



 epoch: 10764 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 72%|███████▏  | 10766/15000 [21:54<07:40,  9.20it/s]


 epoch: 10765 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 10766 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 10767 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 72%|███████▏  | 10768/15000 [21:54<07:17,  9.67it/s]


 epoch: 10768 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 10769 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 96.9%

input:       harassment and new content guidelines these new content guidelines were aimed at banning content inciting violence and quarantining offensive


 72%|███████▏  | 10770/15000 [21:55<08:13,  8.57it/s]


target:      harassment and new content guidelines these new content guidelines were aimed at banning content inciting violence and quarantining offensive material

prediction:  harassment and new content guidelines these new content guidelines were aimed at banning content inciting violence and quarantining offensive the

 epoch: 10770 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 72%|███████▏  | 10772/15000 [21:55<10:21,  6.81it/s]


 epoch: 10771 | train_loss: 0.26, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10772 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 72%|███████▏  | 10774/15000 [21:55<09:34,  7.35it/s]


 epoch: 10773 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.4%

 epoch: 10774 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 72%|███████▏  | 10776/15000 [21:56<08:57,  7.85it/s]


 epoch: 10775 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10776 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 72%|███████▏  | 10778/15000 [21:56<08:26,  8.33it/s]


 epoch: 10777 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10778 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 72%|███████▏  | 10779/15000 [21:56<08:22,  8.39it/s]


 epoch: 10779 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       longevity of its hosts will generally survive longer on the contrary meme that shortens the longevity of its hosts

target:      longevity of its hosts will generally survive longer on the contrary meme that shortens the longevity of its hosts will

prediction:  longevity of its hosts will generally survive longer on the contrary meme that shortens the longevity of its hosts the

 epoch: 10780 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 72%|███████▏  | 10782/15000 [21:56<09:15,  7.59it/s]


 epoch: 10781 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10782 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 72%|███████▏  | 10784/15000 [21:57<09:24,  7.47it/s]


 epoch: 10783 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 10784 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 72%|███████▏  | 10786/15000 [21:57<09:25,  7.45it/s]


 epoch: 10785 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.0%

 epoch: 10786 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 72%|███████▏  | 10788/15000 [21:57<08:41,  8.07it/s]


 epoch: 10787 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10788 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 72%|███████▏  | 10790/15000 [21:57<10:05,  6.96it/s]


 epoch: 10789 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

input:       of coordinates on spacetime there is no generally covariant sense in which space expands the recession speeds associated with

target:      of coordinates on spacetime there is no generally covariant sense in which space expands the recession speeds associated with hubble

prediction:  of coordinates on spacetime there is no generally covariant sense in which space expands the recession speeds associated with the

 epoch: 10790 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 72%|███████▏  | 10792/15000 [21:58<09:20,  7.51it/s]


 epoch: 10791 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 10792 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 72%|███████▏  | 10794/15000 [21:58<08:34,  8.17it/s]


 epoch: 10793 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10794 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 72%|███████▏  | 10796/15000 [21:58<08:12,  8.54it/s]


 epoch: 10795 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10796 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 72%|███████▏  | 10797/15000 [21:58<07:54,  8.86it/s]


 epoch: 10797 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 10798 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 72%|███████▏  | 10800/15000 [21:59<13:32,  5.17it/s]


 epoch: 10799 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

input:       later received support for texture packs in its twelfth title update while introducing mash up packs which combined texture

target:      later received support for texture packs in its twelfth title update while introducing mash up packs which combined texture packs

prediction:  later received support for texture packs in its twelfth title update while introducing mash up packs which combined texture the

 epoch: 10800 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 72%|███████▏  | 10803/15000 [21:59<09:36,  7.29it/s]


 epoch: 10801 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10802 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 10803 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 72%|███████▏  | 10806/15000 [22:00<07:52,  8.88it/s]


 epoch: 10804 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 10805 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%

 epoch: 10806 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.4%


 72%|███████▏  | 10809/15000 [22:00<07:05,  9.84it/s]


 epoch: 10807 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 10808 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10809 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%


 72%|███████▏  | 10811/15000 [22:00<07:37,  9.16it/s]


input:       differences in their mechanisms and interactions of the markers found in each following the model put forth by lawrence

target:      differences in their mechanisms and interactions of the markers found in each following the model put forth by lawrence lessig

prediction:  differences in their mechanisms and interactions of the markers found in each following the model put forth by lawrence the

 epoch: 10810 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10811 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 72%|███████▏  | 10812/15000 [22:00<07:29,  9.31it/s]


 epoch: 10812 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 72%|███████▏  | 10814/15000 [22:01<12:37,  5.52it/s]


 epoch: 10813 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10814 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10815 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%


 72%|███████▏  | 10818/15000 [22:01<08:38,  8.07it/s]


 epoch: 10816 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 10817 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 10818 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 72%|███████▏  | 10820/15000 [22:01<08:14,  8.46it/s]


 epoch: 10819 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

input:       of psychology in selecting and evaluating workers another subfield organizational psychology examines the effects of work environments and management

target:      of psychology in selecting and evaluating workers another subfield organizational psychology examines the effects of work environments and management styles

prediction:  of psychology in selecting and evaluating workers another subfield organizational psychology examines the effects of work environments and management the

 epoch: 10820 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 72%|███████▏  | 10822/15000 [22:02<07:31,  9.26it/s]


 epoch: 10821 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10822 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 10823 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.1%


 72%|███████▏  | 10826/15000 [22:02<06:44, 10.31it/s]


 epoch: 10824 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10825 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10826 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%

 epoch: 10827 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 72%|███████▏  | 10828/15000 [22:02<09:55,  7.01it/s]


 epoch: 10828 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%

 epoch: 10829 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

input:       as the channel islands the farallon islands and vancouver island all of these islands lie in or close to

target:      as the channel islands the farallon islands and vancouver island all of these islands lie in or close to the

prediction:  as the channel islands the farallon islands and vancouver island all of these islands lie in or close to the


 72%|███████▏  | 10831/15000 [22:03<08:49,  7.87it/s]


 epoch: 10830 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10831 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%


 72%|███████▏  | 10833/15000 [22:03<07:58,  8.70it/s]


 epoch: 10832 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.0%

 epoch: 10833 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10834 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.2%


 72%|███████▏  | 10837/15000 [22:03<06:54, 10.05it/s]


 epoch: 10835 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 10836 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

 epoch: 10837 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 72%|███████▏  | 10839/15000 [22:03<06:45, 10.26it/s]


 epoch: 10838 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.2%

 epoch: 10839 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

input:       cartographer herman moll suggested in europe was bounded by series of partly joined waterways directed towards the turkish straits

target:      cartographer herman moll suggested in europe was bounded by series of partly joined waterways directed towards the turkish straits and

prediction:  cartographer herman moll suggested in europe was bounded by series of partly joined waterways directed towards the turkish straits the


 72%|███████▏  | 10841/15000 [22:04<06:59,  9.91it/s]


 epoch: 10840 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10841 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 72%|███████▏  | 10843/15000 [22:04<11:01,  6.28it/s]


 epoch: 10842 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10843 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10844 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%


 72%|███████▏  | 10847/15000 [22:05<08:33,  8.08it/s]


 epoch: 10845 | train_loss: 0.22, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.0%

 epoch: 10846 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10847 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.0%


 72%|███████▏  | 10849/15000 [22:05<07:53,  8.77it/s]


 epoch: 10848 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10849 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

input:       league for counter strike countering concerns over the state of the current promotion relegation leagues the league was to

target:      league for counter strike countering concerns over the state of the current promotion relegation leagues the league was to be

prediction:  league for counter strike countering concerns over the state of the current promotion relegation leagues the league was to the


 72%|███████▏  | 10851/15000 [22:05<07:43,  8.95it/s]


 epoch: 10850 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10851 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 10852 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 72%|███████▏  | 10854/15000 [22:05<07:22,  9.37it/s]


 epoch: 10853 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 10854 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10855 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 72%|███████▏  | 10857/15000 [22:06<11:14,  6.14it/s]


 epoch: 10856 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10857 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 10858 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 72%|███████▏  | 10860/15000 [22:06<09:17,  7.43it/s]


 epoch: 10859 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

input:       money the deal was closed on april in early july the github archive program was established to archive its

target:      money the deal was closed on april in early july the github archive program was established to archive its open

prediction:  money the deal was closed on april in early july the github archive program was established to archive its the

 epoch: 10860 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 72%|███████▏  | 10862/15000 [22:07<08:24,  8.20it/s]


 epoch: 10861 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10862 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 72%|███████▏  | 10864/15000 [22:07<07:46,  8.86it/s]


 epoch: 10863 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10864 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 72%|███████▏  | 10867/15000 [22:07<07:12,  9.55it/s]


 epoch: 10865 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10866 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10867 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 72%|███████▏  | 10868/15000 [22:07<07:10,  9.59it/s]


 epoch: 10868 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 72%|███████▏  | 10870/15000 [22:08<13:04,  5.26it/s]


 epoch: 10869 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.20, test_acc: 97.3%

input:       the future that we will be able to influence the presence of either type of horizon depends on the

target:      the future that we will be able to influence the presence of either type of horizon depends on the details

prediction:  the future that we will be able to influence the presence of either type of horizon depends on the the

 epoch: 10870 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.3%


 72%|███████▏  | 10872/15000 [22:08<10:34,  6.50it/s]


 epoch: 10871 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10872 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 72%|███████▏  | 10874/15000 [22:08<09:26,  7.29it/s]


 epoch: 10873 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10874 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 73%|███████▎  | 10876/15000 [22:09<08:57,  7.67it/s]


 epoch: 10875 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 10876 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 73%|███████▎  | 10878/15000 [22:09<08:21,  8.22it/s]


 epoch: 10877 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 10878 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 73%|███████▎  | 10880/15000 [22:09<09:38,  7.12it/s]


 epoch: 10879 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

input:       the ice was being held back by thread of ice about km mi wide prior to its collapse in

target:      the ice was being held back by thread of ice about km mi wide prior to its collapse in as

prediction:  the ice was being held back by thread of ice about km mi wide prior to its collapse in the

 epoch: 10880 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 73%|███████▎  | 10882/15000 [22:09<09:10,  7.48it/s]


 epoch: 10881 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10882 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 73%|███████▎  | 10883/15000 [22:09<09:04,  7.56it/s]


 epoch: 10883 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%


 73%|███████▎  | 10885/15000 [22:10<10:29,  6.53it/s]


 epoch: 10884 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 10885 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 73%|███████▎  | 10887/15000 [22:10<09:13,  7.43it/s]


 epoch: 10886 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%

 epoch: 10887 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 73%|███████▎  | 10889/15000 [22:10<08:32,  8.03it/s]


 epoch: 10888 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10889 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 73%|███████▎  | 10890/15000 [22:11<10:15,  6.68it/s]


input:       bans included the donald gendercritical the platform largest and most active anti transgender radical feminist subreddit and chapotraphouse far

target:      bans included the donald gendercritical the platform largest and most active anti transgender radical feminist subreddit and chapotraphouse far left

prediction:  bans included the donald gendercritical the platform largest and most active anti transgender radical feminist subreddit and chapotraphouse far the

 epoch: 10890 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 73%|███████▎  | 10892/15000 [22:11<09:14,  7.41it/s]


 epoch: 10891 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 10892 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.1%


 73%|███████▎  | 10894/15000 [22:11<09:06,  7.51it/s]


 epoch: 10893 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 10894 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 73%|███████▎  | 10896/15000 [22:11<09:14,  7.40it/s]


 epoch: 10895 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10896 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 73%|███████▎  | 10898/15000 [22:12<09:37,  7.10it/s]


 epoch: 10897 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10898 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 73%|███████▎  | 10900/15000 [22:12<09:43,  7.02it/s]


 epoch: 10899 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

input:       that there are differences in their mechanisms and interactions of the markers found in each following the model put

target:      that there are differences in their mechanisms and interactions of the markers found in each following the model put forth

prediction:  that there are differences in their mechanisms and interactions of the markers found in each following the model put the

 epoch: 10900 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 73%|███████▎  | 10902/15000 [22:12<09:11,  7.42it/s]


 epoch: 10901 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10902 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 73%|███████▎  | 10905/15000 [22:12<07:40,  8.90it/s]


 epoch: 10903 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10904 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.2%

 epoch: 10905 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 73%|███████▎  | 10907/15000 [22:13<07:16,  9.38it/s]


 epoch: 10906 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 10907 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10908 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 73%|███████▎  | 10909/15000 [22:13<07:06,  9.58it/s]



 epoch: 10909 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

input:       and igneous petrology aims to determine the history of igneous rocks from their original molten source to their final

target:      and igneous petrology aims to determine the history of igneous rocks from their original molten source to their final crystallization


 73%|███████▎  | 10911/15000 [22:13<07:45,  8.78it/s]


prediction:  and igneous petrology aims to determine the history of igneous rocks from their original molten source to their final the

 epoch: 10910 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 10911 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.0%


 73%|███████▎  | 10912/15000 [22:13<07:37,  8.93it/s]


 epoch: 10912 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 73%|███████▎  | 10915/15000 [22:14<11:24,  5.97it/s]


 epoch: 10913 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%

 epoch: 10914 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 10915 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 73%|███████▎  | 10918/15000 [22:14<08:58,  7.58it/s]


 epoch: 10916 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 10917 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 10918 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 73%|███████▎  | 10920/15000 [22:14<08:28,  8.03it/s]


 epoch: 10919 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

input:       drastically reduced in recent years in it dropped to million in to million in to million and in to

target:      drastically reduced in recent years in it dropped to million in to million in to million and in to thousand

prediction:  drastically reduced in recent years in it dropped to million in to million in to million and in to the

 epoch: 10920 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%


 73%|███████▎  | 10923/15000 [22:15<07:35,  8.95it/s]


 epoch: 10921 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10922 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.3%

 epoch: 10923 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 73%|███████▎  | 10925/15000 [22:15<07:23,  9.18it/s]


 epoch: 10924 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10925 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.9%


 73%|███████▎  | 10928/15000 [22:16<10:55,  6.21it/s]


 epoch: 10926 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.1%

 epoch: 10927 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10928 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 96.9%


 73%|███████▎  | 10930/15000 [22:16<09:54,  6.85it/s]


 epoch: 10929 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

input:       europe germanic languages are spoken in western northern and central europe as well as in gibraltar and malta in

target:      europe germanic languages are spoken in western northern and central europe as well as in gibraltar and malta in southern

prediction:  europe germanic languages are spoken in western northern and central europe as well as in gibraltar and malta in the

 epoch: 10930 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 73%|███████▎  | 10933/15000 [22:16<08:09,  8.30it/s]


 epoch: 10931 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10932 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10933 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 73%|███████▎  | 10935/15000 [22:16<07:37,  8.88it/s]


 epoch: 10934 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10935 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10936 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.1%


 73%|███████▎  | 10939/15000 [22:17<06:25, 10.54it/s]


 epoch: 10937 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10938 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10939 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

input:       europe allowed states to use the immense property of the church for the development of towns the influence of

target:      europe allowed states to use the immense property of the church for the development of towns the influence of the

prediction:  europe allowed states to use the immense property of the church for the development of towns the influence of the


 73%|███████▎  | 10941/15000 [22:17<09:07,  7.41it/s]


 epoch: 10940 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 10941 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10942 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 73%|███████▎  | 10945/15000 [22:17<07:20,  9.22it/s]


 epoch: 10943 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 10944 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10945 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.4%


 73%|███████▎  | 10947/15000 [22:18<07:02,  9.59it/s]


 epoch: 10946 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 10947 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 10948 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 73%|███████▎  | 10949/15000 [22:18<06:49,  9.89it/s]


 epoch: 10949 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

input:       with preexisting classroom assignments psychologists will compare the achievement of children attending phonics and whole language classes and perhaps

target:      with preexisting classroom assignments psychologists will compare the achievement of children attending phonics and whole language classes and perhaps statistically

prediction:  with preexisting classroom assignments psychologists will compare the achievement of children attending phonics and whole language classes and perhaps the

 epoch: 10950 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 73%|███████▎  | 10952/15000 [22:18<07:09,  9.42it/s]


 epoch: 10951 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10952 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 73%|███████▎  | 10953/15000 [22:18<07:07,  9.48it/s]


 epoch: 10953 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 73%|███████▎  | 10956/15000 [22:19<10:46,  6.25it/s]


 epoch: 10954 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 10955 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10956 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 73%|███████▎  | 10959/15000 [22:19<08:26,  7.98it/s]


 epoch: 10957 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 10958 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.20, test_acc: 97.3%

 epoch: 10959 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 73%|███████▎  | 10960/15000 [22:19<08:39,  7.77it/s]


input:       the new genetic variants also to the other populations depending on how far two species have diverged since their

target:      the new genetic variants also to the other populations depending on how far two species have diverged since their most

prediction:  the new genetic variants also to the other populations depending on how far two species have diverged since their the

 epoch: 10960 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10961 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 73%|███████▎  | 10963/15000 [22:20<07:30,  8.96it/s]


 epoch: 10962 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10963 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10964 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 73%|███████▎  | 10967/15000 [22:20<06:41, 10.04it/s]


 epoch: 10965 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 10966 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 10967 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 73%|███████▎  | 10969/15000 [22:21<11:53,  5.65it/s]


 epoch: 10968 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 10969 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

input:       be quick and relatively painless for honestiores while humiliores might suffer the kinds of torturous death previously reserved for

target:      be quick and relatively painless for honestiores while humiliores might suffer the kinds of torturous death previously reserved for slaves

prediction:  be quick and relatively painless for honestiores while humiliores might suffer the kinds of torturous death previously reserved for the


 73%|███████▎  | 10972/15000 [22:21<09:32,  7.03it/s]


 epoch: 10970 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%

 epoch: 10971 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 10972 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 73%|███████▎  | 10975/15000 [22:21<07:56,  8.45it/s]


 epoch: 10973 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10974 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 10975 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 73%|███████▎  | 10977/15000 [22:22<07:18,  9.17it/s]


 epoch: 10976 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 10977 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10978 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%


 73%|███████▎  | 10979/15000 [22:22<06:48,  9.84it/s]


 epoch: 10979 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

input:       exhibits in minecraft in conjunction with members of the public microsoft and the non profit organisation code org had

target:      exhibits in minecraft in conjunction with members of the public microsoft and the non profit organisation code org had teamed

prediction:  exhibits in minecraft in conjunction with members of the public microsoft and the non profit organisation code org had the

 epoch: 10980 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.3%


 73%|███████▎  | 10982/15000 [22:22<07:07,  9.40it/s]


 epoch: 10981 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10982 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 73%|███████▎  | 10984/15000 [22:22<08:56,  7.48it/s]


 epoch: 10983 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10984 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%


 73%|███████▎  | 10986/15000 [22:23<08:34,  7.81it/s]


 epoch: 10985 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 10986 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.3%


 73%|███████▎  | 10988/15000 [22:23<08:16,  8.08it/s]


 epoch: 10987 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.3%

 epoch: 10988 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 73%|███████▎  | 10990/15000 [22:23<09:44,  6.87it/s]


 epoch: 10989 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

input:       coefficient the analysis of variance multiple linear regression logistic regression structural equation modeling and hierarchical linear modeling the measurement

target:      coefficient the analysis of variance multiple linear regression logistic regression structural equation modeling and hierarchical linear modeling the measurement and

prediction:  coefficient the analysis of variance multiple linear regression logistic regression structural equation modeling and hierarchical linear modeling the measurement the

 epoch: 10990 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 73%|███████▎  | 10992/15000 [22:23<09:14,  7.23it/s]


 epoch: 10991 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 10992 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%


 73%|███████▎  | 10994/15000 [22:24<09:01,  7.40it/s]


 epoch: 10993 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 10994 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 73%|███████▎  | 10996/15000 [22:24<08:53,  7.50it/s]


 epoch: 10995 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 10996 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 73%|███████▎  | 10998/15000 [22:25<14:30,  4.59it/s]


 epoch: 10997 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%

 epoch: 10998 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 73%|███████▎  | 11000/15000 [22:25<11:54,  5.60it/s]


 epoch: 10999 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

input:       then expanded into greece and the eastern mediterranean while series of internal conflicts led to the republic becoming an

target:      then expanded into greece and the eastern mediterranean while series of internal conflicts led to the republic becoming an empire

prediction:  then expanded into greece and the eastern mediterranean while series of internal conflicts led to the republic becoming an the

 epoch: 11000 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 73%|███████▎  | 11002/15000 [22:25<09:22,  7.11it/s]


 epoch: 11001 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.4%

 epoch: 11002 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 11003 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 73%|███████▎  | 11004/15000 [22:25<08:07,  8.19it/s]



 epoch: 11004 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 73%|███████▎  | 11006/15000 [22:26<07:47,  8.55it/s]


 epoch: 11005 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 11006 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 73%|███████▎  | 11008/15000 [22:26<07:50,  8.48it/s]


 epoch: 11007 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11008 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 73%|███████▎  | 11009/15000 [22:26<07:54,  8.41it/s]


 epoch: 11009 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

input:       comedians use open mics to work on material or to show off their skills to get an opener slot

target:      comedians use open mics to work on material or to show off their skills to get an opener slot bringer

prediction:  comedians use open mics to work on material or to show off their skills to get an opener slot the


 73%|███████▎  | 11011/15000 [22:26<09:03,  7.34it/s]


 epoch: 11010 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%

 epoch: 11011 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 73%|███████▎  | 11013/15000 [22:27<11:38,  5.71it/s]


 epoch: 11012 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11013 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 73%|███████▎  | 11015/15000 [22:27<09:17,  7.15it/s]


 epoch: 11014 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 11015 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 73%|███████▎  | 11017/15000 [22:27<08:15,  8.03it/s]


 epoch: 11016 | train_loss: 0.25, train_acc: 96.7% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11017 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 73%|███████▎  | 11019/15000 [22:27<07:25,  8.93it/s]


 epoch: 11018 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11019 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

input:       earth one can reasonably model earth mass temperature and rate of rotation as function of time allowing one to

target:      earth one can reasonably model earth mass temperature and rate of rotation as function of time allowing one to extrapolate

prediction:  earth one can reasonably model earth mass temperature and rate of rotation as function of time allowing one to the


 73%|███████▎  | 11022/15000 [22:28<07:14,  9.15it/s]


 epoch: 11020 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%

 epoch: 11021 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11022 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 73%|███████▎  | 11024/15000 [22:28<06:46,  9.79it/s]


 epoch: 11023 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 11024 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11025 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 74%|███████▎  | 11028/15000 [22:28<07:32,  8.79it/s]


 epoch: 11026 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11027 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11028 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 74%|███████▎  | 11030/15000 [22:29<07:17,  9.07it/s]


 epoch: 11029 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

input:       and related concepts existential psychology emphasizes the need to understand client total orientation towards the world existential psychology is

target:      and related concepts existential psychology emphasizes the need to understand client total orientation towards the world existential psychology is opposed

prediction:  and related concepts existential psychology emphasizes the need to understand client total orientation towards the world existential psychology is the

 epoch: 11030 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11031 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 74%|███████▎  | 11032/15000 [22:29<06:50,  9.67it/s]



 epoch: 11032 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11033 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.1%


 74%|███████▎  | 11036/15000 [22:29<06:18, 10.47it/s]


 epoch: 11034 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.0%

 epoch: 11035 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%

 epoch: 11036 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 74%|███████▎  | 11038/15000 [22:29<06:21, 10.40it/s]


 epoch: 11037 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%

 epoch: 11038 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 74%|███████▎  | 11040/15000 [22:30<11:09,  5.91it/s]


 epoch: 11039 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%

input:       the system gets the state described in the configuration dsc configurations are idempotent the local configuration manager lcm periodically

target:      the system gets the state described in the configuration dsc configurations are idempotent the local configuration manager lcm periodically polls

prediction:  the system gets the state described in the configuration dsc configurations are idempotent the local configuration manager lcm periodically the

 epoch: 11040 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 74%|███████▎  | 11042/15000 [22:30<09:35,  6.87it/s]


 epoch: 11041 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11042 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11043 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 74%|███████▎  | 11046/15000 [22:31<07:22,  8.94it/s]


 epoch: 11044 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11045 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11046 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 74%|███████▎  | 11048/15000 [22:31<06:48,  9.68it/s]


 epoch: 11047 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 11048 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 11049 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 74%|███████▎  | 11050/15000 [22:31<07:00,  9.38it/s]


input:       whom survived infancy who were much younger than he the last was born when leonardo was years old and

target:      whom survived infancy who were much younger than he the last was born when leonardo was years old and with

prediction:  whom survived infancy who were much younger than he the last was born when leonardo was years old and the

 epoch: 11050 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11051 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 74%|███████▎  | 11052/15000 [22:31<06:49,  9.63it/s]


 epoch: 11052 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 74%|███████▎  | 11054/15000 [22:32<09:43,  6.77it/s]


 epoch: 11053 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.4%

 epoch: 11054 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11055 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 74%|███████▎  | 11058/15000 [22:32<07:32,  8.72it/s]


 epoch: 11056 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%

 epoch: 11057 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11058 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 74%|███████▎  | 11060/15000 [22:32<07:24,  8.86it/s]


 epoch: 11059 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

input:       inner core these advances led to the development of layered model of the earth with crust and lithosphere on

target:      inner core these advances led to the development of layered model of the earth with crust and lithosphere on top

prediction:  inner core these advances led to the development of layered model of the earth with crust and lithosphere on the

 epoch: 11060 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 74%|███████▎  | 11062/15000 [22:32<06:56,  9.45it/s]


 epoch: 11061 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 11062 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11063 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 74%|███████▍  | 11066/15000 [22:33<06:29, 10.10it/s]


 epoch: 11064 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11065 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.25, test_acc: 97.0%

 epoch: 11066 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 11067 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 74%|███████▍  | 11069/15000 [22:33<09:02,  7.24it/s]


 epoch: 11068 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11069 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       hold the religion taught them by their parents throughout their life many religions feature adversarial elements punishing apostasy for

target:      hold the religion taught them by their parents throughout their life many religions feature adversarial elements punishing apostasy for instance


 74%|███████▍  | 11072/15000 [22:34<07:52,  8.32it/s]


prediction:  hold the religion taught them by their parents throughout their life many religions feature adversarial elements punishing apostasy for the

 epoch: 11070 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11071 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11072 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 74%|███████▍  | 11075/15000 [22:34<06:55,  9.44it/s]


 epoch: 11073 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11074 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 11075 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 74%|███████▍  | 11077/15000 [22:34<06:34,  9.94it/s]


 epoch: 11076 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11077 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11078 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 74%|███████▍  | 11079/15000 [22:34<06:30, 10.05it/s]


 epoch: 11079 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.2%

input:       is the most reproduced religious painting of all time and his vitruvian man drawing is also regarded as cultural

target:      is the most reproduced religious painting of all time and his vitruvian man drawing is also regarded as cultural icon

prediction:  is the most reproduced religious painting of all time and his vitruvian man drawing is also regarded as cultural the

 epoch: 11080 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 74%|███████▍  | 11083/15000 [22:35<09:35,  6.81it/s]


 epoch: 11081 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 11082 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11083 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 74%|███████▍  | 11086/15000 [22:35<07:55,  8.23it/s]


 epoch: 11084 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 11085 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 11086 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%


 74%|███████▍  | 11088/15000 [22:36<07:11,  9.06it/s]


 epoch: 11087 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11088 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 11089 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 74%|███████▍  | 11090/15000 [22:36<07:04,  9.21it/s]


input:       in the form of horse head with alberti leonardo visited the home of the medici and through them came

target:      in the form of horse head with alberti leonardo visited the home of the medici and through them came to

prediction:  in the form of horse head with alberti leonardo visited the home of the medici and through them came the

 epoch: 11090 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11091 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 74%|███████▍  | 11094/15000 [22:36<06:30, 10.02it/s]


 epoch: 11092 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 11093 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 11094 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 11095 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 74%|███████▍  | 11097/15000 [22:37<08:16,  7.86it/s]


 epoch: 11096 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11097 | train_loss: 0.26, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 74%|███████▍  | 11099/15000 [22:37<07:48,  8.33it/s]


 epoch: 11098 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11099 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:      

 74%|███████▍  | 11100/15000 [22:37<08:49,  7.36it/s]

 city tenochtitlan was located further north in the valley of mexico the aztecs were conquered in by hern cort

target:      city tenochtitlan was located further north in the valley of mexico the aztecs were conquered in by hern cort during

prediction:  city tenochtitlan was located further north in the valley of mexico the aztecs were conquered in by hern cort the

 epoch: 11100 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 74%|███████▍  | 11102/15000 [22:37<08:08,  7.98it/s]


 epoch: 11101 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%

 epoch: 11102 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 74%|███████▍  | 11104/15000 [22:38<07:55,  8.19it/s]


 epoch: 11103 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 11104 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%


 74%|███████▍  | 11106/15000 [22:38<08:34,  7.57it/s]


 epoch: 11105 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11106 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 74%|███████▍  | 11108/15000 [22:38<08:22,  7.75it/s]


 epoch: 11107 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 11108 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 74%|███████▍  | 11109/15000 [22:38<08:31,  7.61it/s]


 epoch: 11109 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

input:       to disclose its loot box gambling odds in november an update to the competitive matchmaking was announced called the

target:      to disclose its loot box gambling odds in november an update to the competitive matchmaking was announced called the trust

prediction:  to disclose its loot box gambling odds in november an update to the competitive matchmaking was announced called the the


 74%|███████▍  | 11111/15000 [22:39<15:02,  4.31it/s]


 epoch: 11110 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11111 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 74%|███████▍  | 11113/15000 [22:39<11:22,  5.69it/s]


 epoch: 11112 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.0%

 epoch: 11113 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 74%|███████▍  | 11115/15000 [22:40<09:38,  6.71it/s]


 epoch: 11114 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11115 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 74%|███████▍  | 11117/15000 [22:40<08:40,  7.46it/s]


 epoch: 11116 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11117 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 74%|███████▍  | 11119/15000 [22:40<08:24,  7.69it/s]


 epoch: 11118 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 11119 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 74%|███████▍  | 11120/15000 [22:40<09:24,  6.87it/s]


input:       increase revenue for the platform later five day testing period began during the testing period streaming was for select

target:      increase revenue for the platform later five day testing period began during the testing period streaming was for select group

prediction:  increase revenue for the platform later five day testing period began during the testing period streaming was for select the

 epoch: 11120 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 74%|███████▍  | 11122/15000 [22:40<08:41,  7.44it/s]


 epoch: 11121 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.4%

 epoch: 11122 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%


 74%|███████▍  | 11124/15000 [22:41<07:58,  8.10it/s]


 epoch: 11123 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11124 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.8%


 74%|███████▍  | 11127/15000 [22:41<11:23,  5.67it/s]


 epoch: 11125 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 11126 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 11127 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.3%


 74%|███████▍  | 11128/15000 [22:41<10:09,  6.35it/s]


 epoch: 11128 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 11129 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       th to th centuries tended to differ in how to continue the boundary beyond the don bend at kalach

target:      th to th centuries tended to differ in how to continue the boundary beyond the don bend at kalach na

prediction:  th to th centuries tended to differ in how to continue the boundary beyond the don bend at kalach the


 74%|███████▍  | 11132/15000 [22:42<07:44,  8.33it/s]


 epoch: 11130 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11131 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11132 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%


 74%|███████▍  | 11134/15000 [22:42<07:07,  9.05it/s]


 epoch: 11133 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 11134 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 11135 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%


 74%|███████▍  | 11138/15000 [22:42<06:27,  9.96it/s]


 epoch: 11136 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11137 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11138 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 74%|███████▍  | 11140/15000 [22:43<11:02,  5.82it/s]


 epoch: 11139 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

input:       by french explorer jules dumont urville many scholars now replace those categories with green terms since the early but

target:      by french explorer jules dumont urville many scholars now replace those categories with green terms since the early but the

prediction:  by french explorer jules dumont urville many scholars now replace those categories with green terms since the early but the

 epoch: 11140 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.21, test_acc: 97.3%

 epoch: 11141 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 74%|███████▍  | 11144/15000 [22:43<08:12,  7.83it/s]


 epoch: 11142 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11143 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 11144 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 74%|███████▍  | 11146/15000 [22:44<07:33,  8.49it/s]


 epoch: 11145 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.27, test_acc: 96.8%

 epoch: 11146 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11147 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 74%|███████▍  | 11148/15000 [22:44<06:59,  9.17it/s]


 epoch: 11148 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11149 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

input:       to reddit ceo steve huffman expressed concern about the spread of covid misinformation on the platform in july reddit

target:      to reddit ceo steve huffman expressed concern about the spread of covid misinformation on the platform in july reddit banned

prediction:  to reddit ceo steve huffman expressed concern about the spread of covid misinformation on the platform in july reddit the


 74%|███████▍  | 11152/15000 [22:44<06:46,  9.47it/s]


 epoch: 11150 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11151 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.8%

 epoch: 11152 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 74%|███████▍  | 11154/15000 [22:45<08:09,  7.86it/s]


 epoch: 11153 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 11154 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11155 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 74%|███████▍  | 11158/15000 [22:45<06:49,  9.39it/s]


 epoch: 11156 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 11157 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11158 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 74%|███████▍  | 11160/15000 [22:45<06:49,  9.37it/s]


 epoch: 11159 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%

input:       within the city of rome itself all armed forces in the city formerly under the control of the prefects

target:      within the city of rome itself all armed forces in the city formerly under the control of the prefects were

prediction:  within the city of rome itself all armed forces in the city formerly under the control of the prefects the

 epoch: 11160 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 74%|███████▍  | 11162/15000 [22:45<06:30,  9.83it/s]


 epoch: 11161 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11162 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11163 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 74%|███████▍  | 11166/15000 [22:46<06:05, 10.50it/s]


 epoch: 11164 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 11165 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11166 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%


 74%|███████▍  | 11168/15000 [22:46<06:15, 10.20it/s]


 epoch: 11167 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11168 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.7%

 epoch: 11169 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%


 74%|███████▍  | 11170/15000 [22:46<06:34,  9.71it/s]


input:       congregates in large schools is the keystone species of the ecosystem of the southern ocean being an important food

target:      congregates in large schools is the keystone species of the ecosystem of the southern ocean being an important food organism

prediction:  congregates in large schools is the keystone species of the ecosystem of the southern ocean being an important food the

 epoch: 11170 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 11171 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 74%|███████▍  | 11174/15000 [22:46<06:05, 10.46it/s]


 epoch: 11172 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 11173 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11174 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 75%|███████▍  | 11176/15000 [22:47<05:57, 10.68it/s]


 epoch: 11175 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 11176 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11177 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 75%|███████▍  | 11180/15000 [22:47<05:57, 10.69it/s]


 epoch: 11178 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11179 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

input:       who are the original inhabitants of japan hokkaido the kuril islands and the southern part of sakhalin in their

target:      who are the original inhabitants of japan hokkaido the kuril islands and the southern part of sakhalin in their book

prediction:  who are the original inhabitants of japan hokkaido the kuril islands and the southern part of sakhalin in their the

 epoch: 11180 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%


 75%|███████▍  | 11182/15000 [22:47<08:17,  7.68it/s]


 epoch: 11181 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11182 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 11183 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 75%|███████▍  | 11186/15000 [22:48<06:55,  9.18it/s]


 epoch: 11184 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11185 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 11186 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%


 75%|███████▍  | 11188/15000 [22:48<06:30,  9.77it/s]


 epoch: 11187 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 11188 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 11189 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 75%|███████▍  | 11190/15000 [22:48<06:36,  9.61it/s]


input:       greek alphabet with the addition of some demotic signs although formal hieroglyphs were used in ceremonial role until the

target:      greek alphabet with the addition of some demotic signs although formal hieroglyphs were used in ceremonial role until the fourth

prediction:  greek alphabet with the addition of some demotic signs although formal hieroglyphs were used in ceremonial role until the the

 epoch: 11190 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11191 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 75%|███████▍  | 11194/15000 [22:49<06:07, 10.35it/s]


 epoch: 11192 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11193 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11194 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 75%|███████▍  | 11196/15000 [22:49<10:14,  6.19it/s]


 epoch: 11195 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 96.7%

 epoch: 11196 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11197 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 75%|███████▍  | 11198/15000 [22:49<08:50,  7.16it/s]


 epoch: 11198 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11199 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.0%

input:       largest measured by ppp adjusted gdp mercer quality of living survey ranks sydney tenth in the world in terms

target:      largest measured by ppp adjusted gdp mercer quality of living survey ranks sydney tenth in the world in terms of

prediction:  largest measured by ppp adjusted gdp mercer quality of living survey ranks sydney tenth in the world in terms the


 75%|███████▍  | 11202/15000 [22:50<07:31,  8.42it/s]


 epoch: 11200 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 11201 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 11202 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 75%|███████▍  | 11204/15000 [22:50<06:56,  9.11it/s]


 epoch: 11203 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11204 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11205 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.25, test_acc: 97.0%


 75%|███████▍  | 11208/15000 [22:50<06:19,  9.99it/s]


 epoch: 11206 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11207 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11208 | train_loss: 0.19, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.6%


 75%|███████▍  | 11210/15000 [22:51<10:25,  6.06it/s]


 epoch: 11209 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.0%

input:       nast publications acquired the site in october in reddit became an independent subsidiary of cond nast parent company advance

target:      nast publications acquired the site in october in reddit became an independent subsidiary of cond nast parent company advance publications

prediction:  nast publications acquired the site in october in reddit became an independent subsidiary of cond nast parent company advance the

 epoch: 11210 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%


 75%|███████▍  | 11212/15000 [22:51<09:48,  6.44it/s]


 epoch: 11211 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11212 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 75%|███████▍  | 11214/15000 [22:51<08:49,  7.15it/s]


 epoch: 11213 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.3%

 epoch: 11214 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%


 75%|███████▍  | 11216/15000 [22:52<07:58,  7.91it/s]


 epoch: 11215 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11216 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 75%|███████▍  | 11218/15000 [22:52<08:15,  7.63it/s]


 epoch: 11217 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.0%

 epoch: 11218 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.3%


 75%|███████▍  | 11220/15000 [22:52<09:24,  6.70it/s]


 epoch: 11219 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

input:       the new genetic variants also to the other populations depending on how far two species have diverged since their

target:      the new genetic variants also to the other populations depending on how far two species have diverged since their most

prediction:  the new genetic variants also to the other populations depending on how far two species have diverged since their the

 epoch: 11220 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 75%|███████▍  | 11222/15000 [22:53<08:50,  7.12it/s]


 epoch: 11221 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11222 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 75%|███████▍  | 11223/15000 [22:53<08:32,  7.37it/s]


 epoch: 11223 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 75%|███████▍  | 11225/15000 [22:53<13:31,  4.65it/s]


 epoch: 11224 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11225 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 75%|███████▍  | 11227/15000 [22:54<10:13,  6.15it/s]


 epoch: 11226 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11227 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 75%|███████▍  | 11229/15000 [22:54<08:34,  7.33it/s]


 epoch: 11228 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 11229 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.4%

input:       rise of darwinian and freudian thinking instinct also came to be seen as primary source of motivation according to


 75%|███████▍  | 11230/15000 [22:54<09:10,  6.85it/s]


target:      rise of darwinian and freudian thinking instinct also came to be seen as primary source of motivation according to drive

prediction:  rise of darwinian and freudian thinking instinct also came to be seen as primary source of motivation according to the

 epoch: 11230 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11231 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 75%|███████▍  | 11233/15000 [22:54<07:27,  8.42it/s]


 epoch: 11232 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11233 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 75%|███████▍  | 11235/15000 [22:54<07:05,  8.85it/s]


 epoch: 11234 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11235 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 75%|███████▍  | 11237/15000 [22:55<07:01,  8.93it/s]


 epoch: 11236 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 11237 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 75%|███████▍  | 11238/15000 [22:55<13:49,  4.53it/s]


 epoch: 11238 | train_loss: 0.22, train_acc: 96.7% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11239 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.2%

input:       for the first time in public at rome coinciding with the creation of the most comprehensive political geography that

target:      for the first time in public at rome coinciding with the creation of the most comprehensive political geography that survives

prediction:  for the first time in public at rome coinciding with the creation of the most comprehensive political geography that the


 75%|███████▍  | 11242/15000 [22:56<08:36,  7.28it/s]


 epoch: 11240 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 11241 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11242 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.4%


 75%|███████▍  | 11245/15000 [22:56<07:14,  8.65it/s]


 epoch: 11243 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11244 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11245 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 75%|███████▍  | 11248/15000 [22:56<06:45,  9.26it/s]


 epoch: 11246 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11247 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11248 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 75%|███████▌  | 11250/15000 [22:56<06:47,  9.19it/s]


 epoch: 11249 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

input:       of elements later known as genes mendel laws of inheritance eventually supplanted most of darwin pangenesis theory august weismann

target:      of elements later known as genes mendel laws of inheritance eventually supplanted most of darwin pangenesis theory august weismann made

prediction:  of elements later known as genes mendel laws of inheritance eventually supplanted most of darwin pangenesis theory august weismann the

 epoch: 11250 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%


 75%|███████▌  | 11251/15000 [22:56<06:42,  9.32it/s]


 epoch: 11251 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 75%|███████▌  | 11253/15000 [22:57<08:01,  7.78it/s]


 epoch: 11252 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11253 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11254 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 75%|███████▌  | 11257/15000 [22:57<06:27,  9.66it/s]


 epoch: 11255 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 11256 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11257 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 75%|███████▌  | 11259/15000 [22:57<06:09, 10.12it/s]


 epoch: 11258 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 11259 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

input:       somewhat useful for some other function in the process one example is the african lizard holaspis guentheri which developed

target:      somewhat useful for some other function in the process one example is the african lizard holaspis guentheri which developed an

prediction:  somewhat useful for some other function in the process one example is the african lizard holaspis guentheri which developed the


 75%|███████▌  | 11262/15000 [22:58<06:28,  9.62it/s]


 epoch: 11260 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11261 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11262 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 75%|███████▌  | 11264/15000 [22:58<06:40,  9.33it/s]


 epoch: 11263 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 11264 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 75%|███████▌  | 11265/15000 [22:58<06:49,  9.13it/s]


 epoch: 11265 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%


 75%|███████▌  | 11268/15000 [22:59<10:39,  5.83it/s]


 epoch: 11266 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11267 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11268 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 75%|███████▌  | 11270/15000 [22:59<09:21,  6.64it/s]


 epoch: 11269 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       rapid growth of the internet in the mid created opportunities for new languages perl originally unix scripting tool first

target:      rapid growth of the internet in the mid created opportunities for new languages perl originally unix scripting tool first released

prediction:  rapid growth of the internet in the mid created opportunities for new languages perl originally unix scripting tool first the

 epoch: 11270 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 75%|███████▌  | 11272/15000 [22:59<07:49,  7.94it/s]


 epoch: 11271 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.3%

 epoch: 11272 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 11273 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 75%|███████▌  | 11275/15000 [22:59<06:47,  9.15it/s]


 epoch: 11274 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11275 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11276 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 75%|███████▌  | 11279/15000 [23:00<06:06, 10.16it/s]


 epoch: 11277 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11278 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11279 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%

input:       of life comedy is the third form of literature being the most divorced from true mimesis tragedy is the

target:      of life comedy is the third form of literature being the most divorced from true mimesis tragedy is the truest

prediction:  of life comedy is the third form of literature being the most divorced from true mimesis tragedy is the the

 epoch: 11280 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 75%|███████▌  | 11283/15000 [23:01<09:05,  6.81it/s]


 epoch: 11281 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%

 epoch: 11282 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.4%

 epoch: 11283 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.1%


 75%|███████▌  | 11286/15000 [23:01<07:31,  8.23it/s]


 epoch: 11284 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11285 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11286 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 75%|███████▌  | 11288/15000 [23:01<06:57,  8.90it/s]


 epoch: 11287 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 11288 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11289 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 75%|███████▌  | 11290/15000 [23:01<07:02,  8.78it/s]


input:       comprehensive and well rounded education while vocational trainings focus more on specific practical skills within field the curricula also

target:      comprehensive and well rounded education while vocational trainings focus more on specific practical skills within field the curricula also cover

prediction:  comprehensive and well rounded education while vocational trainings focus more on specific practical skills within field the curricula also the

 epoch: 11290 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.20, test_acc: 97.4%

 epoch: 11291 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 75%|███████▌  | 11294/15000 [23:02<06:16,  9.84it/s]


 epoch: 11292 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11293 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.20, test_acc: 97.2%

 epoch: 11294 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 75%|███████▌  | 11297/15000 [23:02<06:43,  9.17it/s]


 epoch: 11295 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 11296 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 11297 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 75%|███████▌  | 11298/15000 [23:02<06:42,  9.19it/s]


 epoch: 11298 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11299 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

input:       japan with publications such as clairvoyance and thoughtography by tomokichi fukurai but it was mostly shunned by as discipline

target:      japan with publications such as clairvoyance and thoughtography by tomokichi fukurai but it was mostly shunned by as discipline psychology

prediction:  japan with publications such as clairvoyance and thoughtography by tomokichi fukurai but it was mostly shunned by as discipline the


 75%|███████▌  | 11301/15000 [23:03<06:32,  9.42it/s]


 epoch: 11300 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 11301 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11302 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 75%|███████▌  | 11305/15000 [23:03<05:57, 10.32it/s]


 epoch: 11303 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11304 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11305 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.1%


 75%|███████▌  | 11307/15000 [23:03<05:54, 10.42it/s]


 epoch: 11306 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 11307 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 11308 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 75%|███████▌  | 11310/15000 [23:04<09:56,  6.19it/s]


 epoch: 11309 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

input:       with the creation of marxist guerrillas farc ep and then involved several illegal armed groups of leftist leaning ideology

target:      with the creation of marxist guerrillas farc ep and then involved several illegal armed groups of leftist leaning ideology as

prediction:  with the creation of marxist guerrillas farc ep and then involved several illegal armed groups of leftist leaning ideology the

 epoch: 11310 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 75%|███████▌  | 11312/15000 [23:04<08:42,  7.06it/s]


 epoch: 11311 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%

 epoch: 11312 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 75%|███████▌  | 11314/15000 [23:04<07:57,  7.71it/s]


 epoch: 11313 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.25, test_acc: 97.0%

 epoch: 11314 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 75%|███████▌  | 11316/15000 [23:05<07:45,  7.91it/s]


 epoch: 11315 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11316 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 75%|███████▌  | 11319/15000 [23:05<06:43,  9.13it/s]


 epoch: 11317 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11318 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11319 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 75%|███████▌  | 11320/15000 [23:05<07:41,  7.97it/s]


input:       fundamental scientific disciplines with its main goal being to understand how the universe behaves scientist who specializes in

target:      fundamental scientific disciplines with its main goal being to understand how the universe behaves scientist who specializes in the

prediction:  fundamental scientific disciplines with its main goal being to understand how the universe behaves scientist who specializes in the

 epoch: 11320 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%


 75%|███████▌  | 11322/15000 [23:05<07:29,  8.18it/s]


 epoch: 11321 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11322 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.1%


 75%|███████▌  | 11323/15000 [23:05<07:43,  7.94it/s]


 epoch: 11323 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 76%|███████▌  | 11325/15000 [23:06<12:52,  4.76it/s]


 epoch: 11324 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11325 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 76%|███████▌  | 11327/15000 [23:06<10:29,  5.83it/s]


 epoch: 11326 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11327 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 76%|███████▌  | 11329/15000 [23:07<08:53,  6.89it/s]


 epoch: 11328 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 11329 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%


 76%|███████▌  | 11330/15000 [23:07<09:32,  6.41it/s]


input:       linguistics saussure also introduced several basic dimensions of linguistic analysis that are still fundamental in many contemporary linguistic theories

target:      linguistics saussure also introduced several basic dimensions of linguistic analysis that are still fundamental in many contemporary linguistic theories such

prediction:  linguistics saussure also introduced several basic dimensions of linguistic analysis that are still fundamental in many contemporary linguistic theories the

 epoch: 11330 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.2%


 76%|███████▌  | 11332/15000 [23:07<08:19,  7.34it/s]


 epoch: 11331 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 11332 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.2%


 76%|███████▌  | 11334/15000 [23:07<07:45,  7.87it/s]


 epoch: 11333 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 11334 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 76%|███████▌  | 11336/15000 [23:07<07:39,  7.98it/s]


 epoch: 11335 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11336 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 76%|███████▌  | 11337/15000 [23:08<07:44,  7.89it/s]


 epoch: 11337 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 76%|███████▌  | 11338/15000 [23:08<13:58,  4.37it/s]


 epoch: 11338 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11339 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

input:       not commensurate with the climate threats and risks it faces asia has the largest continental economy in the world

target:      not commensurate with the climate threats and risks it faces asia has the largest continental economy in the world by

prediction: 

 76%|███████▌  | 11341/15000 [23:08<09:49,  6.21it/s]

 not commensurate with the climate threats and risks it faces asia has the largest continental economy in the world the

 epoch: 11340 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11341 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 76%|███████▌  | 11343/15000 [23:09<08:13,  7.41it/s]


 epoch: 11342 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11343 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11344 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 76%|███████▌  | 11347/15000 [23:09<06:42,  9.07it/s]


 epoch: 11345 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11346 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 11347 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 76%|███████▌  | 11349/15000 [23:09<06:32,  9.30it/s]


 epoch: 11348 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11349 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

input:       the nature of the divine for ordinary romans religion was part of daily life each home had household shrine

target:      the nature of the divine for ordinary romans religion was part of daily life each home had household shrine to

prediction:  the nature of the divine for ordinary romans religion was part of daily life each home had household shrine the


 76%|███████▌  | 11351/15000 [23:09<07:01,  8.65it/s]


 epoch: 11350 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11351 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 76%|███████▌  | 11354/15000 [23:10<10:40,  5.69it/s]


 epoch: 11352 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.8%

 epoch: 11353 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11354 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 76%|███████▌  | 11357/15000 [23:10<07:57,  7.62it/s]


 epoch: 11355 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 11356 | train_loss: 0.21, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11357 | train_loss: 0.19, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.8%


 76%|███████▌  | 11358/15000 [23:11<07:37,  7.96it/s]


 epoch: 11358 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11359 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

input:       bang results from the wmap team in are in accordance with universe that consists of dark energy dark matter

target:      bang results from the wmap team in are in accordance with universe that consists of dark energy dark matter regular

prediction:  bang results from the wmap team in are in accordance with universe that consists of dark energy dark matter of


 76%|███████▌  | 11363/15000 [23:11<06:22,  9.51it/s]


 epoch: 11360 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 96.9%

 epoch: 11361 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11362 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11363 | train_loss: 0.21, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%


 76%|███████▌  | 11365/15000 [23:11<06:21,  9.54it/s]


 epoch: 11364 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.3%

 epoch: 11365 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11366 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 76%|███████▌  | 11369/15000 [23:12<07:07,  8.49it/s]


 epoch: 11367 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 11368 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11369 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 76%|███████▌  | 11370/15000 [23:12<07:33,  8.01it/s]


input:       rebuilt under the administration of themistocles and was adorned with public buildings by cimon and especially by pericles in

target:      rebuilt under the administration of themistocles and was adorned with public buildings by cimon and especially by pericles in whose

prediction:  rebuilt under the administration of themistocles and was adorned with public buildings by cimon and especially by pericles in the

 epoch: 11370 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.7%

 epoch: 11371 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 76%|███████▌  | 11374/15000 [23:12<06:12,  9.74it/s]


 epoch: 11372 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.4%

 epoch: 11373 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 11374 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 76%|███████▌  | 11376/15000 [23:13<06:08,  9.85it/s]


 epoch: 11375 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%

 epoch: 11376 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11377 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 76%|███████▌  | 11378/15000 [23:13<05:57, 10.14it/s]


 epoch: 11378 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 11379 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

input:       an hour with lee evans sold million worth of tickets for his tour in day the biggest first day

target:      an hour with lee evans sold million worth of tickets for his tour in day the biggest first day sale

prediction:  an hour with lee evans sold million worth of tickets for his tour in day the biggest first day the


 76%|███████▌  | 11380/15000 [23:13<06:11,  9.75it/s]


 epoch: 11380 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.4%


 76%|███████▌  | 11382/15000 [23:14<09:34,  6.30it/s]


 epoch: 11381 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.3%

 epoch: 11382 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 76%|███████▌  | 11384/15000 [23:14<08:09,  7.39it/s]


 epoch: 11383 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 11384 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11385 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.21, test_acc: 96.9%


 76%|███████▌  | 11388/15000 [23:14<06:26,  9.35it/s]


 epoch: 11386 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11387 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11388 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 76%|███████▌  | 11390/15000 [23:14<06:29,  9.27it/s]


 epoch: 11389 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

input:       ways from languages that have evolved through usage significant difference is that programming language can be fully described and

target:      ways from languages that have evolved through usage significant difference is that programming language can be fully described and studied

prediction:  ways from languages that have evolved through usage significant difference is that programming language can be fully described and the

 epoch: 11390 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 76%|███████▌  | 11392/15000 [23:14<06:13,  9.66it/s]


 epoch: 11391 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11392 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11393 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 76%|███████▌  | 11394/15000 [23:15<06:02,  9.94it/s]


 epoch: 11394 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 76%|███████▌  | 11396/15000 [23:15<10:07,  5.94it/s]


 epoch: 11395 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11396 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 11397 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 76%|███████▌  | 11398/15000 [23:15<08:36,  6.98it/s]


 epoch: 11398 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11399 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.3%

input:       on torturing their nonhuman primates decade after decade invariably proving what we all knew in advance that social creatures

target:      on torturing their nonhuman primates decade after decade invariably proving what we all knew in advance that social creatures can

prediction:  on torturing their nonhuman primates decade after decade invariably proving what we all knew in advance that social creatures the


 76%|███████▌  | 11402/15000 [23:16<07:14,  8.28it/s]


 epoch: 11400 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 11401 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11402 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 76%|███████▌  | 11404/15000 [23:16<06:41,  8.95it/s]


 epoch: 11403 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.1%

 epoch: 11404 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 11405 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.1%


 76%|███████▌  | 11408/15000 [23:16<06:06,  9.81it/s]


 epoch: 11406 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11407 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 11408 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 76%|███████▌  | 11410/15000 [23:17<06:24,  9.33it/s]


 epoch: 11409 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

input:       are groups of actually or potentially interbreeding natural populations which are reproductively isolated from other such groups despite its

target:      are groups of actually or potentially interbreeding natural populations which are reproductively isolated from other such groups despite its wide

prediction:  are groups of actually or potentially interbreeding natural populations which are reproductively isolated from other such groups despite its the

 epoch: 11410 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11411 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%


 76%|███████▌  | 11414/15000 [23:17<05:46, 10.35it/s]


 epoch: 11412 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.8%

 epoch: 11413 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11414 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 76%|███████▌  | 11416/15000 [23:17<05:40, 10.51it/s]


 epoch: 11415 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11416 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 11417 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 76%|███████▌  | 11418/15000 [23:17<05:32, 10.78it/s]


 epoch: 11418 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11419 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

input:       from quebec to new england the great migration which began around resulted in millions of african americans leaving the

target:      from quebec to new england the great migration which began around resulted in millions of african americans leaving the rural

prediction:  from quebec to new england the great migration which began around resulted in millions of african americans leaving the the


 76%|███████▌  | 11422/15000 [23:18<05:51, 10.19it/s]


 epoch: 11420 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11421 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 11422 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.9%


 76%|███████▌  | 11424/15000 [23:18<09:54,  6.01it/s]


 epoch: 11423 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11424 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.3%

 epoch: 11425 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 76%|███████▌  | 11426/15000 [23:19<08:56,  6.67it/s]



 epoch: 11426 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 76%|███████▌  | 11428/15000 [23:19<08:06,  7.34it/s]


 epoch: 11427 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11428 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 76%|███████▌  | 11430/15000 [23:19<08:37,  6.90it/s]


 epoch: 11429 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

input:       immigrant population the united states is developed country that has the highest disposable income per capita in the world

target:      immigrant population the united states is developed country that has the highest disposable income per capita in the world its

prediction:  immigrant population the united states is developed country that has the highest disposable income per capita in the world the

 epoch: 11430 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.2%


 76%|███████▌  | 11432/15000 [23:19<08:02,  7.40it/s]


 epoch: 11431 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%

 epoch: 11432 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.21, test_acc: 97.2%


 76%|███████▌  | 11434/15000 [23:20<07:44,  7.67it/s]


 epoch: 11433 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11434 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 76%|███████▌  | 11436/15000 [23:20<07:22,  8.06it/s]


 epoch: 11435 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11436 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 76%|███████▌  | 11437/15000 [23:20<07:13,  8.23it/s]


 epoch: 11437 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.8%


 76%|███████▋  | 11439/15000 [23:21<12:02,  4.93it/s]


 epoch: 11438 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 11439 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

input:      

 76%|███████▋  | 11440/15000 [23:21<11:38,  5.10it/s]

 considered as forming fifth grand division of the world he also viewed oceania as covering australia new zealand the

target:      considered as forming fifth grand division of the world he also viewed oceania as covering australia new zealand the malay

prediction:  considered as forming fifth grand division of the world he also viewed oceania as covering australia new zealand the the

 epoch: 11440 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 76%|███████▋  | 11442/15000 [23:21<09:23,  6.31it/s]


 epoch: 11441 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11442 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 76%|███████▋  | 11444/15000 [23:21<08:40,  6.84it/s]


 epoch: 11443 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11444 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 76%|███████▋  | 11446/15000 [23:22<07:12,  8.21it/s]


 epoch: 11445 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 11446 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 11447 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 76%|███████▋  | 11449/15000 [23:22<06:33,  9.03it/s]


 epoch: 11448 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.4%

 epoch: 11449 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

input:       republican offices he renounced his consulship in bc but retained his consular imperium leading to second compromise between augustus


 76%|███████▋  | 11450/15000 [23:22<07:15,  8.15it/s]


target:      republican offices he renounced his consulship in bc but retained his consular imperium leading to second compromise between augustus and

prediction:  republican offices he renounced his consulship in bc but retained his consular imperium leading to second compromise between augustus the

 epoch: 11450 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11451 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%


 76%|███████▋  | 11454/15000 [23:23<06:49,  8.66it/s]


 epoch: 11452 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 11453 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11454 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 76%|███████▋  | 11456/15000 [23:23<06:24,  9.21it/s]


 epoch: 11455 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.4%

 epoch: 11456 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11457 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 76%|███████▋  | 11458/15000 [23:23<06:15,  9.43it/s]


 epoch: 11458 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11459 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

input:       caesar would rise up to replace the augustus and select new caesar on may diocletian and maximian abdicated in

target:      caesar would rise up to replace the augustus and select new caesar on may diocletian and maximian abdicated in favour

prediction:  caesar would rise up to replace the augustus and select new caesar on may diocletian and maximian abdicated in the


 76%|███████▋  | 11462/15000 [23:23<06:03,  9.74it/s]


 epoch: 11460 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11461 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11462 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 76%|███████▋  | 11464/15000 [23:24<05:51, 10.06it/s]


 epoch: 11463 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 96.8%

 epoch: 11464 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11465 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 76%|███████▋  | 11468/15000 [23:24<08:18,  7.08it/s]


 epoch: 11466 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11467 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 11468 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 76%|███████▋  | 11470/15000 [23:25<07:41,  7.64it/s]


 epoch: 11469 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

input:       neither time nor space limited the empire in virgil aeneid limitless empire is said to be granted to the

target:      neither time nor space limited the empire in virgil aeneid limitless empire is said to be granted to the romans

prediction:  neither time nor space limited the empire in virgil aeneid limitless empire is said to be granted to the the

 epoch: 11470 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 76%|███████▋  | 11472/15000 [23:25<07:04,  8.31it/s]


 epoch: 11471 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 11472 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.1%

 epoch: 11473 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.27, test_acc: 97.1%


 77%|███████▋  | 11476/15000 [23:25<06:06,  9.61it/s]


 epoch: 11474 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 11475 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11476 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.8%


 77%|███████▋  | 11478/15000 [23:25<05:56,  9.88it/s]


 epoch: 11477 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 11478 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11479 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

input:       in latin america guyana and suriname have the largest ethnic east indian community in many places indigenous people still

target:      in latin america guyana and suriname have the largest ethnic east indian community in many places indigenous people still practice

prediction:  in latin america guyana and suriname have the largest ethnic east indian community in many places indigenous people still the


 77%|███████▋  | 11482/15000 [23:26<08:41,  6.75it/s]


 epoch: 11480 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%

 epoch: 11481 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11482 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.3%


 77%|███████▋  | 11484/15000 [23:26<07:50,  7.47it/s]


 epoch: 11483 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11484 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 77%|███████▋  | 11487/15000 [23:27<06:38,  8.82it/s]


 epoch: 11485 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11486 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11487 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 77%|███████▋  | 11489/15000 [23:27<06:10,  9.47it/s]


 epoch: 11488 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11489 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

input:       the jargons or styles of subcultures linguistic anthropologists and sociologists of language define communicative style as the ways that

target:      the jargons or styles of subcultures linguistic anthropologists and sociologists of language define communicative style as the ways that language

prediction:  the jargons or styles of subcultures linguistic anthropologists and sociologists of language define communicative style as the ways that the


 77%|███████▋  | 11491/15000 [23:27<06:28,  9.04it/s]


 epoch: 11490 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11491 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 11492 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 77%|███████▋  | 11495/15000 [23:27<05:45, 10.15it/s]


 epoch: 11493 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11494 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11495 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 77%|███████▋  | 11497/15000 [23:28<05:35, 10.46it/s]


 epoch: 11496 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11497 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11498 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 77%|███████▋  | 11499/15000 [23:28<05:32, 10.52it/s]


 epoch: 11499 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.0%

input:       copies in japan within the first two months of release according to an announcement by sce japan asia by

target:      copies in japan within the first two months of release according to an announcement by sce japan asia by january

prediction:  copies in japan within the first two months of release according to an announcement by sce japan asia by the

 epoch: 11500 | train_loss: 0.24, train_acc: 96.6% | test_loss: 0.21, test_acc: 97.3%


 77%|███████▋  | 11503/15000 [23:28<05:50,  9.98it/s]


 epoch: 11501 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11502 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11503 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 77%|███████▋  | 11505/15000 [23:28<05:44, 10.15it/s]


 epoch: 11504 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11505 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 11506 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 77%|███████▋  | 11507/15000 [23:29<05:42, 10.18it/s]


 epoch: 11507 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 77%|███████▋  | 11509/15000 [23:29<09:36,  6.05it/s]


 epoch: 11508 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11509 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

input:       around mya north america became part of laurasia before it separated from eurasia as its own continent during the

target:      around mya north america became part of laurasia before it separated from eurasia as its own continent during the mid

prediction:  around mya north america became part of laurasia before it separated from eurasia as its own continent during the the


 77%|███████▋  | 11512/15000 [23:30<07:53,  7.37it/s]


 epoch: 11510 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.4%

 epoch: 11511 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11512 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 77%|███████▋  | 11515/15000 [23:30<06:51,  8.47it/s]


 epoch: 11513 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11514 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 11515 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 77%|███████▋  | 11518/15000 [23:30<06:14,  9.29it/s]


 epoch: 11516 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11517 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11518 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 77%|███████▋  | 11520/15000 [23:30<06:23,  9.06it/s]


 epoch: 11519 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

input:       expands and cools other explanations of dark energy called phantom energy theories suggest that ultimately galaxy clusters stars planets

target:      expands and cools other explanations of dark energy called phantom energy theories suggest that ultimately galaxy clusters stars planets atoms

prediction:  expands and cools other explanations of dark energy called phantom energy theories suggest that ultimately galaxy clusters stars planets the

 epoch: 11520 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 77%|███████▋  | 11521/15000 [23:30<06:22,  9.10it/s]


 epoch: 11521 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 77%|███████▋  | 11524/15000 [23:31<08:53,  6.51it/s]


 epoch: 11522 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11523 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 11524 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 77%|███████▋  | 11526/15000 [23:31<07:44,  7.49it/s]


 epoch: 11525 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11526 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 77%|███████▋  | 11528/15000 [23:32<07:01,  8.24it/s]


 epoch: 11527 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 11528 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 77%|███████▋  | 11530/15000 [23:32<08:04,  7.16it/s]


 epoch: 11529 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       associates with difficult period of leonardo life as evidenced in his diary thought was learning to live was only

target:      associates with difficult period of leonardo life as evidenced in his diary thought was learning to live was only learning

prediction:  associates with difficult period of leonardo life as evidenced in his diary thought was learning to live was only the

 epoch: 11530 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 77%|███████▋  | 11532/15000 [23:32<07:46,  7.44it/s]


 epoch: 11531 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.7%

 epoch: 11532 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.25, test_acc: 96.8%


 77%|███████▋  | 11534/15000 [23:32<07:23,  7.81it/s]


 epoch: 11533 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11534 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 77%|███████▋  | 11535/15000 [23:32<07:20,  7.86it/s]


 epoch: 11535 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%


 77%|███████▋  | 11537/15000 [23:33<09:30,  6.07it/s]


 epoch: 11536 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11537 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.23, test_acc: 97.3%


 77%|███████▋  | 11539/15000 [23:33<08:34,  6.73it/s]


 epoch: 11538 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11539 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 77%|███████▋  | 11540/15000 [23:33<09:18,  6.20it/s]


input:       come to elicit the response the biologically potent stimulus elicits ivan pavlov known best for inducing dogs to salivate

target:      come to elicit the response the biologically potent stimulus elicits ivan pavlov known best for inducing dogs to salivate in

prediction:  come to elicit the response the biologically potent stimulus elicits ivan pavlov known best for inducing dogs to salivate the

 epoch: 11540 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 77%|███████▋  | 11542/15000 [23:34<08:01,  7.19it/s]


 epoch: 11541 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11542 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 77%|███████▋  | 11544/15000 [23:34<07:34,  7.61it/s]


 epoch: 11543 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11544 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.4%


 77%|███████▋  | 11546/15000 [23:34<07:12,  7.98it/s]


 epoch: 11545 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11546 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 77%|███████▋  | 11548/15000 [23:34<07:10,  8.02it/s]


 epoch: 11547 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 11548 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 77%|███████▋  | 11549/15000 [23:34<07:18,  7.88it/s]


 epoch: 11549 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

input:       the two modes of inheritance in the evolution of memes characterizing the darwinian mode as copying the instructions and

target:      the two modes of inheritance in the evolution of memes characterizing the darwinian mode as copying the instructions and the

prediction:  the two modes of inheritance in the evolution of memes characterizing the darwinian mode as copying the instructions and the


 77%|███████▋  | 11551/15000 [23:35<13:14,  4.34it/s]


 epoch: 11550 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11551 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 77%|███████▋  | 11553/15000 [23:36<10:28,  5.48it/s]


 epoch: 11552 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11553 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%


 77%|███████▋  | 11556/15000 [23:36<07:27,  7.69it/s]


 epoch: 11554 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11555 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%

 epoch: 11556 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 77%|███████▋  | 11558/15000 [23:36<06:41,  8.58it/s]


 epoch: 11557 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 11558 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11559 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 77%|███████▋  | 11560/15000 [23:36<06:54,  8.30it/s]


input:       to and served at least one term as an executive magistrate senator also had to meet minimum property requirement

target:      to and served at least one term as an executive magistrate senator also had to meet minimum property requirement of

prediction:  to and served at least one term as an executive magistrate senator also had to meet minimum property requirement the

 epoch: 11560 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 77%|███████▋  | 11562/15000 [23:36<06:42,  8.55it/s]


 epoch: 11561 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11562 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 96.9%

 epoch: 11563 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 77%|███████▋  | 11565/15000 [23:37<06:58,  8.20it/s]


 epoch: 11564 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11565 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 77%|███████▋  | 11567/15000 [23:37<06:32,  8.74it/s]


 epoch: 11566 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11567 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 77%|███████▋  | 11568/15000 [23:37<06:23,  8.94it/s]


 epoch: 11568 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11569 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

input:       exotic physical phenomena that have not been observed in terrestrial laboratory experiments or incorporated into the standard model of

target:      exotic physical phenomena that have not been observed in terrestrial laboratory experiments or incorporated into the standard model of particle

prediction:  exotic physical phenomena that have not been observed in terrestrial laboratory experiments or incorporated into the standard model of the


 77%|███████▋  | 11572/15000 [23:38<06:02,  9.46it/s]


 epoch: 11570 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 11571 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11572 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 77%|███████▋  | 11574/15000 [23:38<06:04,  9.41it/s]


 epoch: 11573 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11574 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11575 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 77%|███████▋  | 11578/15000 [23:38<05:30, 10.35it/s]


 epoch: 11576 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 11577 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11578 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%


 77%|███████▋  | 11580/15000 [23:39<07:27,  7.64it/s]


 epoch: 11579 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

input:       elitist old kingdom attitudes towards the gods the middle kingdom displayed an increase in expressions of personal piety middle

target:      elitist old kingdom attitudes towards the gods the middle kingdom displayed an increase in expressions of personal piety middle kingdom

prediction:  elitist old kingdom attitudes towards the gods the middle kingdom displayed an increase in expressions of personal piety middle the

 epoch: 11580 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 77%|███████▋  | 11584/15000 [23:39<06:15,  9.10it/s]


 epoch: 11581 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11582 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 11583 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 11584 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 77%|███████▋  | 11586/15000 [23:39<05:58,  9.54it/s]


 epoch: 11585 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 11586 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11587 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 77%|███████▋  | 11588/15000 [23:39<05:48,  9.79it/s]


 epoch: 11588 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11589 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.4%

input:       dominant languages in north america are english spanish and french danish is prevalent in greenland alongside greenlandic and dutch

target:      dominant languages in north america are english spanish and french danish is prevalent in greenland alongside greenlandic and dutch is

prediction:  dominant languages in north america are english spanish and french danish is prevalent in greenland alongside greenlandic and dutch the


 77%|███████▋  | 11591/15000 [23:40<05:57,  9.53it/s]


 epoch: 11590 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11591 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 77%|███████▋  | 11592/15000 [23:40<05:56,  9.57it/s]


 epoch: 11592 | train_loss: 0.21, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 77%|███████▋  | 11595/15000 [23:41<09:13,  6.15it/s]


 epoch: 11593 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11594 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11595 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 77%|███████▋  | 11598/15000 [23:41<07:10,  7.90it/s]


 epoch: 11596 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11597 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11598 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 77%|███████▋  | 11600/15000 [23:41<07:03,  8.03it/s]


 epoch: 11599 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%

input:       broke into independent kingdoms in the th century ad the eastern roman empire governed from constantinople is referred to

target:      broke into independent kingdoms in the th century ad the eastern roman empire governed from constantinople is referred to as

prediction:  broke into independent kingdoms in the th century ad the eastern roman empire governed from constantinople is referred to the

 epoch: 11600 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 77%|███████▋  | 11603/15000 [23:41<06:13,  9.08it/s]


 epoch: 11601 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11602 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 11603 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 77%|███████▋  | 11605/15000 [23:42<06:16,  9.01it/s]


 epoch: 11604 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11605 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%


 77%|███████▋  | 11606/15000 [23:42<06:11,  9.15it/s]


 epoch: 11606 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 77%|███████▋  | 11609/15000 [23:42<06:42,  8.43it/s]


 epoch: 11607 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11608 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.4%

 epoch: 11609 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 77%|███████▋  | 11610/15000 [23:42<07:02,  8.02it/s]


input:       treatise on painting in france and italy in and germany in with engravings based upon drawings by the classical

target:      treatise on painting in france and italy in and germany in with engravings based upon drawings by the classical painter

prediction:  treatise on painting in france and italy in and germany in with engravings based upon drawings by the classical the

 epoch: 11610 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 11611 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 77%|███████▋  | 11614/15000 [23:43<05:49,  9.68it/s]


 epoch: 11612 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 11613 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11614 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 77%|███████▋  | 11617/15000 [23:43<05:39,  9.97it/s]


 epoch: 11615 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11616 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11617 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 77%|███████▋  | 11619/15000 [23:43<05:44,  9.81it/s]


 epoch: 11618 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11619 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       the unclaimed area between it and the south pole the argentine british and chilean claims overlap and have caused

target:      the unclaimed area between it and the south pole the argentine british and chilean claims overlap and have caused friction


 77%|███████▋  | 11620/15000 [23:43<06:26,  8.74it/s]


prediction:  the unclaimed area between it and the south pole the argentine british and chilean claims overlap and have caused the

 epoch: 11620 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.7%


 77%|███████▋  | 11622/15000 [23:44<11:18,  4.98it/s]


 epoch: 11621 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11622 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11623 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 78%|███████▊  | 11626/15000 [23:44<07:13,  7.79it/s]


 epoch: 11624 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11625 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11626 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 78%|███████▊  | 11628/15000 [23:44<06:32,  8.60it/s]


 epoch: 11627 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11628 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11629 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 78%|███████▊  | 11630/15000 [23:45<06:33,  8.56it/s]


input:       struggle the youth then becomes constrained by his lack of social authority and is left with little choice but

target:      struggle the youth then becomes constrained by his lack of social authority and is left with little choice but to

prediction:  struggle the youth then becomes constrained by his lack of social authority and is left with little choice but the

 epoch: 11630 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 96.9%

 epoch: 11631 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%


 78%|███████▊  | 11633/15000 [23:45<06:03,  9.27it/s]


 epoch: 11632 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11633 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 11634 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 78%|███████▊  | 11637/15000 [23:46<07:33,  7.41it/s]


 epoch: 11635 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.1%

 epoch: 11636 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11637 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 78%|███████▊  | 11639/15000 [23:46<07:19,  7.65it/s]


 epoch: 11638 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11639 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

input:       many offer paid education by charging tuition fees more detailed classification focuses on the social institution responsible for education


 78%|███████▊  | 11640/15000 [23:46<08:03,  6.95it/s]


target:      many offer paid education by charging tuition fees more detailed classification focuses on the social institution responsible for education it

prediction:  many offer paid education by charging tuition fees more detailed classification focuses on the social institution responsible for education the

 epoch: 11640 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 78%|███████▊  | 11642/15000 [23:46<07:31,  7.44it/s]


 epoch: 11641 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11642 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 78%|███████▊  | 11644/15000 [23:47<07:29,  7.47it/s]


 epoch: 11643 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11644 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.19, test_acc: 97.1%


 78%|███████▊  | 11646/15000 [23:47<06:54,  8.10it/s]


 epoch: 11645 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11646 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.1%


 78%|███████▊  | 11648/15000 [23:47<06:55,  8.07it/s]


 epoch: 11647 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 11648 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%


 78%|███████▊  | 11649/15000 [23:47<06:50,  8.17it/s]


 epoch: 11649 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.1%

input:       mitigation and adaptation strategies in the management of ecosystem goods and services and the agriculture production systems in africa

target:      mitigation and adaptation strategies in the management of ecosystem goods and services and the agriculture production systems in africa africa

prediction:  mitigation and adaptation strategies in the management of ecosystem goods and services and the agriculture production systems in africa the


 78%|███████▊  | 11651/15000 [23:48<09:05,  6.14it/s]


 epoch: 11650 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11651 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 78%|███████▊  | 11653/15000 [23:48<07:56,  7.03it/s]


 epoch: 11652 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11653 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%


 78%|███████▊  | 11656/15000 [23:48<06:20,  8.79it/s]


 epoch: 11654 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 11655 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 11656 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 78%|███████▊  | 11658/15000 [23:48<05:51,  9.51it/s]


 epoch: 11657 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11658 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 78%|███████▊  | 11660/15000 [23:49<07:13,  7.70it/s]


 epoch: 11659 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

input:       observations given that no monopoles have been found this problem is resolved by cosmic inflation which removes all point

target:      observations given that no monopoles have been found this problem is resolved by cosmic inflation which removes all point defects

prediction:  observations given that no monopoles have been found this problem is resolved by cosmic inflation which removes all point the

 epoch: 11660 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 78%|███████▊  | 11662/15000 [23:49<07:07,  7.81it/s]


 epoch: 11661 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11662 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 78%|███████▊  | 11663/15000 [23:49<07:14,  7.67it/s]


 epoch: 11663 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 78%|███████▊  | 11665/15000 [23:50<11:18,  4.92it/s]


 epoch: 11664 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11665 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 78%|███████▊  | 11668/15000 [23:50<07:51,  7.07it/s]


 epoch: 11666 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11667 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11668 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%


 78%|███████▊  | 11670/15000 [23:50<07:31,  7.37it/s]


 epoch: 11669 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 96.9%

input:       universe to reach approximate thermodynamic equilibrium others were fast enough to reach thermalization the parameter usually used to find

target:      universe to reach approximate thermodynamic equilibrium others were fast enough to reach thermalization the parameter usually used to find out

prediction:  universe to reach approximate thermodynamic equilibrium others were fast enough to reach thermalization the parameter usually used to find the

 epoch: 11670 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 78%|███████▊  | 11672/15000 [23:50<06:31,  8.49it/s]


 epoch: 11671 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 11672 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11673 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 78%|███████▊  | 11675/15000 [23:51<06:00,  9.22it/s]


 epoch: 11674 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.2%

 epoch: 11675 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%


 78%|███████▊  | 11677/15000 [23:51<05:56,  9.31it/s]


 epoch: 11676 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%

 epoch: 11677 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 78%|███████▊  | 11678/15000 [23:51<05:55,  9.35it/s]


 epoch: 11678 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 78%|███████▊  | 11680/15000 [23:52<11:04,  5.00it/s]


 epoch: 11679 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

input:       hudson river school was mid th century movement in the tradition of european naturalism the armory show in new

target:      hudson river school was mid th century movement in the tradition of european naturalism the armory show in new york

prediction:  hudson river school was mid th century movement in the tradition of european naturalism the armory show in new the

 epoch: 11680 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.4%


 78%|███████▊  | 11682/15000 [23:52<08:26,  6.55it/s]


 epoch: 11681 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.4%

 epoch: 11682 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.1%


 78%|███████▊  | 11686/15000 [23:52<06:09,  8.98it/s]


 epoch: 11683 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11684 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 11685 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 11686 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 78%|███████▊  | 11689/15000 [23:53<05:37,  9.81it/s]


 epoch: 11687 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11688 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11689 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 78%|███████▊  | 11691/15000 [23:53<06:06,  9.03it/s]


input:       the equator where seafloor invertebrates and trilobites flourished in the tropical seas by the start of the devonian period

target:      the equator where seafloor invertebrates and trilobites flourished in the tropical seas by the start of the devonian period ma

prediction:  the equator where seafloor invertebrates and trilobites flourished in the tropical seas by the start of the devonian period the

 epoch: 11690 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11691 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 78%|███████▊  | 11692/15000 [23:53<06:17,  8.77it/s]


 epoch: 11692 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 78%|███████▊  | 11695/15000 [23:54<09:00,  6.12it/s]


 epoch: 11693 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 11694 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11695 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 78%|███████▊  | 11698/15000 [23:54<07:03,  7.80it/s]


 epoch: 11696 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11697 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 11698 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 78%|███████▊  | 11700/15000 [23:54<06:57,  7.90it/s]


 epoch: 11699 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

input:       who argued for stronger control of the markets by the state the theory that the state can alleviate economic

target:      who argued for stronger control of the markets by the state the theory that the state can alleviate economic problems

prediction:  who argued for stronger control of the markets by the state the theory that the state can alleviate economic the

 epoch: 11700 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 78%|███████▊  | 11702/15000 [23:54<06:11,  8.89it/s]


 epoch: 11701 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 11702 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11703 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 78%|███████▊  | 11706/15000 [23:55<05:27, 10.05it/s]


 epoch: 11704 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11705 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 11706 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%


 78%|███████▊  | 11708/15000 [23:55<07:50,  6.99it/s]


 epoch: 11707 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.4%

 epoch: 11708 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11709 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 78%|███████▊  | 11710/15000 [23:55<07:17,  7.52it/s]


input:       opus is often regarded as the most famous portrait ever made the last supper is the most reproduced religious

target:      opus is often regarded as the most famous portrait ever made the last supper is the most reproduced religious painting

prediction:  opus is often regarded as the most famous portrait ever made the last supper is the most reproduced religious the

 epoch: 11710 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11711 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.27, test_acc: 97.0%


 78%|███████▊  | 11714/15000 [23:56<05:59,  9.13it/s]


 epoch: 11712 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11713 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 11714 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.8%


 78%|███████▊  | 11716/15000 [23:56<05:54,  9.25it/s]


 epoch: 11715 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11716 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 78%|███████▊  | 11719/15000 [23:56<05:37,  9.71it/s]


 epoch: 11717 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11718 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.3%

 epoch: 11719 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 78%|███████▊  | 11720/15000 [23:56<06:18,  8.67it/s]


input:       pool of highly skilled english speaking workers the increased use of outsourcing has assisted the rise of india and

target:      pool of highly skilled english speaking workers the increased use of outsourcing has assisted the rise of india and the

prediction:  pool of highly skilled english speaking workers the increased use of outsourcing has assisted the rise of india and the

 epoch: 11720 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 78%|███████▊  | 11723/15000 [23:57<08:43,  6.26it/s]


 epoch: 11721 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11722 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 11723 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 78%|███████▊  | 11725/15000 [23:57<07:26,  7.33it/s]


 epoch: 11724 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11725 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11726 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 78%|███████▊  | 11729/15000 [23:58<05:43,  9.52it/s]


 epoch: 11727 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11728 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11729 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 78%|███████▊  | 11731/15000 [23:58<06:03,  9.00it/s]


input:       live streaming service run by reddit viewers interacted with streams by upvoting or downvoting chatting and by giving paid

target:      live streaming service run by reddit viewers interacted with streams by upvoting or downvoting chatting and by giving paid awards

prediction:  live streaming service run by reddit viewers interacted with streams by upvoting or downvoting chatting and by giving paid the

 epoch: 11730 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.8%

 epoch: 11731 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%


 78%|███████▊  | 11733/15000 [23:58<05:58,  9.12it/s]


 epoch: 11732 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11733 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 78%|███████▊  | 11737/15000 [23:59<08:53,  6.11it/s]


 epoch: 11734 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11735 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 11736 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11737 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.1%


 78%|███████▊  | 11738/15000 [23:59<08:10,  6.65it/s]


 epoch: 11738 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 11739 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

input:       the two sentences the slaves were cursing the master and the master was cursing the slaves mean different things

target:      the two sentences the slaves were cursing the master and the master was cursing the slaves mean different things because

prediction:  the two sentences the slaves were cursing the master and the master was cursing the slaves mean different things the


 78%|███████▊  | 11741/15000 [23:59<06:54,  7.87it/s]


 epoch: 11740 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11741 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.2%


 78%|███████▊  | 11745/15000 [24:00<05:51,  9.26it/s]


 epoch: 11742 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.1%

 epoch: 11743 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11744 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.8%

 epoch: 11745 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 96.9%


 78%|███████▊  | 11747/15000 [24:00<05:55,  9.16it/s]


 epoch: 11746 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11747 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 78%|███████▊  | 11748/15000 [24:00<05:53,  9.20it/s]


 epoch: 11748 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 78%|███████▊  | 11749/15000 [24:01<12:35,  4.31it/s]


 epoch: 11749 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

input:       humanistic psychology also focuses on personal growth self identity death aloneness and freedom it emphasizes subjective meaning the rejection

target:      humanistic psychology also focuses on personal growth self identity death aloneness and freedom it emphasizes subjective meaning the rejection of

prediction:  humanistic psychology also focuses on personal growth self identity death aloneness and freedom it emphasizes subjective meaning the rejection the


 78%|███████▊  | 11751/15000 [24:01<10:35,  5.11it/s]


 epoch: 11750 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11751 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.0%


 78%|███████▊  | 11753/15000 [24:01<08:25,  6.42it/s]


 epoch: 11752 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 11753 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 78%|███████▊  | 11755/15000 [24:02<07:34,  7.14it/s]


 epoch: 11754 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.4%

 epoch: 11755 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 78%|███████▊  | 11757/15000 [24:02<06:53,  7.84it/s]


 epoch: 11756 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.7%

 epoch: 11757 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 78%|███████▊  | 11759/15000 [24:02<06:55,  7.80it/s]


 epoch: 11758 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11759 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 78%|███████▊  | 11760/15000 [24:02<07:57,  6.79it/s]


input:       the combined islands of melanesia micronesia and polynesia as well as the ryukyu islands he added that besides the

target:      the combined islands of melanesia micronesia and polynesia as well as the ryukyu islands he added that besides the proceeding

prediction:  the combined islands of melanesia micronesia and polynesia as well as the ryukyu islands he added that besides the the

 epoch: 11760 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 78%|███████▊  | 11762/15000 [24:02<07:31,  7.17it/s]


 epoch: 11761 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 11762 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 78%|███████▊  | 11763/15000 [24:03<07:12,  7.49it/s]


 epoch: 11763 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 78%|███████▊  | 11765/15000 [24:03<07:51,  6.86it/s]


 epoch: 11764 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11765 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.4%


 78%|███████▊  | 11767/15000 [24:03<07:26,  7.24it/s]


 epoch: 11766 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11767 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 78%|███████▊  | 11769/15000 [24:03<06:53,  7.81it/s]


 epoch: 11768 | train_loss: 0.23, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11769 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

input:       ruled china for over four centuries with brief interruption from ad to the han dynasty promoted the spread of


 78%|███████▊  | 11770/15000 [24:04<07:22,  7.30it/s]


target:      ruled china for over four centuries with brief interruption from ad to the han dynasty promoted the spread of iron

prediction:  ruled china for over four centuries with brief interruption from ad to the han dynasty promoted the spread of the

 epoch: 11770 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11771 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 78%|███████▊  | 11774/15000 [24:04<05:55,  9.08it/s]


 epoch: 11772 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11773 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 11774 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 79%|███████▊  | 11776/15000 [24:04<05:50,  9.19it/s]


 epoch: 11775 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 11776 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%


 79%|███████▊  | 11777/15000 [24:04<05:45,  9.33it/s]


 epoch: 11777 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 79%|███████▊  | 11779/15000 [24:05<10:18,  5.21it/s]


 epoch: 11778 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 11779 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

input:       bc he led expeditions to bactria and then india further plans to invade arabia and north africa were halted

target:      bc he led expeditions to bactria and then india further plans to invade arabia and north africa were halted by


 79%|███████▊  | 11780/15000 [24:05<09:41,  5.54it/s]


prediction:  bc he led expeditions to bactria and then india further plans to invade arabia and north africa were halted the

 epoch: 11780 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11781 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.20, test_acc: 97.1%


 79%|███████▊  | 11783/15000 [24:05<07:20,  7.30it/s]


 epoch: 11782 | train_loss: 0.20, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11783 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 79%|███████▊  | 11785/15000 [24:06<06:28,  8.27it/s]


 epoch: 11784 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11785 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.7%

 epoch: 11786 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 79%|███████▊  | 11789/15000 [24:06<05:34,  9.59it/s]


 epoch: 11787 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 11788 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11789 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 79%|███████▊  | 11790/15000 [24:06<06:16,  8.52it/s]


input:       philosophical implications of their work for instance laplace who championed causal determinism and erwin schr dinger who wrote on

target:      philosophical implications of their work for instance laplace who championed causal determinism and erwin schr dinger who wrote on quantum

prediction:  philosophical implications of their work for instance laplace who championed causal determinism and erwin schr dinger who wrote on the

 epoch: 11790 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11791 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.2%


 79%|███████▊  | 11793/15000 [24:07<06:47,  7.88it/s]


 epoch: 11792 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11793 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11794 | train_loss: 0.20, train_acc: 97.5% | test_loss: 0.22, test_acc: 97.1%


 79%|███████▊  | 11797/15000 [24:07<05:31,  9.68it/s]


 epoch: 11795 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11796 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11797 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 79%|███████▊  | 11799/15000 [24:07<05:37,  9.49it/s]


 epoch: 11798 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 11799 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

input:       an attempt to find more suitable alternative was not successful the big bang models developed from observations of the

target:      an attempt to find more suitable alternative was not successful the big bang models developed from observations of the structure

prediction:  an attempt to find more suitable alternative was not successful the big bang models developed from observations of the the


 79%|███████▊  | 11802/15000 [24:07<05:41,  9.37it/s]


 epoch: 11800 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11801 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11802 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 79%|███████▊  | 11805/15000 [24:08<05:23,  9.87it/s]


 epoch: 11803 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11804 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11805 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 79%|███████▊  | 11808/15000 [24:08<06:24,  8.30it/s]


 epoch: 11806 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11807 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11808 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.0%


 79%|███████▊  | 11810/15000 [24:08<06:38,  8.00it/s]


 epoch: 11809 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

input:       your mood the source code for git refers to the program as the information manager from hell list of

target:      your mood the source code for git refers to the program as the information manager from hell list of git

prediction:  your mood the source code for git refers to the program as the information manager from hell list of the

 epoch: 11810 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 79%|███████▊  | 11812/15000 [24:09<05:56,  8.95it/s]


 epoch: 11811 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%

 epoch: 11812 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11813 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 79%|███████▉  | 11816/15000 [24:09<05:12, 10.19it/s]


 epoch: 11814 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 11815 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 11816 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 79%|███████▉  | 11818/15000 [24:09<05:10, 10.24it/s]


 epoch: 11817 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 11818 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11819 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

input:       why some interventions fail and others succeed true experiment with random assignment of research participants sometimes called subjects to

target:      why some interventions fail and others succeed true experiment with random assignment of research participants sometimes called subjects to rival

prediction:  why some interventions fail and others succeed true experiment with random assignment of research participants sometimes called subjects to the


 79%|███████▉  | 11822/15000 [24:10<07:56,  6.67it/s]


 epoch: 11820 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11821 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11822 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 79%|███████▉  | 11825/15000 [24:10<06:36,  8.00it/s]


 epoch: 11823 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 11824 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11825 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 79%|███████▉  | 11827/15000 [24:11<06:15,  8.45it/s]


 epoch: 11826 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11827 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11828 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 79%|███████▉  | 11828/15000 [24:11<06:02,  8.76it/s]



 epoch: 11829 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

input:       separating fiscal responsibility from justice and administration was reform of the imperial era to avoid provincial governors and tax

target:      separating fiscal responsibility from justice and administration was reform of the imperial era to avoid provincial governors and tax farmers

prediction:  separating fiscal responsibility from justice and administration was reform of the imperial era to avoid provincial governors and tax the


 79%|███████▉  | 11831/15000 [24:11<05:56,  8.88it/s]


 epoch: 11830 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11831 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 79%|███████▉  | 11833/15000 [24:11<05:41,  9.28it/s]


 epoch: 11832 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11833 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%


 79%|███████▉  | 11834/15000 [24:11<05:38,  9.35it/s]


 epoch: 11834 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 79%|███████▉  | 11836/15000 [24:12<09:51,  5.35it/s]


 epoch: 11835 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11836 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11837 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 79%|███████▉  | 11838/15000 [24:12<07:47,  6.76it/s]


 epoch: 11838 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 11839 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.2%

input:       the unemployment rate was well below the historical average the united states has continued confront sociopolitical debates on various

target:      the unemployment rate was well below the historical average the united states has continued confront sociopolitical debates on various issues

prediction:  the unemployment rate was well below the historical average the united states has continued confront sociopolitical debates on various the


 79%|███████▉  | 11842/15000 [24:13<06:17,  8.37it/s]


 epoch: 11840 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.3%

 epoch: 11841 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11842 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 79%|███████▉  | 11845/15000 [24:13<05:31,  9.52it/s]


 epoch: 11843 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.19, test_acc: 97.2%

 epoch: 11844 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 11845 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.0%


 79%|███████▉  | 11847/15000 [24:13<05:18,  9.90it/s]


 epoch: 11846 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 11847 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11848 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 79%|███████▉  | 11850/15000 [24:14<08:59,  5.84it/s]


 epoch: 11849 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

input:       education year organized institutions play key role for various aspects of education institutions like schools universities teacher training institutions

target:      education year organized institutions play key role for various aspects of education institutions like schools universities teacher training institutions and

prediction:  education year organized institutions play key role for various aspects of education institutions like schools universities teacher training institutions the

 epoch: 11850 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.8%


 79%|███████▉  | 11852/15000 [24:14<07:52,  6.66it/s]


 epoch: 11851 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 11852 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 79%|███████▉  | 11854/15000 [24:14<06:59,  7.49it/s]


 epoch: 11853 | train_loss: 0.23, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11854 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%


 79%|███████▉  | 11856/15000 [24:15<06:38,  7.90it/s]


 epoch: 11855 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11856 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.19, test_acc: 97.3%


 79%|███████▉  | 11858/15000 [24:15<06:23,  8.18it/s]


 epoch: 11857 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 11858 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%


 79%|███████▉  | 11860/15000 [24:15<07:25,  7.05it/s]


 epoch: 11859 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

input:       aimed at curbing the presence of communities they believed to be promoting hate and banned approximately subreddits that were

target:      aimed at curbing the presence of communities they believed to be promoting hate and banned approximately subreddits that were found

prediction:  aimed at curbing the presence of communities they believed to be promoting hate and banned approximately subreddits that were the

 epoch: 11860 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 79%|███████▉  | 11862/15000 [24:15<06:53,  7.59it/s]


 epoch: 11861 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 11862 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 79%|███████▉  | 11864/15000 [24:16<07:24,  7.05it/s]


 epoch: 11863 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11864 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 79%|███████▉  | 11866/15000 [24:16<06:50,  7.64it/s]


 epoch: 11865 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 11866 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 79%|███████▉  | 11868/15000 [24:16<06:41,  7.81it/s]


 epoch: 11867 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 11868 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 79%|███████▉  | 11870/15000 [24:16<07:46,  6.71it/s]


 epoch: 11869 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

input:       of high compensation paid by brazil mediatized by the united kingdom the newly independent nations began process of fragmentation

target:      of high compensation paid by brazil mediatized by the united kingdom the newly independent nations began process of fragmentation with

prediction:  of high compensation paid by brazil mediatized by the united kingdom the newly independent nations began process of fragmentation the

 epoch: 11870 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%


 79%|███████▉  | 11872/15000 [24:17<06:59,  7.46it/s]


 epoch: 11871 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11872 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 79%|███████▉  | 11874/15000 [24:17<07:09,  7.28it/s]


 epoch: 11873 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 11874 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 79%|███████▉  | 11876/15000 [24:17<06:48,  7.64it/s]


 epoch: 11875 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11876 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 79%|███████▉  | 11878/15000 [24:18<07:34,  6.87it/s]


 epoch: 11877 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11878 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%


 79%|███████▉  | 11880/15000 [24:18<06:57,  7.48it/s]


 epoch: 11879 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

input:       part of ancient greek life though the greeks of different cities and tribes worshipped similar gods religious practices were

target:      part of ancient greek life though the greeks of different cities and tribes worshipped similar gods religious practices were not

prediction:  part of ancient greek life though the greeks of different cities and tribes worshipped similar gods religious practices were the

 epoch: 11880 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 79%|███████▉  | 11883/15000 [24:18<05:48,  8.95it/s]


 epoch: 11881 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 11882 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11883 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 79%|███████▉  | 11885/15000 [24:18<05:30,  9.43it/s]


 epoch: 11884 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11885 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11886 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 79%|███████▉  | 11888/15000 [24:19<05:20,  9.71it/s]


 epoch: 11887 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11888 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.25, test_acc: 96.8%


 79%|███████▉  | 11890/15000 [24:19<06:00,  8.63it/s]


 epoch: 11889 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

input:       and normalized behaviour such as raising hand to speak are imposed in the name of greater efficiency practitioners of

target:      and normalized behaviour such as raising hand to speak are imposed in the name of greater efficiency practitioners of critical

prediction:  and normalized behaviour such as raising hand to speak are imposed in the name of greater efficiency practitioners of the

 epoch: 11890 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 79%|███████▉  | 11891/15000 [24:19<05:52,  8.82it/s]


 epoch: 11891 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 79%|███████▉  | 11893/15000 [24:20<10:12,  5.08it/s]


 epoch: 11892 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11893 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 79%|███████▉  | 11894/15000 [24:20<08:56,  5.79it/s]


 epoch: 11894 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11895 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 79%|███████▉  | 11898/15000 [24:20<06:23,  8.08it/s]


 epoch: 11896 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11897 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 11898 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.2%


 79%|███████▉  | 11900/15000 [24:20<06:16,  8.24it/s]


 epoch: 11899 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

input:       history the world population was already exponentially increasing due to the neolithic revolution which was in full progress while

target:      history the world population was already exponentially increasing due to the neolithic revolution which was in full progress while in

prediction:  history the world population was already exponentially increasing due to the neolithic revolution which was in full progress while the

 epoch: 11900 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 79%|███████▉  | 11902/15000 [24:21<05:48,  8.90it/s]


 epoch: 11901 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11902 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11903 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.5%


 79%|███████▉  | 11905/15000 [24:21<05:27,  9.44it/s]


 epoch: 11904 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11905 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 79%|███████▉  | 11907/15000 [24:21<09:17,  5.55it/s]


 epoch: 11906 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11907 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 79%|███████▉  | 11909/15000 [24:22<07:29,  6.87it/s]


 epoch: 11908 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11909 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

input:       whole the field generally includes two types of physicists experimental physicists who specialize in the observation of natural phenomena

target:      whole the field generally includes two types of physicists experimental physicists who specialize in the observation of natural phenomena and

prediction:  whole the field generally includes two types of physicists experimental physicists who specialize in the observation of natural phenomena the


 79%|███████▉  | 11912/15000 [24:22<06:24,  8.03it/s]


 epoch: 11910 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.2%

 epoch: 11911 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11912 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 79%|███████▉  | 11914/15000 [24:22<06:03,  8.49it/s]


 epoch: 11913 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%

 epoch: 11914 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 79%|███████▉  | 11916/15000 [24:22<05:51,  8.77it/s]


 epoch: 11915 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 11916 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 79%|███████▉  | 11919/15000 [24:23<05:16,  9.74it/s]


 epoch: 11917 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11918 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11919 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 79%|███████▉  | 11920/15000 [24:23<05:51,  8.76it/s]


input:       the year two world wars and an economic depression dominated the first half of the th century the first

target:      the year two world wars and an economic depression dominated the first half of the th century the first world

prediction:  the year two world wars and an economic depression dominated the first half of the th century the first the

 epoch: 11920 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 79%|███████▉  | 11923/15000 [24:23<05:53,  8.71it/s]


 epoch: 11921 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 11922 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11923 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.1%


 80%|███████▉  | 11925/15000 [24:23<05:34,  9.18it/s]


 epoch: 11924 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11925 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 11926 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 80%|███████▉  | 11928/15000 [24:24<05:06, 10.01it/s]


 epoch: 11927 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 11928 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 80%|███████▉  | 11930/15000 [24:24<05:47,  8.84it/s]


 epoch: 11929 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.27, test_acc: 96.9%

input:       large margin part of both the anglosphere and western world the united states is also home to wide variety

target:      large margin part of both the anglosphere and western world the united states is also home to wide variety of

prediction:  large margin part of both the anglosphere and western world the united states is also home to wide variety the

 epoch: 11930 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 80%|███████▉  | 11932/15000 [24:24<05:42,  8.96it/s]


 epoch: 11931 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 11932 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 80%|███████▉  | 11934/15000 [24:24<05:27,  9.36it/s]


 epoch: 11933 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11934 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 80%|███████▉  | 11936/15000 [24:25<10:06,  5.06it/s]


 epoch: 11935 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 96.9%

 epoch: 11936 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 11937 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%


 80%|███████▉  | 11938/15000 [24:25<07:39,  6.67it/s]


 epoch: 11938 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11939 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

input:       the developer understand the file system operations occurring behind the scenes the term api initially described an interface only

target:      the developer understand the file system operations occurring behind the scenes the term api initially described an interface only for

prediction:  the developer understand the file system operations occurring behind the scenes the term api initially described an interface only the


 80%|███████▉  | 11941/15000 [24:26<06:29,  7.85it/s]


 epoch: 11940 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 11941 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11942 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 80%|███████▉  | 11945/15000 [24:26<05:25,  9.37it/s]


 epoch: 11943 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 11944 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11945 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 80%|███████▉  | 11947/15000 [24:26<05:24,  9.41it/s]


 epoch: 11946 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11947 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 80%|███████▉  | 11948/15000 [24:26<05:24,  9.40it/s]


 epoch: 11948 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 80%|███████▉  | 11950/15000 [24:27<10:40,  4.77it/s]


 epoch: 11949 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       world lake titicaca and excluding research stations in antarctica the world southernmost permanently inhabited community puerto toro chile south

target:      world lake titicaca and excluding research stations in antarctica the world southernmost permanently inhabited community puerto toro chile south america

prediction:  world lake titicaca and excluding research stations in antarctica the world southernmost permanently inhabited community puerto toro chile south the

 epoch: 11950 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%


 80%|███████▉  | 11953/15000 [24:27<07:07,  7.13it/s]


 epoch: 11951 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.1%

 epoch: 11952 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 96.9%

 epoch: 11953 | train_loss: 0.18, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 80%|███████▉  | 11956/15000 [24:28<06:03,  8.37it/s]


 epoch: 11954 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11955 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 11956 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 80%|███████▉  | 11958/15000 [24:28<05:54,  8.58it/s]


 epoch: 11957 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.3%

 epoch: 11958 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 80%|███████▉  | 11960/15000 [24:28<06:46,  7.49it/s]


 epoch: 11959 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

input:       of sign and meaning goes back to the first linguistic studies of de saussure and is now used in

target:      of sign and meaning goes back to the first linguistic studies of de saussure and is now used in almost

prediction:  of sign and meaning goes back to the first linguistic studies of de saussure and is now used in the

 epoch: 11960 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 80%|███████▉  | 11962/15000 [24:28<06:17,  8.06it/s]


 epoch: 11961 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 11962 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 80%|███████▉  | 11963/15000 [24:28<06:22,  7.93it/s]


 epoch: 11963 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 80%|███████▉  | 11965/15000 [24:29<10:47,  4.69it/s]


 epoch: 11964 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11965 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 80%|███████▉  | 11967/15000 [24:29<08:42,  5.81it/s]


 epoch: 11966 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 11967 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 80%|███████▉  | 11969/15000 [24:30<07:18,  6.91it/s]


 epoch: 11968 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 11969 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 80%|███████▉  | 11970/15000 [24:30<08:10,  6.18it/s]


input:       pain and anxiety caused the most years lost to disability the most harmful risk factors were poor diet tobacco

target:      pain and anxiety caused the most years lost to disability the most harmful risk factors were poor diet tobacco smoking

prediction:  pain and anxiety caused the most years lost to disability the most harmful risk factors were poor diet tobacco the

 epoch: 11970 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.2%


 80%|███████▉  | 11972/15000 [24:30<07:26,  6.79it/s]


 epoch: 11971 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11972 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.21, test_acc: 97.1%


 80%|███████▉  | 11974/15000 [24:30<06:53,  7.31it/s]


 epoch: 11973 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 11974 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%


 80%|███████▉  | 11976/15000 [24:31<06:30,  7.74it/s]


 epoch: 11975 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 11976 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 80%|███████▉  | 11977/15000 [24:31<06:17,  8.01it/s]


 epoch: 11977 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 96.9%


 80%|███████▉  | 11979/15000 [24:31<11:05,  4.54it/s]


 epoch: 11978 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 11979 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 80%|███████▉  | 11980/15000 [24:32<10:33,  4.77it/s]


input:       harsh towards germany upon whom it placed full responsibility for the war and imposed heavy sanctions excess deaths in

target:      harsh towards germany upon whom it placed full responsibility for the war and imposed heavy sanctions excess deaths in russia

prediction:  harsh towards germany upon whom it placed full responsibility for the war and imposed heavy sanctions excess deaths in the

 epoch: 11980 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 80%|███████▉  | 11983/15000 [24:32<07:02,  7.14it/s]


 epoch: 11981 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11982 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%

 epoch: 11983 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.0%


 80%|███████▉  | 11985/15000 [24:32<06:17,  7.99it/s]


 epoch: 11984 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 11985 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 80%|███████▉  | 11987/15000 [24:32<05:45,  8.71it/s]


 epoch: 11986 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 11987 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 80%|███████▉  | 11988/15000 [24:32<05:33,  9.03it/s]


 epoch: 11988 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 11989 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       the ancient civilizations of egypt greece china india and persia all engaged in the philosophical study of psychology in

target:      the ancient civilizations of egypt greece china india and persia all engaged in the philosophical study of psychology in ancient

prediction:  the ancient civilizations of egypt greece china india and persia all engaged in the philosophical study of psychology in the


 80%|███████▉  | 11991/15000 [24:33<05:39,  8.85it/s]


 epoch: 11990 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 11991 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 80%|███████▉  | 11993/15000 [24:33<06:28,  7.74it/s]


 epoch: 11992 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.20, test_acc: 97.2%

 epoch: 11993 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 80%|███████▉  | 11995/15000 [24:33<05:50,  8.57it/s]


 epoch: 11994 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 11995 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%


 80%|███████▉  | 11997/15000 [24:33<05:36,  8.93it/s]


 epoch: 11996 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11997 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 80%|███████▉  | 11999/15000 [24:34<05:29,  9.12it/s]


 epoch: 11998 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 11999 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

input:       generally considered to begin with the bronze age the start and end of the three ages vary between world

target:      generally considered to begin with the bronze age the start and end of the three ages vary between world regions


 80%|████████  | 12001/15000 [24:34<05:50,  8.56it/s]


prediction:  generally considered to begin with the bronze age the start and end of the three ages vary between world the

 epoch: 12000 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 12001 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 80%|████████  | 12004/15000 [24:34<05:18,  9.40it/s]


 epoch: 12002 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12003 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12004 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12005 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 80%|████████  | 12007/15000 [24:35<08:57,  5.57it/s]


 epoch: 12006 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.4%

 epoch: 12007 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 80%|████████  | 12008/15000 [24:35<08:03,  6.19it/s]


 epoch: 12008 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12009 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

input:       of thought such as the approach to problem solving developed in your education or experience as physicist in all

target:      of thought such as the approach to problem solving developed in your education or experience as physicist in all cases

prediction:  of thought such as the approach to problem solving developed in your education or experience as physicist in all the


 80%|████████  | 12011/15000 [24:35<06:38,  7.50it/s]


 epoch: 12010 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12011 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 80%|████████  | 12013/15000 [24:36<06:00,  8.29it/s]


 epoch: 12012 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12013 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 80%|████████  | 12015/15000 [24:36<05:35,  8.88it/s]


 epoch: 12014 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12015 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 80%|████████  | 12017/15000 [24:36<05:20,  9.32it/s]


 epoch: 12016 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.7%

 epoch: 12017 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%


 80%|████████  | 12019/15000 [24:36<05:13,  9.50it/s]


 epoch: 12018 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12019 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

input:       either case the big bang as an event is also colloquially referred to as the birth of our universe

target:      either case the big bang as an event is also colloquially referred to as the birth of our universe since

prediction:  either case the big bang as an event is also colloquially referred to as the birth of our universe the


 80%|████████  | 12021/15000 [24:37<10:02,  4.94it/s]


 epoch: 12020 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12021 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 80%|████████  | 12023/15000 [24:37<07:36,  6.52it/s]


 epoch: 12022 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12023 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12024 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 80%|████████  | 12026/15000 [24:37<06:16,  7.90it/s]


 epoch: 12025 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12026 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.19, test_acc: 97.1%


 80%|████████  | 12029/15000 [24:38<05:29,  9.00it/s]


 epoch: 12027 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%

 epoch: 12028 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12029 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.4%


 80%|████████  | 12030/15000 [24:38<05:55,  8.36it/s]


input:       that drops to around in the winter despite its remoteness human activity has significant impact on the continent via

target:      that drops to around in the winter despite its remoteness human activity has significant impact on the continent via pollution

prediction:  that drops to around in the winter despite its remoteness human activity has significant impact on the continent via the

 epoch: 12030 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12031 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 80%|████████  | 12032/15000 [24:38<05:26,  9.09it/s]


 epoch: 12032 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12033 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 80%|████████  | 12035/15000 [24:39<07:04,  6.99it/s]


 epoch: 12034 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12035 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12036 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%


 80%|████████  | 12039/15000 [24:39<05:35,  8.82it/s]


 epoch: 12037 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.21, test_acc: 97.2%

 epoch: 12038 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12039 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 80%|████████  | 12040/15000 [24:39<06:02,  8.16it/s]


input:       see language as being mostly innate such as psychologist steven pinker hold the precedents to be animal cognition whereas

target:      see language as being mostly innate such as psychologist steven pinker hold the precedents to be animal cognition whereas those

prediction:  see language as being mostly innate such as psychologist steven pinker hold the precedents to be animal cognition whereas the

 epoch: 12040 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12041 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 80%|████████  | 12044/15000 [24:40<05:12,  9.46it/s]


 epoch: 12042 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%

 epoch: 12043 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12044 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 80%|████████  | 12047/15000 [24:40<04:59,  9.86it/s]


 epoch: 12045 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12046 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12047 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 80%|████████  | 12048/15000 [24:40<05:02,  9.77it/s]


 epoch: 12048 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 80%|████████  | 12050/15000 [24:41<09:09,  5.36it/s]


 epoch: 12049 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.4%

input:       one desirability for specific relationship partner but this effect is only most likely to occur when men use humour

target:      one desirability for specific relationship partner but this effect is only most likely to occur when men use humour and

prediction:  one desirability for specific relationship partner but this effect is only most likely to occur when men use humour the

 epoch: 12050 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 80%|████████  | 12052/15000 [24:41<07:16,  6.75it/s]


 epoch: 12051 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12052 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 12053 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 80%|████████  | 12056/15000 [24:41<05:30,  8.91it/s]


 epoch: 12054 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.4%

 epoch: 12055 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12056 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.1%


 80%|████████  | 12058/15000 [24:41<05:10,  9.49it/s]


 epoch: 12057 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12058 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12059 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 80%|████████  | 12060/15000 [24:42<05:34,  8.79it/s]


input:       often on or near the sites of preexisting walled settlements known as oppida urbanization in roman africa expanded on

target:      often on or near the sites of preexisting walled settlements known as oppida urbanization in roman africa expanded on greek

prediction:  often on or near the sites of preexisting walled settlements known as oppida urbanization in roman africa expanded on the

 epoch: 12060 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 80%|████████  | 12062/15000 [24:42<05:48,  8.44it/s]


 epoch: 12061 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%

 epoch: 12062 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 80%|████████  | 12064/15000 [24:43<09:40,  5.06it/s]


 epoch: 12063 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12064 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 80%|████████  | 12066/15000 [24:43<07:38,  6.41it/s]


 epoch: 12065 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 96.8%

 epoch: 12066 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 80%|████████  | 12068/15000 [24:43<06:28,  7.55it/s]


 epoch: 12067 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12068 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 80%|████████  | 12070/15000 [24:43<06:50,  7.14it/s]


 epoch: 12069 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

input:       of globally recognized newspapers in the united states include the wall street journal the new york times the washington

target:      of globally recognized newspapers in the united states include the wall street journal the new york times the washington post

prediction:  of globally recognized newspapers in the united states include the wall street journal the new york times the washington the

 epoch: 12070 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 80%|████████  | 12072/15000 [24:44<06:25,  7.60it/s]


 epoch: 12071 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12072 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 80%|████████  | 12074/15000 [24:44<06:07,  7.97it/s]


 epoch: 12073 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12074 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 81%|████████  | 12076/15000 [24:44<06:14,  7.81it/s]


 epoch: 12075 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12076 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 81%|████████  | 12078/15000 [24:45<09:19,  5.23it/s]


 epoch: 12077 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12078 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 81%|████████  | 12080/15000 [24:45<08:34,  5.67it/s]


 epoch: 12079 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

input:       among others number of insects such as the small tortoiseshell butterfly add to the biodiversity sea creatures are also

target:      among others number of insects such as the small tortoiseshell butterfly add to the biodiversity sea creatures are also an

prediction:  among others number of insects such as the small tortoiseshell butterfly add to the biodiversity sea creatures are also the

 epoch: 12080 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 81%|████████  | 12082/15000 [24:45<07:21,  6.61it/s]


 epoch: 12081 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12082 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 81%|████████  | 12084/15000 [24:45<06:20,  7.67it/s]


 epoch: 12083 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

 epoch: 12084 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 81%|████████  | 12087/15000 [24:46<05:20,  9.10it/s]


 epoch: 12085 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12086 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12087 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%


 81%|████████  | 12088/15000 [24:46<05:18,  9.13it/s]


 epoch: 12088 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

 epoch: 12089 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

input:       or physically disabled and hence less mobile persons original research cyberculture is wide social and cultural movement closely linked

target:      or physically disabled and hence less mobile persons original research cyberculture is wide social and cultural movement closely linked to

prediction:  or physically disabled and hence less mobile persons original research cyberculture is wide social and cultural movement closely linked the


 81%|████████  | 12090/15000 [24:46<05:32,  8.77it/s]


 epoch: 12090 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12091 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 81%|████████  | 12093/15000 [24:47<06:09,  7.86it/s]


 epoch: 12092 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.4%

 epoch: 12093 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 81%|████████  | 12096/15000 [24:47<05:24,  8.95it/s]


 epoch: 12094 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12095 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.4%

 epoch: 12096 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 96.9%


 81%|████████  | 12099/15000 [24:47<05:06,  9.48it/s]


 epoch: 12097 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12098 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12099 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 81%|████████  | 12100/15000 [24:47<05:30,  8.78it/s]


input:       is to learn how to live in social groups and interact with others by coming to understand social and

target:      is to learn how to live in social groups and interact with others by coming to understand social and cultural

prediction:  is to learn how to live in social groups and interact with others by coming to understand social and the

 epoch: 12100 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12101 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 81%|████████  | 12103/15000 [24:48<05:17,  9.13it/s]


 epoch: 12102 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12103 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%


 81%|████████  | 12104/15000 [24:48<05:13,  9.23it/s]


 epoch: 12104 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12105 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 81%|████████  | 12107/15000 [24:48<05:30,  8.76it/s]


 epoch: 12106 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 12107 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 81%|████████  | 12109/15000 [24:48<05:14,  9.20it/s]


 epoch: 12108 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12109 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%

input:       stated that phonologically the match between europa name and any form of the semitic word is very poor while

target:      stated that phonologically the match between europa name and any form of the semitic word is very poor while beekes


 81%|████████  | 12110/15000 [24:48<05:57,  8.09it/s]


prediction:  stated that phonologically the match between europa name and any form of the semitic word is very poor while the

 epoch: 12110 | train_loss: 0.23, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12111 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%


 81%|████████  | 12114/15000 [24:49<05:05,  9.45it/s]


 epoch: 12112 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12113 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 12114 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 81%|████████  | 12116/15000 [24:49<05:03,  9.50it/s]


 epoch: 12115 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12116 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 81%|████████  | 12118/15000 [24:49<05:03,  9.51it/s]


 epoch: 12117 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12118 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 81%|████████  | 12119/15000 [24:49<05:01,  9.55it/s]


 epoch: 12119 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

input:       suppression of an emergent sovereign africa and modernist art editors abiola irele and simon gikandi comment that the current

target:      suppression of an emergent sovereign africa and modernist art editors abiola irele and simon gikandi comment that the current identity

prediction:  suppression of an emergent sovereign africa and modernist art editors abiola irele and simon gikandi comment that the current the


 81%|████████  | 12122/15000 [24:50<08:41,  5.52it/s]


 epoch: 12120 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 12121 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12122 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 81%|████████  | 12125/15000 [24:50<06:25,  7.45it/s]


 epoch: 12123 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12124 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 12125 | train_loss: 0.20, train_acc: 97.5% | test_loss: 0.24, test_acc: 97.2%


 81%|████████  | 12127/15000 [24:51<05:44,  8.35it/s]


 epoch: 12126 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12127 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 12128 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%


 81%|████████  | 12129/15000 [24:51<05:13,  9.14it/s]


 epoch: 12129 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

input:       to help strengthen the frontier defences one of vespasian main goals the crisis of had wrought havoc on the

target:      to help strengthen the frontier defences one of vespasian main goals the crisis of had wrought havoc on the army

prediction:  to help strengthen the frontier defences one of vespasian main goals the crisis of had wrought havoc on the the

 epoch: 12130 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 81%|████████  | 12132/15000 [24:51<05:15,  9.10it/s]


 epoch: 12131 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.21, test_acc: 97.2%

 epoch: 12132 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 81%|████████  | 12133/15000 [24:51<05:13,  9.13it/s]


 epoch: 12133 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 81%|████████  | 12136/15000 [24:52<07:51,  6.07it/s]


 epoch: 12134 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12135 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12136 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 81%|████████  | 12139/15000 [24:52<06:11,  7.70it/s]


 epoch: 12137 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12138 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12139 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 81%|████████  | 12141/15000 [24:53<05:57,  8.00it/s]


input:       flow is the exchange of genes between populations and between species it can therefore be source of variation that

target:      flow is the exchange of genes between populations and between species it can therefore be source of variation that is

prediction:  flow is the exchange of genes between populations and between species it can therefore be source of variation that the

 epoch: 12140 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12141 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%


 81%|████████  | 12144/15000 [24:53<05:09,  9.21it/s]


 epoch: 12142 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.4%

 epoch: 12143 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 12144 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%


 81%|████████  | 12146/15000 [24:53<04:49,  9.84it/s]


 epoch: 12145 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12146 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12147 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%


 81%|████████  | 12148/15000 [24:53<06:01,  7.88it/s]


 epoch: 12148 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.9%

 epoch: 12149 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.4%

input:       to originate in the when in france paul broca traced production of speech to the left frontal gyrus thereby

target:      to originate in the when in france paul broca traced production of speech to the left frontal gyrus thereby also

prediction:  to originate in the when in france paul broca traced production of speech to the left frontal gyrus thereby the


 81%|████████  | 12151/15000 [24:54<05:39,  8.39it/s]


 epoch: 12150 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.21, test_acc: 97.3%

 epoch: 12151 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12152 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 81%|████████  | 12155/15000 [24:54<04:51,  9.75it/s]


 epoch: 12153 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12154 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12155 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.5%


 81%|████████  | 12157/15000 [24:54<04:48,  9.87it/s]


 epoch: 12156 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12157 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%

 epoch: 12158 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 81%|████████  | 12159/15000 [24:54<04:49,  9.82it/s]


 epoch: 12159 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

input:       universities were the university of bologna the university of paris and oxford university another key development was the creation

target:      universities were the university of bologna the university of paris and oxford university another key development was the creation of

prediction:  universities were the university of bologna the university of paris and oxford university another key development was the creation the

 epoch: 12160 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 81%|████████  | 12161/15000 [24:55<05:01,  9.40it/s]


 epoch: 12161 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%


 81%|████████  | 12164/15000 [24:55<07:19,  6.45it/s]


 epoch: 12162 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12163 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12164 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%


 81%|████████  | 12166/15000 [24:56<06:23,  7.40it/s]


 epoch: 12165 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12166 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 12167 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 81%|████████  | 12168/15000 [24:56<05:38,  8.36it/s]


 epoch: 12168 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12169 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

input:       jos ramos horta and bishop carlos filipe ximenes belo of timor leste kim dae jung and japanese scientists most


 81%|████████  | 12171/15000 [24:56<05:44,  8.21it/s]


target:      jos ramos horta and bishop carlos filipe ximenes belo of timor leste kim dae jung and japanese scientists most of

prediction:  jos ramos horta and bishop carlos filipe ximenes belo of timor leste kim dae jung and japanese scientists most the

 epoch: 12170 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12171 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 81%|████████  | 12173/15000 [24:56<05:40,  8.31it/s]


 epoch: 12172 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12173 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 81%|████████  | 12175/15000 [24:57<05:45,  8.19it/s]


 epoch: 12174 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12175 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 81%|████████  | 12177/15000 [24:57<09:51,  4.77it/s]


 epoch: 12176 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%

 epoch: 12177 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 81%|████████  | 12179/15000 [24:58<07:28,  6.30it/s]


 epoch: 12178 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.1%

 epoch: 12179 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

input:       is first used in the th century bce by anaximander and hecataeus anaximander placed the boundary between asia and

target:      is first used in the th century bce by anaximander and hecataeus anaximander placed the boundary between asia and europe


 81%|████████  | 12181/15000 [24:58<06:54,  6.81it/s]


prediction:  is first used in the th century bce by anaximander and hecataeus anaximander placed the boundary between asia and the

 epoch: 12180 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12181 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 81%|████████  | 12183/15000 [24:58<06:02,  7.77it/s]


 epoch: 12182 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12183 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 81%|████████  | 12185/15000 [24:58<05:52,  7.99it/s]


 epoch: 12184 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 96.9%

 epoch: 12185 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 81%|████████  | 12187/15000 [24:59<05:37,  8.34it/s]


 epoch: 12186 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.0%

 epoch: 12187 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 81%|████████▏ | 12189/15000 [24:59<05:29,  8.53it/s]


 epoch: 12188 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12189 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

input:       aegean coast of asia minor was colonized first followed by cyprus and the coasts of thrace the sea of


 81%|████████▏ | 12190/15000 [24:59<06:05,  7.69it/s]


target:      aegean coast of asia minor was colonized first followed by cyprus and the coasts of thrace the sea of marmara

prediction:  aegean coast of asia minor was colonized first followed by cyprus and the coasts of thrace the sea of the

 epoch: 12190 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 81%|████████▏ | 12192/15000 [24:59<07:54,  5.91it/s]


 epoch: 12191 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12192 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 81%|████████▏ | 12194/15000 [25:00<06:39,  7.03it/s]


 epoch: 12193 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12194 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%


 81%|████████▏ | 12197/15000 [25:00<05:22,  8.70it/s]


 epoch: 12195 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12196 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12197 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%


 81%|████████▏ | 12199/15000 [25:00<04:59,  9.34it/s]


 epoch: 12198 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12199 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

input:       and constantius iii who succeeded honorius as augustus reigned for less than year galla placidia and constantius had two

target:      and constantius iii who succeeded honorius as augustus reigned for less than year galla placidia and constantius had two children

prediction:  and constantius iii who succeeded honorius as augustus reigned for less than year galla placidia and constantius had two the


 81%|████████▏ | 12201/15000 [25:00<05:18,  8.78it/s]


 epoch: 12200 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12201 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 81%|████████▏ | 12204/15000 [25:01<04:59,  9.33it/s]


 epoch: 12202 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 12203 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12204 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 81%|████████▏ | 12206/15000 [25:01<07:58,  5.84it/s]


 epoch: 12205 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 12206 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12207 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 81%|████████▏ | 12208/15000 [25:01<06:33,  7.09it/s]


 epoch: 12208 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.9%

 epoch: 12209 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

input:       of valens left gratian and valentinian ii as the sole augusti gratian was now effectively responsible for the whole

target:      of valens left gratian and valentinian ii as the sole augusti gratian was now effectively responsible for the whole empire

prediction:  of valens left gratian and valentinian ii as the sole augusti gratian was now effectively responsible for the whole the


 81%|████████▏ | 12212/15000 [25:02<05:28,  8.49it/s]


 epoch: 12210 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12211 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12212 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 81%|████████▏ | 12215/15000 [25:02<05:03,  9.17it/s]


 epoch: 12213 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12214 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12215 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 81%|████████▏ | 12217/15000 [25:02<05:06,  9.08it/s]


 epoch: 12216 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.3%

 epoch: 12217 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 81%|████████▏ | 12218/15000 [25:02<05:09,  8.99it/s]


 epoch: 12218 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%


 81%|████████▏ | 12220/15000 [25:03<09:24,  4.93it/s]


 epoch: 12219 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       of machines like regular books and worksheets educational technology can benefit learning in various ways in the form of

target:      of machines like regular books and worksheets educational technology can benefit learning in various ways in the form of media

prediction:  of machines like regular books and worksheets educational technology can benefit learning in various ways in the form of the

 epoch: 12220 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 81%|████████▏ | 12222/15000 [25:03<07:10,  6.45it/s]


 epoch: 12221 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12222 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.8%

 epoch: 12223 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 82%|████████▏ | 12226/15000 [25:04<05:18,  8.70it/s]


 epoch: 12224 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12225 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12226 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 82%|████████▏ | 12228/15000 [25:04<04:56,  9.35it/s]


 epoch: 12227 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12228 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12229 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 82%|████████▏ | 12230/15000 [25:04<04:59,  9.25it/s]


input:       how it should be described each of them would analyze sentence such as this in different manner languages can

target:      how it should be described each of them would analyze sentence such as this in different manner languages can be

prediction:  how it should be described each of them would analyze sentence such as this in different manner languages can the

 epoch: 12230 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12231 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.7%


 82%|████████▏ | 12232/15000 [25:04<04:48,  9.60it/s]


 epoch: 12232 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 82%|████████▏ | 12235/15000 [25:05<06:27,  7.14it/s]


 epoch: 12233 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12234 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12235 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 82%|████████▏ | 12237/15000 [25:05<05:41,  8.10it/s]


 epoch: 12236 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12237 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12238 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 82%|████████▏ | 12239/15000 [25:05<05:06,  9.01it/s]


 epoch: 12239 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

input:       in an optical mineralogy analysis petrologists analyze thin sections of rock samples using petrographic microscope where the minerals can

target:      in an optical mineralogy analysis petrologists analyze thin sections of rock samples using petrographic microscope where the minerals can be

prediction:  in an optical mineralogy analysis petrologists analyze thin sections of rock samples using petrographic microscope where the minerals can the

 epoch: 12240 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 82%|████████▏ | 12242/15000 [25:06<05:06,  9.01it/s]


 epoch: 12241 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 12242 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 82%|████████▏ | 12244/15000 [25:06<04:58,  9.22it/s]


 epoch: 12243 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12244 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 82%|████████▏ | 12246/15000 [25:06<04:54,  9.35it/s]


 epoch: 12245 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12246 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 82%|████████▏ | 12247/15000 [25:06<04:53,  9.38it/s]


 epoch: 12247 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 82%|████████▏ | 12249/15000 [25:07<08:40,  5.29it/s]


 epoch: 12248 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 12249 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

input:       continent of oceania when compared to the other continents oceania is the smallest in land area and the second

target:      continent of oceania when compared to the other continents oceania is the smallest in land area and the second least

prediction:  continent of oceania when compared to the other continents oceania is the smallest in land area and the second the


 82%|████████▏ | 12252/15000 [25:07<06:20,  7.22it/s]


 epoch: 12250 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12251 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12252 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 82%|████████▏ | 12255/15000 [25:07<05:10,  8.85it/s]


 epoch: 12253 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12254 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12255 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 82%|████████▏ | 12257/15000 [25:08<05:07,  8.93it/s]


 epoch: 12256 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12257 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 82%|████████▏ | 12258/15000 [25:08<05:02,  9.06it/s]


 epoch: 12258 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.4%

 epoch: 12259 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%

input:       is the psychoeducation of patients instructing them in how to follow medical regimen health psychologists can also educate doctors

target:      is the psychoeducation of patients instructing them in how to follow medical regimen health psychologists can also educate doctors and

prediction: 

 82%|████████▏ | 12261/15000 [25:08<05:06,  8.94it/s]

 is the psychoeducation of patients instructing them in how to follow medical regimen health psychologists can also educate doctors the

 epoch: 12260 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12261 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%


 82%|████████▏ | 12263/15000 [25:08<05:08,  8.87it/s]


 epoch: 12262 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 12263 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 82%|████████▏ | 12265/15000 [25:08<04:48,  9.48it/s]


 epoch: 12264 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12265 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12266 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 82%|████████▏ | 12269/15000 [25:09<04:27, 10.20it/s]


 epoch: 12267 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12268 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12269 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 82%|████████▏ | 12271/15000 [25:09<04:48,  9.47it/s]


input:       literature mento star lord flea stated in interview that he thought that west indians have the best sense of

target:      literature mento star lord flea stated in interview that he thought that west indians have the best sense of humour

prediction:  literature mento star lord flea stated in interview that he thought that west indians have the best sense of the

 epoch: 12270 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 12271 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 82%|████████▏ | 12273/15000 [25:09<04:50,  9.37it/s]


 epoch: 12272 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.4%

 epoch: 12273 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 82%|████████▏ | 12275/15000 [25:10<04:54,  9.26it/s]


 epoch: 12274 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 12275 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 82%|████████▏ | 12277/15000 [25:10<09:11,  4.94it/s]


 epoch: 12276 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12277 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 82%|████████▏ | 12279/15000 [25:10<07:14,  6.27it/s]


 epoch: 12278 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12279 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

input:       military and the provincial government the military established control of territory through war but after city or people was

target:      military and the provincial government the military established control of territory through war but after city or people was brought


 82%|████████▏ | 12281/15000 [25:11<06:30,  6.97it/s]


prediction:  military and the provincial government the military established control of territory through war but after city or people was the

 epoch: 12280 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

 epoch: 12281 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%


 82%|████████▏ | 12283/15000 [25:11<05:49,  7.78it/s]


 epoch: 12282 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12283 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 82%|████████▏ | 12285/15000 [25:11<05:32,  8.15it/s]


 epoch: 12284 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12285 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 82%|████████▏ | 12287/15000 [25:11<05:45,  7.85it/s]


 epoch: 12286 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12287 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 82%|████████▏ | 12289/15000 [25:12<05:34,  8.10it/s]


 epoch: 12288 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12289 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 82%|████████▏ | 12290/15000 [25:12<06:25,  7.03it/s]


input:       december is an educational version of the game designed specifically for use in educational establishments such as schools and

target:      december is an educational version of the game designed specifically for use in educational establishments such as schools and built

prediction:  december is an educational version of the game designed specifically for use in educational establishments such as schools and the

 epoch: 12290 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%


 82%|████████▏ | 12292/15000 [25:13<09:52,  4.57it/s]


 epoch: 12291 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12292 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 82%|████████▏ | 12294/15000 [25:13<07:43,  5.84it/s]


 epoch: 12293 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 12294 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.21, test_acc: 96.9%


 82%|████████▏ | 12296/15000 [25:13<06:38,  6.79it/s]


 epoch: 12295 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 12296 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%


 82%|████████▏ | 12298/15000 [25:13<05:53,  7.64it/s]


 epoch: 12297 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 12298 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 96.9%


 82%|████████▏ | 12300/15000 [25:14<05:56,  7.58it/s]


 epoch: 12299 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

input:       economy every year tourism new zealand the country official tourism agency is actively promoting the country as destination worldwide

target:      economy every year tourism new zealand the country official tourism agency is actively promoting the country as destination worldwide milford

prediction:  economy every year tourism new zealand the country official tourism agency is actively promoting the country as destination worldwide the

 epoch: 12300 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 82%|████████▏ | 12303/15000 [25:14<04:54,  9.15it/s]


 epoch: 12301 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12302 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 12303 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12304 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 82%|████████▏ | 12307/15000 [25:14<05:35,  8.02it/s]


 epoch: 12305 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12306 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12307 | train_loss: 0.21, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%


 82%|████████▏ | 12309/15000 [25:15<05:12,  8.61it/s]


 epoch: 12308 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12309 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%

input:       from the th century onward portuguese explorers between and reached the tanimbar islands some of the caroline islands and

target:      from the th century onward portuguese explorers between and reached the tanimbar islands some of the caroline islands and west


 82%|████████▏ | 12310/15000 [25:15<05:42,  7.85it/s]


prediction:  from the th century onward portuguese explorers between and reached the tanimbar islands some of the caroline islands and the

 epoch: 12310 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12311 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%


 82%|████████▏ | 12313/15000 [25:15<05:09,  8.68it/s]


 epoch: 12312 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12313 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 82%|████████▏ | 12316/15000 [25:15<04:43,  9.47it/s]


 epoch: 12314 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12315 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12316 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 82%|████████▏ | 12318/15000 [25:16<04:38,  9.65it/s]


 epoch: 12317 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 12318 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 82%|████████▏ | 12320/15000 [25:16<05:20,  8.36it/s]


 epoch: 12319 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

input:       rattle like musical instrument that was especially important in religious ceremonies the ancient egyptians enjoyed variety of leisure activities

target:      rattle like musical instrument that was especially important in religious ceremonies the ancient egyptians enjoyed variety of leisure activities including

prediction:  rattle like musical instrument that was especially important in religious ceremonies the ancient egyptians enjoyed variety of leisure activities the

 epoch: 12320 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 82%|████████▏ | 12322/15000 [25:16<05:01,  8.88it/s]


 epoch: 12321 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12322 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 82%|████████▏ | 12324/15000 [25:16<04:43,  9.44it/s]


 epoch: 12323 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12324 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 12325 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 82%|████████▏ | 12328/15000 [25:17<04:19, 10.31it/s]


 epoch: 12326 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 12327 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 12328 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 82%|████████▏ | 12330/15000 [25:17<04:42,  9.46it/s]


 epoch: 12329 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

input:       and carl rogers who created and developed client centered therapy later positive psychology opened up humanistic themes to scientific

target:      and carl rogers who created and developed client centered therapy later positive psychology opened up humanistic themes to scientific study

prediction:  and carl rogers who created and developed client centered therapy later positive psychology opened up humanistic themes to scientific the

 epoch: 12330 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 96.9%


 82%|████████▏ | 12332/15000 [25:17<04:39,  9.53it/s]


 epoch: 12331 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12332 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 82%|████████▏ | 12333/15000 [25:17<04:36,  9.63it/s]


 epoch: 12333 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.4%


 82%|████████▏ | 12336/15000 [25:18<07:32,  5.89it/s]


 epoch: 12334 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12335 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 12336 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%


 82%|████████▏ | 12338/15000 [25:18<06:17,  7.04it/s]


 epoch: 12337 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12338 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12339 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 82%|████████▏ | 12340/15000 [25:18<05:53,  7.53it/s]


input:       urban athens times the kg of an unskilled rural labourer in roman egypt though greek farm incomes too were

target:      urban athens times the kg of an unskilled rural labourer in roman egypt though greek farm incomes too were on

prediction:  urban athens times the kg of an unskilled rural labourer in roman egypt though greek farm incomes too were the

 epoch: 12340 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12341 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 82%|████████▏ | 12344/15000 [25:19<04:46,  9.27it/s]


 epoch: 12342 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12343 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12344 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 82%|████████▏ | 12346/15000 [25:19<04:35,  9.64it/s]


 epoch: 12345 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12346 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%

 epoch: 12347 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 82%|████████▏ | 12350/15000 [25:19<04:40,  9.44it/s]


 epoch: 12348 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.3%

 epoch: 12349 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%

input:       unrelated to the programming paradigm for instance most programming languages use english language keywords while minority do not other

target:      unrelated to the programming paradigm for instance most programming languages use english language keywords while minority do not other languages

prediction:  unrelated to the programming paradigm for instance most programming languages use english language keywords while minority do not other the

 epoch: 12350 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.19, test_acc: 97.4%


 82%|████████▏ | 12352/15000 [25:20<04:27,  9.89it/s]


 epoch: 12351 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12352 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 12353 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 82%|████████▏ | 12356/15000 [25:20<04:05, 10.76it/s]


 epoch: 12354 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12355 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 12356 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.2%


 82%|████████▏ | 12358/15000 [25:20<04:05, 10.74it/s]


 epoch: 12357 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12358 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12359 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%


 82%|████████▏ | 12360/15000 [25:20<04:24, 10.00it/s]


input:       modern school systems organize students by age competence specialization and native language into different classes to ensure productive learning

target:      modern school systems organize students by age competence specialization and native language into different classes to ensure productive learning process

prediction:  modern school systems organize students by age competence specialization and native language into different classes to ensure productive learning the

 epoch: 12360 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12361 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%


 82%|████████▏ | 12364/15000 [25:21<06:09,  7.13it/s]


 epoch: 12362 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 12363 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12364 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 82%|████████▏ | 12367/15000 [25:21<05:16,  8.33it/s]


 epoch: 12365 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12366 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12367 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 82%|████████▏ | 12369/15000 [25:22<04:51,  9.04it/s]


 epoch: 12368 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12369 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

input:       advanced their own theories of nature physics was known as natural philosophy until the late th century by the

target:      advanced their own theories of nature physics was known as natural philosophy until the late th century by the th

prediction:  advanced their own theories of nature physics was known as natural philosophy until the late th century by the the


 82%|████████▏ | 12371/15000 [25:22<04:51,  9.02it/s]


 epoch: 12370 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12371 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12372 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 82%|████████▎ | 12375/15000 [25:22<04:26,  9.84it/s]


 epoch: 12373 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12374 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 12375 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 83%|████████▎ | 12377/15000 [25:23<07:10,  6.09it/s]


 epoch: 12376 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.4%

 epoch: 12377 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 12378 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 83%|████████▎ | 12380/15000 [25:23<06:08,  7.12it/s]


 epoch: 12379 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

input:       and north of the parthenon was the magnificent erechtheion containing three separate temples one to athena polias or the

target:      and north of the parthenon was the magnificent erechtheion containing three separate temples one to athena polias or the protectress

prediction:  and north of the parthenon was the magnificent erechtheion containing three separate temples one to athena polias or the the

 epoch: 12380 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 83%|████████▎ | 12382/15000 [25:23<05:38,  7.74it/s]


 epoch: 12381 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12382 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 83%|████████▎ | 12384/15000 [25:23<05:12,  8.37it/s]


 epoch: 12383 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12384 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%


 83%|████████▎ | 12386/15000 [25:24<05:36,  7.78it/s]


 epoch: 12385 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12386 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.2%


 83%|████████▎ | 12388/15000 [25:24<05:26,  8.01it/s]


 epoch: 12387 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 12388 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 83%|████████▎ | 12389/15000 [25:24<05:22,  8.10it/s]


 epoch: 12389 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       placing himself as zeno nominal subordinate in reality italy was ruled by odoacer alone the eastern roman empire called

target:      placing himself as zeno nominal subordinate in reality italy was ruled by odoacer alone the eastern roman empire called the

prediction:  placing himself as zeno nominal subordinate in reality italy was ruled by odoacer alone the eastern roman empire called the


 83%|████████▎ | 12391/15000 [25:25<09:27,  4.59it/s]


 epoch: 12390 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12391 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 83%|████████▎ | 12393/15000 [25:25<07:06,  6.11it/s]


 epoch: 12392 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 12393 | train_loss: 0.21, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%


 83%|████████▎ | 12395/15000 [25:25<06:01,  7.20it/s]


 epoch: 12394 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.4%

 epoch: 12395 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 83%|████████▎ | 12397/15000 [25:26<05:18,  8.16it/s]


 epoch: 12396 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12397 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 83%|████████▎ | 12399/15000 [25:26<05:00,  8.65it/s]


 epoch: 12398 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12399 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

input:       stephen hawking and george ellis published papers where they showed that mathematical singularities were an inevitable initial condition


 83%|████████▎ | 12400/15000 [25:26<05:53,  7.36it/s]


target:      stephen hawking and george ellis published papers where they showed that mathematical singularities were an inevitable initial condition of

prediction:  stephen hawking and george ellis published papers where they showed that mathematical singularities were an inevitable initial condition the

 epoch: 12400 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 83%|████████▎ | 12402/15000 [25:26<05:59,  7.22it/s]


 epoch: 12401 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12402 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.8%


 83%|████████▎ | 12403/15000 [25:26<05:57,  7.26it/s]


 epoch: 12403 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 83%|████████▎ | 12405/15000 [25:27<09:59,  4.33it/s]


 epoch: 12404 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12405 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 83%|████████▎ | 12408/15000 [25:27<06:32,  6.60it/s]


 epoch: 12406 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12407 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 12408 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 83%|████████▎ | 12410/15000 [25:28<06:01,  7.17it/s]


 epoch: 12409 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

input:       that was rich in quarries and gold mines while laborers built defensive structure in the eastern delta called the

target:      that was rich in quarries and gold mines while laborers built defensive structure in the eastern delta called the walls

prediction:  that was rich in quarries and gold mines while laborers built defensive structure in the eastern delta called the the

 epoch: 12410 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%


 83%|████████▎ | 12412/15000 [25:28<05:19,  8.10it/s]


 epoch: 12411 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12412 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12413 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.8%


 83%|████████▎ | 12415/15000 [25:28<04:49,  8.92it/s]


 epoch: 12414 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12415 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12416 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 96.8%


 83%|████████▎ | 12417/15000 [25:28<04:31,  9.51it/s]


 epoch: 12417 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12418 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 83%|████████▎ | 12420/15000 [25:29<05:35,  7.68it/s]


 epoch: 12419 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       elderly leonardo with his right arm wrapped in clothing the latter in addition to the record of an october

target:      elderly leonardo with his right arm wrapped in clothing the latter in addition to the record of an october visit

prediction:  elderly leonardo with his right arm wrapped in clothing the latter in addition to the record of an october the

 epoch: 12420 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 83%|████████▎ | 12422/15000 [25:29<05:09,  8.34it/s]


 epoch: 12421 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.21, test_acc: 97.2%

 epoch: 12422 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 83%|████████▎ | 12425/15000 [25:29<04:33,  9.43it/s]


 epoch: 12423 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12424 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.4%

 epoch: 12425 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%


 83%|████████▎ | 12427/15000 [25:29<04:22,  9.79it/s]


 epoch: 12426 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.4%

 epoch: 12427 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%


 83%|████████▎ | 12429/15000 [25:30<04:36,  9.31it/s]


 epoch: 12428 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12429 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

input:       soldier of the spartan army permanently in arms rich and poor citizens alike were obliged to live and train

target:     

 83%|████████▎ | 12431/15000 [25:30<05:04,  8.45it/s]

 soldier of the spartan army permanently in arms rich and poor citizens alike were obliged to live and train as

prediction:  soldier of the spartan army permanently in arms rich and poor citizens alike were obliged to live and train the

 epoch: 12430 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12431 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 83%|████████▎ | 12432/15000 [25:30<04:55,  8.68it/s]


 epoch: 12432 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 83%|████████▎ | 12435/15000 [25:31<06:00,  7.12it/s]


 epoch: 12433 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12434 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12435 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 83%|████████▎ | 12438/15000 [25:31<04:52,  8.75it/s]


 epoch: 12436 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.26, test_acc: 96.8%

 epoch: 12437 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12438 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.24, test_acc: 96.9%


 83%|████████▎ | 12440/15000 [25:31<04:46,  8.95it/s]


 epoch: 12439 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

input:       poll found that americans were the most supportive of free expression of any polity measured they are also the

target:      poll found that americans were the most supportive of free expression of any polity measured they are also the most

prediction:  poll found that americans were the most supportive of free expression of any polity measured they are also the the

 epoch: 12440 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.3%


 83%|████████▎ | 12443/15000 [25:31<04:27,  9.57it/s]


 epoch: 12441 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 12442 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12443 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 83%|████████▎ | 12445/15000 [25:32<04:29,  9.49it/s]


 epoch: 12444 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12445 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12446 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 83%|████████▎ | 12448/15000 [25:32<07:27,  5.71it/s]


 epoch: 12447 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 12448 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12449 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.0%


 83%|████████▎ | 12451/15000 [25:33<05:56,  7.15it/s]


input:       submissions with more upvotes appear towards the top of their subreddit and if they receive enough upvotes ultimately on

target:      submissions with more upvotes appear towards the top of their subreddit and if they receive enough upvotes ultimately on the

prediction:  submissions with more upvotes appear towards the top of their subreddit and if they receive enough upvotes ultimately on the

 epoch: 12450 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.4%

 epoch: 12451 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 83%|████████▎ | 12454/15000 [25:33<04:59,  8.49it/s]


 epoch: 12452 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 12453 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12454 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 83%|████████▎ | 12456/15000 [25:33<04:33,  9.29it/s]


 epoch: 12455 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12456 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12457 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 83%|████████▎ | 12460/15000 [25:34<04:15,  9.94it/s]


 epoch: 12458 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12459 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       periods of stasis where species remain relatively unchanged in this theory speciation and rapid evolution are linked with natural

target:      periods of stasis where species remain relatively unchanged in this theory speciation and rapid evolution are linked with natural selection

prediction:  periods of stasis where species remain relatively unchanged in this theory speciation and rapid evolution are linked with natural the

 epoch: 12460 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 83%|████████▎ | 12462/15000 [25:34<05:38,  7.50it/s]


 epoch: 12461 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.8%

 epoch: 12462 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.26, test_acc: 96.9%

 epoch: 12463 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 83%|████████▎ | 12466/15000 [25:34<04:35,  9.20it/s]


 epoch: 12464 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12465 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12466 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%


 83%|████████▎ | 12468/15000 [25:34<04:19,  9.75it/s]


 epoch: 12467 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%

 epoch: 12468 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12469 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 83%|████████▎ | 12471/15000 [25:35<04:25,  9.52it/s]


input:       the open source community was shared on github in december demanding that the company drop its contract with ice

target:      the open source community was shared on github in december demanding that the company drop its contract with ice and

prediction:  the open source community was shared on github in december demanding that the company drop its contract with ice the

 epoch: 12470 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12471 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 83%|████████▎ | 12473/15000 [25:35<04:16,  9.84it/s]


 epoch: 12472 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12473 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12474 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 83%|████████▎ | 12476/15000 [25:36<06:40,  6.30it/s]


 epoch: 12475 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12476 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12477 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 83%|████████▎ | 12478/15000 [25:36<05:41,  7.39it/s]


 epoch: 12478 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12479 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

input:       his proposal to adhere to major rivers as the line of demarcation was never taken up by other geographers

target:      his proposal to adhere to major rivers as the line of demarcation was never taken up by other geographers who

prediction:  his proposal to adhere to major rivers as the line of demarcation was never taken up by other geographers the


 83%|████████▎ | 12482/15000 [25:36<04:44,  8.85it/s]


 epoch: 12480 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 12481 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12482 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 83%|████████▎ | 12484/15000 [25:36<04:28,  9.37it/s]


 epoch: 12483 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 12484 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12485 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 83%|████████▎ | 12488/15000 [25:37<04:14,  9.89it/s]


 epoch: 12486 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 12487 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 12488 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 83%|████████▎ | 12490/15000 [25:38<07:34,  5.52it/s]


 epoch: 12489 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

input:       died in his brother domitian succeeded him having exceedingly poor relations with the senate domitian was murdered in september

target:      died in his brother domitian succeeded him having exceedingly poor relations with the senate domitian was murdered in september the

prediction:  died in his brother domitian succeeded him having exceedingly poor relations with the senate domitian was murdered in september the

 epoch: 12490 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 83%|████████▎ | 12492/15000 [25:38<06:46,  6.17it/s]


 epoch: 12491 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12492 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 83%|████████▎ | 12494/15000 [25:38<05:57,  7.01it/s]


 epoch: 12493 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12494 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 96.9%


 83%|████████▎ | 12496/15000 [25:38<05:29,  7.59it/s]


 epoch: 12495 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12496 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 83%|████████▎ | 12498/15000 [25:39<05:17,  7.88it/s]


 epoch: 12497 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 12498 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 83%|████████▎ | 12500/15000 [25:39<06:03,  6.88it/s]


 epoch: 12499 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

input:       states attempt to dominate the mainland none were successful and their resulting weakness led to power vacuum which would

target:      states attempt to dominate the mainland none were successful and their resulting weakness led to power vacuum which would eventually

prediction:  states attempt to dominate the mainland none were successful and their resulting weakness led to power vacuum which would the

 epoch: 12500 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 83%|████████▎ | 12502/15000 [25:39<05:26,  7.65it/s]


 epoch: 12501 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.4%

 epoch: 12502 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 83%|████████▎ | 12503/15000 [25:39<05:21,  7.76it/s]


 epoch: 12503 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 83%|████████▎ | 12505/15000 [25:40<08:23,  4.95it/s]


 epoch: 12504 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.2%

 epoch: 12505 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 83%|████████▎ | 12507/15000 [25:40<06:29,  6.41it/s]


 epoch: 12506 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 12507 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 83%|████████▎ | 12509/15000 [25:40<05:49,  7.12it/s]


 epoch: 12508 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12509 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.4%


 83%|████████▎ | 12510/15000 [25:41<06:29,  6.40it/s]


input:       be argued is that there is material mimicry of an idea thus every instance of meme would not be

target:      be argued is that there is material mimicry of an idea thus every instance of meme would not be true

prediction:  be argued is that there is material mimicry of an idea thus every instance of meme would not be the

 epoch: 12510 | train_loss: 0.26, train_acc: 96.7% | test_loss: 0.27, test_acc: 96.9%


 83%|████████▎ | 12512/15000 [25:41<05:47,  7.16it/s]


 epoch: 12511 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12512 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 83%|████████▎ | 12514/15000 [25:41<05:22,  7.70it/s]


 epoch: 12513 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 96.9%

 epoch: 12514 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 83%|████████▎ | 12516/15000 [25:41<05:07,  8.07it/s]


 epoch: 12515 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12516 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 83%|████████▎ | 12517/15000 [25:41<04:55,  8.39it/s]


 epoch: 12517 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 83%|████████▎ | 12518/15000 [25:42<07:55,  5.22it/s]


 epoch: 12518 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 12519 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

input:       systematic collection analysis and application of information to answer questions about projects policies and programs particularly about their effectiveness

target:      systematic collection analysis and application of information to answer questions about projects policies and programs particularly about their effectiveness in

prediction:  systematic collection analysis and application of information to answer questions about projects policies and programs particularly about their effectiveness the


 83%|████████▎ | 12521/15000 [25:42<06:01,  6.87it/s]


 epoch: 12520 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12521 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12522 | train_loss: 0.19, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.4%


 84%|████████▎ | 12525/15000 [25:42<04:40,  8.82it/s]


 epoch: 12523 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12524 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12525 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 84%|████████▎ | 12528/15000 [25:43<04:21,  9.44it/s]


 epoch: 12526 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12527 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 12528 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 84%|████████▎ | 12530/15000 [25:43<04:48,  8.55it/s]


 epoch: 12529 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

input:       other planets the moon formed roughly million years later initially molten the outer layer of the earth cooled resulting

target:      other planets the moon formed roughly million years later initially molten the outer layer of the earth cooled resulting in

prediction:  other planets the moon formed roughly million years later initially molten the outer layer of the earth cooled resulting the

 epoch: 12530 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 84%|████████▎ | 12532/15000 [25:43<05:26,  7.55it/s]


 epoch: 12531 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%

 epoch: 12532 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 84%|████████▎ | 12534/15000 [25:43<04:52,  8.44it/s]


 epoch: 12533 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12534 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 84%|████████▎ | 12536/15000 [25:44<04:31,  9.07it/s]


 epoch: 12535 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12536 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12537 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 84%|████████▎ | 12538/15000 [25:44<04:17,  9.55it/s]


 epoch: 12538 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12539 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

input:       the console versions are not inferior because of the pc superiority for competition dyer wrote that the playstation version

target:      the console versions are not inferior because of the pc superiority for competition dyer wrote that the playstation version was


 84%|████████▎ | 12540/15000 [25:44<04:40,  8.76it/s]


prediction:  the console versions are not inferior because of the pc superiority for competition dyer wrote that the playstation version the

 epoch: 12540 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 12541 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 84%|████████▎ | 12544/15000 [25:45<04:16,  9.57it/s]


 epoch: 12542 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 12543 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12544 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 84%|████████▎ | 12545/15000 [25:45<04:14,  9.63it/s]


 epoch: 12545 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%


 84%|████████▎ | 12548/15000 [25:45<06:12,  6.59it/s]


 epoch: 12546 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12547 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12548 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 84%|████████▎ | 12550/15000 [25:46<05:38,  7.24it/s]


 epoch: 12549 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

input:       nations especially among blacks and hispanics the health care system far outspends that of any other nation measured

target:      nations especially among blacks and hispanics the health care system far outspends that of any other nation measured both

prediction:  nations especially among blacks and hispanics the health care system far outspends that of any other nation measured the

 epoch: 12550 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 84%|████████▎ | 12552/15000 [25:46<05:10,  7.89it/s]


 epoch: 12551 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12552 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%


 84%|████████▎ | 12554/15000 [25:46<04:43,  8.62it/s]


 epoch: 12553 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12554 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12555 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 84%|████████▎ | 12556/15000 [25:46<04:31,  9.01it/s]



 epoch: 12556 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 84%|████████▎ | 12558/15000 [25:46<04:17,  9.47it/s]


 epoch: 12557 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 12558 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12559 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

input:       mardonius attempted to restore classical roman and hellenistic religion only briefly interrupted the succession of christian emperors theodosius the

target:      mardonius attempted to restore classical roman and hellenistic religion only briefly interrupted the succession of christian emperors theodosius the last

prediction:  mardonius attempted to restore classical roman and hellenistic religion only briefly interrupted the succession of christian emperors theodosius the the


 84%|████████▎ | 12562/15000 [25:47<05:13,  7.77it/s]


 epoch: 12560 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 12561 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12562 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 84%|████████▍ | 12565/15000 [25:47<04:34,  8.88it/s]


 epoch: 12563 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12564 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12565 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 84%|████████▍ | 12568/15000 [25:48<04:10,  9.71it/s]


 epoch: 12566 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 12567 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 96.9%

 epoch: 12568 | train_loss: 0.21, train_acc: 97.5% | test_loss: 0.24, test_acc: 97.0%


 84%|████████▍ | 12570/15000 [25:48<04:19,  9.36it/s]


 epoch: 12569 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.2%

input:       conducive to settlements and encouraged migrations of farming communities to the more tropical climate of west africa during the

target:      conducive to settlements and encouraged migrations of farming communities to the more tropical climate of west africa during the first

prediction:  conducive to settlements and encouraged migrations of farming communities to the more tropical climate of west africa during the the

 epoch: 12570 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 84%|████████▍ | 12572/15000 [25:48<04:20,  9.32it/s]


 epoch: 12571 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 12572 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 84%|████████▍ | 12573/15000 [25:48<04:17,  9.42it/s]


 epoch: 12573 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 84%|████████▍ | 12576/15000 [25:49<04:48,  8.40it/s]


 epoch: 12574 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12575 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.27, test_acc: 96.9%

 epoch: 12576 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 84%|████████▍ | 12578/15000 [25:49<04:21,  9.26it/s]


 epoch: 12577 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12578 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12579 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 84%|████████▍ | 12580/15000 [25:49<04:24,  9.16it/s]


input:       rearranged in different patterns and the productivity of the linguistic system meaning that the finite number of linguistic elements

target:      rearranged in different patterns and the productivity of the linguistic system meaning that the finite number of linguistic elements can

prediction:  rearranged in different patterns and the productivity of the linguistic system meaning that the finite number of linguistic elements the

 epoch: 12580 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12581 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.2%


 84%|████████▍ | 12583/15000 [25:49<04:11,  9.60it/s]


 epoch: 12582 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12583 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 84%|████████▍ | 12585/15000 [25:49<04:09,  9.70it/s]


 epoch: 12584 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12585 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%


 84%|████████▍ | 12587/15000 [25:50<04:08,  9.71it/s]


 epoch: 12586 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.4%

 epoch: 12587 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 96.9%


 84%|████████▍ | 12589/15000 [25:50<07:43,  5.20it/s]


 epoch: 12588 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12589 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

input:       the in github was ranked no on the forbes cloud list it was not featured on and lists

target:      the in github was ranked no on the forbes cloud list it was not featured on and lists on


 84%|████████▍ | 12590/15000 [25:50<07:14,  5.54it/s]


prediction:  the in github was ranked no on the forbes cloud list it was not featured on and lists the

 epoch: 12590 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 12591 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 84%|████████▍ | 12594/15000 [25:51<04:54,  8.17it/s]


 epoch: 12592 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.8%

 epoch: 12593 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12594 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%


 84%|████████▍ | 12596/15000 [25:51<04:35,  8.74it/s]


 epoch: 12595 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12596 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 84%|████████▍ | 12597/15000 [25:51<04:27,  9.00it/s]


 epoch: 12597 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12598 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 84%|████████▍ | 12600/15000 [25:51<04:51,  8.23it/s]


 epoch: 12599 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       the cultivated leisure otium associated with the villa lifestyle significant collections might attract in house scholars and an individual

target:      the cultivated leisure otium associated with the villa lifestyle significant collections might attract in house scholars and an individual benefactor

prediction:  the cultivated leisure otium associated with the villa lifestyle significant collections might attract in house scholars and an individual the

 epoch: 12600 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 84%|████████▍ | 12602/15000 [25:52<04:44,  8.44it/s]


 epoch: 12601 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 12602 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 84%|████████▍ | 12604/15000 [25:52<06:23,  6.25it/s]


 epoch: 12603 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12604 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 84%|████████▍ | 12606/15000 [25:52<05:36,  7.11it/s]


 epoch: 12605 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12606 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 84%|████████▍ | 12608/15000 [25:53<05:10,  7.70it/s]


 epoch: 12607 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12608 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 84%|████████▍ | 12610/15000 [25:53<05:58,  6.66it/s]


 epoch: 12609 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

input:       debugging include an early example of anti debugging existed in early versions of microsoft word which if debugger was

target:      debugging include an early example of anti debugging existed in early versions of microsoft word which if debugger was detected

prediction:  debugging include an early example of anti debugging existed in early versions of microsoft word which if debugger was the

 epoch: 12610 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%


 84%|████████▍ | 12612/15000 [25:53<05:25,  7.33it/s]


 epoch: 12611 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 12612 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 84%|████████▍ | 12614/15000 [25:53<05:09,  7.70it/s]


 epoch: 12613 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12614 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 84%|████████▍ | 12616/15000 [25:54<04:55,  8.07it/s]


 epoch: 12615 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12616 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 84%|████████▍ | 12618/15000 [25:54<06:03,  6.55it/s]


 epoch: 12617 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

 epoch: 12618 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12619 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 84%|████████▍ | 12621/15000 [25:54<05:08,  7.70it/s]


input:       united states followed by the united kingdom at and canada at twenty two percent of adults aged to

target:      united states followed by the united kingdom at and canada at twenty two percent of adults aged to years

prediction:  united states followed by the united kingdom at and canada at twenty two percent of adults aged to the

 epoch: 12620 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12621 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%


 84%|████████▍ | 12623/15000 [25:55<04:48,  8.23it/s]


 epoch: 12622 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.4%

 epoch: 12623 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 84%|████████▍ | 12625/15000 [25:55<04:33,  8.70it/s]


 epoch: 12624 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12625 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 84%|████████▍ | 12628/15000 [25:55<04:29,  8.81it/s]


 epoch: 12626 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 12627 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.4%

 epoch: 12628 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 84%|████████▍ | 12630/15000 [25:56<05:06,  7.72it/s]


 epoch: 12629 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

input:       tripathi death was result of suicide reddit general manager erik martin later issued an apology for this behavior criticizing

target:      tripathi death was result of suicide reddit general manager erik martin later issued an apology for this behavior criticizing the

prediction:  tripathi death was result of suicide reddit general manager erik martin later issued an apology for this behavior criticizing the

 epoch: 12630 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 84%|████████▍ | 12633/15000 [25:56<07:24,  5.33it/s]


 epoch: 12631 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12632 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12633 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 84%|████████▍ | 12636/15000 [25:57<05:30,  7.14it/s]


 epoch: 12634 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12635 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12636 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 84%|████████▍ | 12638/15000 [25:57<04:46,  8.25it/s]


 epoch: 12637 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12638 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 96.9%

 epoch: 12639 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 84%|████████▍ | 12640/15000 [25:57<04:40,  8.42it/s]


input:       from near the indus valley dating to around bc agriculture began in the indus valley around bc and reached

target:      from near the indus valley dating to around bc agriculture began in the indus valley around bc and reached the

prediction:  from near the indus valley dating to around bc agriculture began in the indus valley around bc and reached the

 epoch: 12640 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12641 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 84%|████████▍ | 12644/15000 [25:57<04:09,  9.45it/s]


 epoch: 12642 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12643 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12644 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 84%|████████▍ | 12647/15000 [25:58<05:58,  6.57it/s]


 epoch: 12645 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12646 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.9%

 epoch: 12647 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 84%|████████▍ | 12649/15000 [25:58<05:11,  7.55it/s]


 epoch: 12648 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12649 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

input:       are integer expressions they cannot be passed to function that expects string or stored in variable that is defined

target:      are integer expressions they cannot be passed to function that expects string or stored in variable that is defined to

prediction:  are integer expressions they cannot be passed to function that expects string or stored in variable that is defined the


 84%|████████▍ | 12652/15000 [25:59<04:37,  8.45it/s]


 epoch: 12650 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 12651 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12652 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 84%|████████▍ | 12655/15000 [25:59<04:09,  9.39it/s]


 epoch: 12653 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12654 | train_loss: 0.19, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.0%

 epoch: 12655 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 84%|████████▍ | 12658/15000 [25:59<03:57,  9.84it/s]


 epoch: 12656 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12657 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.4%

 epoch: 12658 | train_loss: 0.21, train_acc: 97.5% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12659 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

input:       linguistics languages express meaning by relating sign form to meaning or its content sign forms must be something that

target:      linguistics languages express meaning by relating sign form to meaning or its content sign forms must be something that can

prediction:  linguistics languages express meaning by relating sign form to meaning or its content sign forms must be something that the


 84%|████████▍ | 12662/15000 [26:00<05:35,  6.97it/s]


 epoch: 12660 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 12661 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12662 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 84%|████████▍ | 12665/15000 [26:00<04:46,  8.16it/s]


 epoch: 12663 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12664 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 12665 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 84%|████████▍ | 12668/15000 [26:01<04:12,  9.25it/s]


 epoch: 12666 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12667 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12668 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 84%|████████▍ | 12670/15000 [26:01<04:20,  8.96it/s]


 epoch: 12669 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

input:       shelf consists of ice walls that rest on rock is ice streams or the edge of glaciers and the

target:      shelf consists of ice walls that rest on rock is ice streams or the edge of glaciers and the remaining

prediction:  shelf consists of ice walls that rest on rock is ice streams or the edge of glaciers and the the

 epoch: 12670 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 84%|████████▍ | 12672/15000 [26:01<04:15,  9.11it/s]


 epoch: 12671 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 12672 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 84%|████████▍ | 12673/15000 [26:01<04:12,  9.23it/s]


 epoch: 12673 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%


 84%|████████▍ | 12675/15000 [26:02<06:51,  5.65it/s]


 epoch: 12674 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12675 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 85%|████████▍ | 12677/15000 [26:02<05:32,  7.00it/s]


 epoch: 12676 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12677 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.2%

 epoch: 12678 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%


 85%|████████▍ | 12680/15000 [26:02<04:49,  8.01it/s]


 epoch: 12679 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.4%

input:       since their most recent common ancestor it may still be possible for them to produce offspring as with horses

target:      since their most recent common ancestor it may still be possible for them to produce offspring as with horses and

prediction:  since their most recent common ancestor it may still be possible for them to produce offspring as with horses the

 epoch: 12680 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 85%|████████▍ | 12682/15000 [26:02<04:20,  8.90it/s]


 epoch: 12681 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%

 epoch: 12682 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12683 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 85%|████████▍ | 12685/15000 [26:03<03:59,  9.66it/s]


 epoch: 12684 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12685 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.0%


 85%|████████▍ | 12687/15000 [26:03<04:01,  9.57it/s]


 epoch: 12686 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12687 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 96.9%


 85%|████████▍ | 12688/15000 [26:03<08:32,  4.51it/s]


 epoch: 12688 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12689 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

input:       us which indicates masculine gender singular number and nominative case these languages are called fusional languages because several meanings

target:      us which indicates masculine gender singular number and nominative case these languages are called fusional languages because several meanings may

prediction:  us which indicates masculine gender singular number and nominative case these languages are called fusional languages because several meanings the


 85%|████████▍ | 12692/15000 [26:04<05:28,  7.03it/s]


 epoch: 12690 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12691 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12692 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 85%|████████▍ | 12695/15000 [26:04<04:30,  8.53it/s]


 epoch: 12693 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12694 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12695 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%


 85%|████████▍ | 12697/15000 [26:04<04:09,  9.23it/s]


 epoch: 12696 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12697 | train_loss: 0.21, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12698 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.4%


 85%|████████▍ | 12699/15000 [26:05<03:58,  9.64it/s]


 epoch: 12699 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

input:       cup in qatar as the first african nation to reach the semi finals of the fifa men world cup

target:      cup in qatar as the first african nation to reach the semi finals of the fifa men world cup south

prediction:  cup in qatar as the first african nation to reach the semi finals of the fifa men world cup the

 epoch: 12700 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 85%|████████▍ | 12701/15000 [26:05<04:09,  9.21it/s]


 epoch: 12701 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 85%|████████▍ | 12703/15000 [26:05<06:48,  5.62it/s]


 epoch: 12702 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 12703 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%


 85%|████████▍ | 12706/15000 [26:06<05:06,  7.49it/s]


 epoch: 12704 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12705 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 12706 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 85%|████████▍ | 12708/15000 [26:06<04:35,  8.32it/s]


 epoch: 12707 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12708 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 85%|████████▍ | 12710/15000 [26:06<05:01,  7.59it/s]


 epoch: 12709 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%

input:       ppp per day in compared with for india sub saharan africa is the least successful region of the world

target:      ppp per day in compared with for india sub saharan africa is the least successful region of the world in

prediction:  ppp per day in compared with for india sub saharan africa is the least successful region of the world the

 epoch: 12710 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 85%|████████▍ | 12712/15000 [26:06<04:44,  8.04it/s]


 epoch: 12711 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12712 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%


 85%|████████▍ | 12714/15000 [26:07<04:42,  8.09it/s]


 epoch: 12713 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 12714 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 85%|████████▍ | 12715/15000 [26:07<04:46,  7.99it/s]


 epoch: 12715 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 85%|████████▍ | 12717/15000 [26:07<06:48,  5.59it/s]


 epoch: 12716 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12717 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 85%|████████▍ | 12719/15000 [26:08<05:36,  6.78it/s]


 epoch: 12718 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 12719 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%


 85%|████████▍ | 12720/15000 [26:08<06:04,  6.26it/s]


input:       times the country current demand in brazil was the th country in the world in terms of installed wind

target:      times the country current demand in brazil was the th country in the world in terms of installed wind power

prediction:  times the country current demand in brazil was the th country in the world in terms of installed wind the

 epoch: 12720 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 85%|████████▍ | 12723/15000 [26:08<05:07,  7.41it/s]


 epoch: 12721 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12722 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12723 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%


 85%|████████▍ | 12725/15000 [26:08<04:58,  7.62it/s]


 epoch: 12724 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12725 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 85%|████████▍ | 12727/15000 [26:09<04:47,  7.92it/s]


 epoch: 12726 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12727 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 85%|████████▍ | 12729/15000 [26:09<04:41,  8.07it/s]


 epoch: 12728 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12729 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 85%|████████▍ | 12730/15000 [26:09<05:18,  7.13it/s]


input:       terrain known indigenous tribes in papua new guinea have very little contact with local authorities aside from the authorities

target:      terrain known indigenous tribes in papua new guinea have very little contact with local authorities aside from the authorities knowing

prediction:  terrain known indigenous tribes in papua new guinea have very little contact with local authorities aside from the authorities the

 epoch: 12730 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 85%|████████▍ | 12732/15000 [26:10<06:47,  5.56it/s]


 epoch: 12731 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12732 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 85%|████████▍ | 12734/15000 [26:10<05:41,  6.63it/s]


 epoch: 12733 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12734 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 85%|████████▍ | 12736/15000 [26:10<05:07,  7.36it/s]


 epoch: 12735 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.8%

 epoch: 12736 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 85%|████████▍ | 12738/15000 [26:10<04:46,  7.90it/s]


 epoch: 12737 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 12738 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 85%|████████▍ | 12741/15000 [26:11<04:31,  8.32it/s]


 epoch: 12739 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

input:       and guiding students as well as assessing educational performance it can also make information easier to understand for example

target:      and guiding students as well as assessing educational performance it can also make information easier to understand for example by

prediction:  and guiding students as well as assessing educational performance it can also make information easier to understand for example the

 epoch: 12740 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%

 epoch: 12741 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 85%|████████▍ | 12744/15000 [26:11<04:05,  9.17it/s]


 epoch: 12742 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12743 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12744 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 85%|████████▍ | 12747/15000 [26:12<05:53,  6.38it/s]


 epoch: 12745 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 96.9%

 epoch: 12746 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12747 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 85%|████████▍ | 12749/15000 [26:12<04:57,  7.56it/s]


 epoch: 12748 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%

 epoch: 12749 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

input:       in those in middle income countries and in those in high income countries access to basic water sanitation and

target:      in those in middle income countries and in those in high income countries access to basic water sanitation and hygiene

prediction:  in those in middle income countries and in those in high income countries access to basic water sanitation and the


 85%|████████▌ | 12752/15000 [26:12<04:24,  8.50it/s]


 epoch: 12750 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12751 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12752 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 85%|████████▌ | 12754/15000 [26:12<04:07,  9.09it/s]


 epoch: 12753 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12754 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12755 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 85%|████████▌ | 12758/15000 [26:13<03:52,  9.65it/s]


 epoch: 12756 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12757 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12758 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 85%|████████▌ | 12760/15000 [26:13<04:50,  7.71it/s]


 epoch: 12759 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

input:       species evolution by natural selection is established by observable facts about living organisms more offspring are often produced than

target:      species evolution by natural selection is established by observable facts about living organisms more offspring are often produced than can

prediction:  species evolution by natural selection is established by observable facts about living organisms more offspring are often produced than the

 epoch: 12760 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 85%|████████▌ | 12762/15000 [26:13<04:23,  8.49it/s]


 epoch: 12761 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12762 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.4%


 85%|████████▌ | 12765/15000 [26:14<03:56,  9.44it/s]


 epoch: 12763 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12764 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.21, test_acc: 97.4%

 epoch: 12765 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 85%|████████▌ | 12767/15000 [26:14<03:42, 10.03it/s]


 epoch: 12766 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12767 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12768 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 85%|████████▌ | 12769/15000 [26:14<03:36, 10.29it/s]


 epoch: 12769 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

input:       the sun baked banks of the euphrates in syria from the great rhine danube river system which snaked across

target:      the sun baked banks of the euphrates in syria from the great rhine danube river system which snaked across the

prediction:  the sun baked banks of the euphrates in syria from the great rhine danube river system which snaked across the

 epoch: 12770 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 85%|████████▌ | 12772/15000 [26:14<03:56,  9.44it/s]


 epoch: 12771 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12772 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 85%|████████▌ | 12774/15000 [26:15<06:33,  5.66it/s]


 epoch: 12773 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%

 epoch: 12774 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 85%|████████▌ | 12777/15000 [26:15<04:51,  7.63it/s]


 epoch: 12775 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12776 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12777 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.1%


 85%|████████▌ | 12779/15000 [26:15<04:20,  8.53it/s]


 epoch: 12778 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12779 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.4%

input:       reviews from critics on release who praised the game for its gameplay and faithfulness to the counter strike series

target:      reviews from critics on release who praised the game for its gameplay and faithfulness to the counter strike series though

prediction:  reviews from critics on release who praised the game for its gameplay and faithfulness to the counter strike series the


 85%|████████▌ | 12781/15000 [26:16<04:19,  8.55it/s]


 epoch: 12780 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12781 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12782 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 85%|████████▌ | 12785/15000 [26:16<03:41,  9.98it/s]


 epoch: 12783 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 12784 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12785 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 85%|████████▌ | 12787/15000 [26:16<03:40, 10.02it/s]


 epoch: 12786 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12787 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 85%|████████▌ | 12789/15000 [26:16<04:25,  8.34it/s]


 epoch: 12788 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12789 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

input:       of laughter itself they found an overall recognition rate of with joy correctly classified at tickle schadenfreude and taunt

target:      of laughter itself they found an overall recognition rate of with joy correctly classified at tickle schadenfreude and taunt their

prediction:  of laughter itself they found an overall recognition rate of with joy correctly classified at tickle schadenfreude and taunt the


 85%|████████▌ | 12792/15000 [26:17<04:06,  8.96it/s]


 epoch: 12790 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12791 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12792 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.23, test_acc: 97.0%


 85%|████████▌ | 12794/15000 [26:17<04:01,  9.15it/s]


 epoch: 12793 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12794 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 85%|████████▌ | 12796/15000 [26:17<03:47,  9.71it/s]


 epoch: 12795 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 12796 | train_loss: 0.22, train_acc: 96.7% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12797 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%


 85%|████████▌ | 12799/15000 [26:17<03:44,  9.78it/s]


 epoch: 12798 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12799 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

input:       histories being at one time the home of that celebrated castaway alexander selkirk whose life and adventures have been

target:      histories being at one time the home of that celebrated castaway alexander selkirk whose life and adventures have been made


 85%|████████▌ | 12800/15000 [26:18<04:12,  8.72it/s]


prediction:  histories being at one time the home of that celebrated castaway alexander selkirk whose life and adventures have been the

 epoch: 12800 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%


 85%|████████▌ | 12803/15000 [26:18<06:08,  5.96it/s]


 epoch: 12801 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12802 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 12803 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 85%|████████▌ | 12805/15000 [26:19<05:14,  6.99it/s]


 epoch: 12804 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12805 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12806 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 85%|████████▌ | 12807/15000 [26:19<04:35,  7.97it/s]


 epoch: 12807 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12808 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 85%|████████▌ | 12810/15000 [26:19<04:20,  8.42it/s]


 epoch: 12809 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

input:       cash reserves emperors of the antonine and severan dynasties debased the currency particularly the denarius under the pressures of

target:      cash reserves emperors of the antonine and severan dynasties debased the currency particularly the denarius under the pressures of meeting

prediction:  cash reserves emperors of the antonine and severan dynasties debased the currency particularly the denarius under the pressures of the

 epoch: 12810 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 85%|████████▌ | 12813/15000 [26:19<03:56,  9.24it/s]


 epoch: 12811 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12812 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12813 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 85%|████████▌ | 12814/15000 [26:19<03:52,  9.39it/s]


 epoch: 12814 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 85%|████████▌ | 12817/15000 [26:20<05:45,  6.31it/s]


 epoch: 12815 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12816 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12817 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.0%


 85%|████████▌ | 12818/15000 [26:20<05:25,  6.71it/s]


 epoch: 12818 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12819 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

input:       formation of phylogenetic tree however languages differ from biological organisms in that they readily incorporate elements from other languages

target:      formation of phylogenetic tree however languages differ from biological organisms in that they readily incorporate elements from other languages through


 85%|████████▌ | 12820/15000 [26:21<05:13,  6.94it/s]


prediction:  formation of phylogenetic tree however languages differ from biological organisms in that they readily incorporate elements from other languages the

 epoch: 12820 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12821 | train_loss: 0.25, train_acc: 96.6% | test_loss: 0.23, test_acc: 97.2%


 85%|████████▌ | 12824/15000 [26:21<04:10,  8.68it/s]


 epoch: 12822 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 12823 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12824 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 86%|████████▌ | 12826/15000 [26:21<04:05,  8.85it/s]


 epoch: 12825 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 12826 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 86%|████████▌ | 12828/15000 [26:21<04:04,  8.87it/s]


 epoch: 12827 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12828 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 86%|████████▌ | 12829/15000 [26:22<08:40,  4.17it/s]


 epoch: 12829 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

input:       this irregular behavior known as the gravitational singularity indicates that general relativity is not an adequate description of the

target:      this irregular behavior known as the gravitational singularity indicates that general relativity is not an adequate description of the laws

prediction:  this irregular behavior known as the gravitational singularity indicates that general relativity is not an adequate description of the the

 epoch: 12830 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.4%

 86%|████████▌ | 12831/15000 [26:22<07:06,  5.09it/s]



 epoch: 12831 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%


 86%|████████▌ | 12833/15000 [26:23<05:46,  6.25it/s]


 epoch: 12832 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%

 epoch: 12833 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 86%|████████▌ | 12835/15000 [26:23<05:02,  7.15it/s]


 epoch: 12834 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12835 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 86%|████████▌ | 12837/15000 [26:23<04:41,  7.67it/s]


 epoch: 12836 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12837 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 86%|████████▌ | 12839/15000 [26:23<04:47,  7.51it/s]


 epoch: 12838 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12839 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.2%


 86%|████████▌ | 12840/15000 [26:24<05:36,  6.42it/s]


input:       the christian revivalist movement of the and known as the great awakening fueled colonial interest in both religion and

target:      the christian revivalist movement of the and known as the great awakening fueled colonial interest in both religion and religious

prediction:  the christian revivalist movement of the and known as the great awakening fueled colonial interest in both religion and the

 epoch: 12840 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 86%|████████▌ | 12842/15000 [26:24<05:00,  7.18it/s]


 epoch: 12841 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12842 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 86%|████████▌ | 12843/15000 [26:24<04:52,  7.38it/s]


 epoch: 12843 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 86%|████████▌ | 12847/15000 [26:24<04:24,  8.15it/s]


 epoch: 12844 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12845 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 12846 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.4%

 epoch: 12847 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.3%


 86%|████████▌ | 12849/15000 [26:25<04:00,  8.94it/s]


 epoch: 12848 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12849 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

input:       world largest companies are headquartered in the the united states is at or near the forefront of technological

target:      world largest companies are headquartered in the the united states is at or near the forefront of technological advancement

prediction:  world largest companies are headquartered in the the united states is at or near the forefront of technological the


 86%|████████▌ | 12851/15000 [26:25<04:11,  8.56it/s]


 epoch: 12850 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

 epoch: 12851 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%


 86%|████████▌ | 12852/15000 [26:25<04:05,  8.77it/s]


 epoch: 12852 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 12853 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12854 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 86%|████████▌ | 12856/15000 [26:25<03:43,  9.59it/s]


 epoch: 12855 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 12856 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.4%

 epoch: 12857 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 86%|████████▌ | 12858/15000 [26:26<06:27,  5.53it/s]


 epoch: 12858 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 12859 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

input:       as being kind of semiotic activity however she too denies that memes are units referring to them as sign

target:      as being kind of semiotic activity however she too denies that memes are units referring to them as sign systems

prediction:  as being kind of semiotic activity however she too denies that memes are units referring to them as sign the


 86%|████████▌ | 12862/15000 [26:26<04:51,  7.34it/s]


 epoch: 12860 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12861 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12862 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%


 86%|████████▌ | 12865/15000 [26:27<04:07,  8.63it/s]


 epoch: 12863 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12864 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.6%

 epoch: 12865 | train_loss: 0.20, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%


 86%|████████▌ | 12868/15000 [26:27<03:51,  9.21it/s]


 epoch: 12866 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12867 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12868 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 86%|████████▌ | 12870/15000 [26:27<03:55,  9.06it/s]


 epoch: 12869 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.3%

input:       property owners except jews and in some areas catholics with very high birth rates low death rates and steadily

target:      property owners except jews and in some areas catholics with very high birth rates low death rates and steadily growing

prediction:  property owners except jews and in some areas catholics with very high birth rates low death rates and steadily the

 epoch: 12870 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 86%|████████▌ | 12871/15000 [26:27<03:52,  9.14it/s]


 epoch: 12871 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.8%

 epoch: 12872 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%


 86%|████████▌ | 12875/15000 [26:28<05:21,  6.61it/s]


 epoch: 12873 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 12874 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.0%

 epoch: 12875 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 86%|████████▌ | 12877/15000 [26:28<04:41,  7.55it/s]


 epoch: 12876 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12877 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12878 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 86%|████████▌ | 12880/15000 [26:29<04:20,  8.15it/s]


 epoch: 12879 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

input:       presents regions of high population density such as the great urban centers the population is formed by descendants of

target:      presents regions of high population density such as the great urban centers the population is formed by descendants of europeans

prediction:  presents regions of high population density such as the great urban centers the population is formed by descendants of the

 epoch: 12880 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 86%|████████▌ | 12882/15000 [26:29<04:00,  8.81it/s]


 epoch: 12881 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12882 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12883 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 86%|████████▌ | 12886/15000 [26:29<03:33,  9.92it/s]


 epoch: 12884 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.9%

 epoch: 12885 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.0%

 epoch: 12886 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 86%|████████▌ | 12888/15000 [26:30<04:50,  7.26it/s]


 epoch: 12887 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12888 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12889 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 86%|████████▌ | 12890/15000 [26:30<04:34,  7.68it/s]


input:       psychology opened up humanistic themes to scientific study positive psychology is the study of factors which contribute to human

target:      psychology opened up humanistic themes to scientific study positive psychology is the study of factors which contribute to human happiness

prediction:  psychology opened up humanistic themes to scientific study positive psychology is the study of factors which contribute to human the

 epoch: 12890 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12891 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 86%|████████▌ | 12894/15000 [26:30<03:50,  9.14it/s]


 epoch: 12892 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12893 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12894 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 86%|████████▌ | 12896/15000 [26:30<03:45,  9.31it/s]


 epoch: 12895 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12896 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.8%


 86%|████████▌ | 12898/15000 [26:31<03:41,  9.49it/s]


 epoch: 12897 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12898 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12899 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.4%


 86%|████████▌ | 12900/15000 [26:31<03:58,  8.80it/s]


input:       arts the hudson river school was mid th century movement in the tradition of european naturalism the armory show

target:      arts the hudson river school was mid th century movement in the tradition of european naturalism the armory show in

prediction:  arts the hudson river school was mid th century movement in the tradition of european naturalism the armory show the

 epoch: 12900 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%


 86%|████████▌ | 12903/15000 [26:32<05:46,  6.05it/s]


 epoch: 12901 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 12902 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12903 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.26, test_acc: 97.1%


 86%|████████▌ | 12905/15000 [26:32<04:49,  7.23it/s]


 epoch: 12904 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12905 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12906 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 86%|████████▌ | 12908/15000 [26:32<04:06,  8.49it/s]


 epoch: 12907 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12908 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.8%

 epoch: 12909 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 86%|████████▌ | 12911/15000 [26:32<03:56,  8.84it/s]


input:       are many considerations when defining what constitutes programming language the term computer language is sometimes used interchangeably with programming

target:      are many considerations when defining what constitutes programming language the term computer language is sometimes used interchangeably with programming language

prediction:  are many considerations when defining what constitutes programming language the term computer language is sometimes used interchangeably with programming the

 epoch: 12910 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12911 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 86%|████████▌ | 12914/15000 [26:33<03:41,  9.43it/s]


 epoch: 12912 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12913 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12914 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 86%|████████▌ | 12917/15000 [26:33<05:31,  6.29it/s]


 epoch: 12915 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12916 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12917 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%


 86%|████████▌ | 12919/15000 [26:34<04:38,  7.47it/s]


 epoch: 12918 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 12919 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.23, test_acc: 97.1%

input:       available to elites and religious groups the invention of the printing press in the th century made books more

target:      available to elites and religious groups the invention of the printing press in the th century made books more widely

prediction:  available to elites and religious groups the invention of the printing press in the th century made books more the


 86%|████████▌ | 12922/15000 [26:34<04:04,  8.48it/s]


 epoch: 12920 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12921 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.3%

 epoch: 12922 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 86%|████████▌ | 12925/15000 [26:34<03:41,  9.37it/s]


 epoch: 12923 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 12924 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12925 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 86%|████████▌ | 12927/15000 [26:34<03:42,  9.33it/s]


 epoch: 12926 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%

 epoch: 12927 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 86%|████████▌ | 12928/15000 [26:35<03:47,  9.11it/s]


 epoch: 12928 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 86%|████████▌ | 12930/15000 [26:35<06:36,  5.21it/s]


 epoch: 12929 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.1%

input:       based on social convention linguistic signs can be considered arbitrary in the sense that the convention is established socially

target:      based on social convention linguistic signs can be considered arbitrary in the sense that the convention is established socially and

prediction:  based on social convention linguistic signs can be considered arbitrary in the sense that the convention is established socially the

 epoch: 12930 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%


 86%|████████▌ | 12932/15000 [26:35<05:19,  6.47it/s]


 epoch: 12931 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12932 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 86%|████████▌ | 12934/15000 [26:36<04:40,  7.36it/s]


 epoch: 12933 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 12934 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


 86%|████████▌ | 12936/15000 [26:36<04:08,  8.29it/s]


 epoch: 12935 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12936 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 86%|████████▋ | 12938/15000 [26:36<04:07,  8.33it/s]


 epoch: 12937 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 12938 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 86%|████████▋ | 12940/15000 [26:36<04:50,  7.09it/s]


 epoch: 12939 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 96.9%

input:       in eastern europe by about ad and under their leader attila they fought against both sections of the roman

target:      in eastern europe by about ad and under their leader attila they fought against both sections of the roman empire

prediction:  in eastern europe by about ad and under their leader attila they fought against both sections of the roman the

 epoch: 12940 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 86%|████████▋ | 12942/15000 [26:37<04:40,  7.34it/s]


 epoch: 12941 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.1%

 epoch: 12942 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.4%


 86%|████████▋ | 12944/15000 [26:37<04:36,  7.43it/s]


 epoch: 12943 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12944 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 86%|████████▋ | 12946/15000 [26:37<04:25,  7.73it/s]


 epoch: 12945 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12946 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 86%|████████▋ | 12948/15000 [26:37<04:12,  8.12it/s]


 epoch: 12947 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 12948 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 86%|████████▋ | 12950/15000 [26:38<04:53,  6.99it/s]


 epoch: 12949 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

input:       were defeated darius did not forget that athens had assisted the ionian revolt and in he assembled an armada

target:      were defeated darius did not forget that athens had assisted the ionian revolt and in he assembled an armada to

prediction:  were defeated darius did not forget that athens had assisted the ionian revolt and in he assembled an armada the

 epoch: 12950 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 86%|████████▋ | 12952/15000 [26:38<04:30,  7.56it/s]


 epoch: 12951 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 12952 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 86%|████████▋ | 12954/15000 [26:38<04:20,  7.85it/s]


 epoch: 12953 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 12954 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 86%|████████▋ | 12956/15000 [26:38<04:20,  7.84it/s]


 epoch: 12955 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12956 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 86%|████████▋ | 12957/15000 [26:39<04:17,  7.95it/s]


 epoch: 12957 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%


 86%|████████▋ | 12958/15000 [26:39<05:41,  5.98it/s]


 epoch: 12958 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 12959 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

input:       recently shifted to using cs go keys to liquidate their gains at this point nearly all key purchases that

target:      recently shifted to using cs go keys to liquidate their gains at this point nearly all key purchases that end


 86%|████████▋ | 12961/15000 [26:39<04:36,  7.39it/s]


prediction:  recently shifted to using cs go keys to liquidate their gains at this point nearly all key purchases that the

 epoch: 12960 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.1%

 epoch: 12961 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 86%|████████▋ | 12964/15000 [26:39<03:51,  8.79it/s]


 epoch: 12962 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 12963 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12964 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 86%|████████▋ | 12966/15000 [26:40<03:35,  9.44it/s]


 epoch: 12965 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12966 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 12967 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 86%|████████▋ | 12969/15000 [26:40<03:25,  9.90it/s]


 epoch: 12968 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 12969 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

input:       breton greek armenian and albanian distinct non indo european family of uralic languages estonian finnish hungarian erzya komi mari


 86%|████████▋ | 12971/15000 [26:40<03:54,  8.65it/s]


target:      breton greek armenian and albanian distinct non indo european family of uralic languages estonian finnish hungarian erzya komi mari moksha

prediction:  breton greek armenian and albanian distinct non indo european family of uralic languages estonian finnish hungarian erzya komi mari the

 epoch: 12970 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.0%

 epoch: 12971 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 86%|████████▋ | 12974/15000 [26:41<05:47,  5.82it/s]


 epoch: 12972 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12973 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.4%

 epoch: 12974 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 87%|████████▋ | 12976/15000 [26:41<04:49,  7.00it/s]


 epoch: 12975 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12976 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 12977 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 87%|████████▋ | 12978/15000 [26:41<04:16,  7.90it/s]


 epoch: 12978 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12979 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

input:       on april chinese police detained chen mei and cai wei volunteers for terminus project hosted on github and accused

target:      on april chinese police detained chen mei and cai wei volunteers for terminus project hosted on github and accused them


 87%|████████▋ | 12981/15000 [26:42<04:07,  8.15it/s]


prediction:  on april chinese police detained chen mei and cai wei volunteers for terminus project hosted on github and accused the

 epoch: 12980 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 12981 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%


 87%|████████▋ | 12984/15000 [26:42<03:40,  9.13it/s]


 epoch: 12982 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 12983 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 12984 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 87%|████████▋ | 12985/15000 [26:42<03:42,  9.08it/s]


 epoch: 12985 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 87%|████████▋ | 12988/15000 [26:43<05:03,  6.64it/s]


 epoch: 12986 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 12987 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 12988 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 87%|████████▋ | 12990/15000 [26:43<04:48,  6.97it/s]


 epoch: 12989 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       to the sun baked banks of the euphrates in syria from the great rhine danube river system which snaked

target:      to the sun baked banks of the euphrates in syria from the great rhine danube river system which snaked across

prediction:  to the sun baked banks of the euphrates in syria from the great rhine danube river system which snaked the

 epoch: 12990 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%


 87%|████████▋ | 12992/15000 [26:43<04:04,  8.23it/s]


 epoch: 12991 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 12992 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 12993 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 87%|████████▋ | 12995/15000 [26:44<03:42,  9.03it/s]


 epoch: 12994 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 12995 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%


 87%|████████▋ | 12998/15000 [26:44<03:29,  9.54it/s]


 epoch: 12996 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 12997 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 12998 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 12999 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

input:       to operate independently as community platform and business under microsoft the service was led by xamarin nat friedman reporting

target:      to operate independently as community platform and business under microsoft the service was led by xamarin nat friedman reporting to

prediction:  to operate independently as community platform and business under microsoft the service was led by xamarin nat friedman reporting the


 87%|████████▋ | 13002/15000 [26:44<03:42,  8.97it/s]


 epoch: 13000 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 13001 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13002 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 87%|████████▋ | 13005/15000 [26:45<03:27,  9.63it/s]


 epoch: 13003 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13004 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%

 epoch: 13005 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.4%


 87%|████████▋ | 13007/15000 [26:45<03:16, 10.13it/s]


 epoch: 13006 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13007 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13008 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 87%|████████▋ | 13009/15000 [26:45<03:09, 10.53it/s]


 epoch: 13009 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

input:       has been explained by theorists using variety of competing theories including einstein cosmological constant but also extending to more

target:      has been explained by theorists using variety of competing theories including einstein cosmological constant but also extending to more exotic

prediction:  has been explained by theorists using variety of competing theories including einstein cosmological constant but also extending to more the

 epoch: 13010 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 87%|████████▋ | 13013/15000 [26:45<03:15, 10.15it/s]


 epoch: 13011 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13012 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13013 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%


 87%|████████▋ | 13015/15000 [26:46<03:35,  9.22it/s]


 epoch: 13014 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 13015 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13016 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 87%|████████▋ | 13019/15000 [26:46<03:13, 10.23it/s]


 epoch: 13017 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13018 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.8%

 epoch: 13019 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 87%|████████▋ | 13021/15000 [26:46<03:22,  9.79it/s]


input:       in less than year and the officers demanded valentinian choose co ruler on march valentinian chose his own younger

target:      in less than year and the officers demanded valentinian choose co ruler on march valentinian chose his own younger brother

prediction:  in less than year and the officers demanded valentinian choose co ruler on march valentinian chose his own younger the

 epoch: 13020 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13021 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 87%|████████▋ | 13023/15000 [26:46<03:15, 10.12it/s]


 epoch: 13022 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13023 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13024 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 87%|████████▋ | 13027/15000 [26:47<03:09, 10.40it/s]


 epoch: 13025 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 13026 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13027 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 87%|████████▋ | 13029/15000 [26:47<05:16,  6.23it/s]


 epoch: 13028 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13029 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

input:       obtain powerful swords or potions the new crackdowns were supported by persson citing him receiving multiple emails from parents

target:      obtain powerful swords or potions the new crackdowns were supported by persson citing him receiving multiple emails from parents of

prediction:  obtain powerful swords or potions the new crackdowns were supported by persson citing him receiving multiple emails from parents the


 87%|████████▋ | 13032/15000 [26:48<04:30,  7.28it/s]


 epoch: 13030 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13031 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.1%

 epoch: 13032 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 87%|████████▋ | 13034/15000 [26:48<04:02,  8.10it/s]


 epoch: 13033 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13034 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13035 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 87%|████████▋ | 13038/15000 [26:48<03:25,  9.56it/s]


 epoch: 13036 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13037 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13038 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%


 87%|████████▋ | 13040/15000 [26:48<03:28,  9.39it/s]


 epoch: 13039 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

input:       both manufactured goods and natural resources than the world average merchandise exports from the continent were of gdp on

target:      both manufactured goods and natural resources than the world average merchandise exports from the continent were of gdp on an

prediction:  both manufactured goods and natural resources than the world average merchandise exports from the continent were of gdp on the

 epoch: 13040 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 87%|████████▋ | 13042/15000 [26:49<03:30,  9.32it/s]


 epoch: 13041 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13042 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.25, test_acc: 97.0%


 87%|████████▋ | 13044/15000 [26:49<03:57,  8.25it/s]


 epoch: 13043 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13044 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 87%|████████▋ | 13046/15000 [26:49<03:52,  8.40it/s]


 epoch: 13045 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%

 epoch: 13046 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 87%|████████▋ | 13048/15000 [26:49<03:53,  8.36it/s]


 epoch: 13047 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13048 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 87%|████████▋ | 13050/15000 [26:50<04:17,  7.57it/s]


 epoch: 13049 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

input:       phys at minimum the owner must possess year bachelors or equivalent degree in physics or related field and an

target:      phys at minimum the owner must possess year bachelors or equivalent degree in physics or related field and an additional

prediction:  phys at minimum the owner must possess year bachelors or equivalent degree in physics or related field and an the

 epoch: 13050 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 87%|████████▋ | 13052/15000 [26:50<04:04,  7.98it/s]


 epoch: 13051 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13052 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 87%|████████▋ | 13054/15000 [26:50<04:04,  7.94it/s]


 epoch: 13053 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13054 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 87%|████████▋ | 13056/15000 [26:50<03:57,  8.17it/s]


 epoch: 13055 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13056 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 87%|████████▋ | 13058/15000 [26:51<06:54,  4.69it/s]


 epoch: 13057 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13058 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 87%|████████▋ | 13060/15000 [26:51<05:47,  5.58it/s]


 epoch: 13059 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

input:       atomic physics biological physics chemical physics condensed matter physics cosmology geophysics gravitational physics material science medical physics microelectronics molecular

target:      atomic physics biological physics chemical physics condensed matter physics cosmology geophysics gravitational physics material science medical physics microelectronics molecular physics

prediction:  atomic physics biological physics chemical physics condensed matter physics cosmology geophysics gravitational physics material science medical physics microelectronics molecular the

 epoch: 13060 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 87%|████████▋ | 13062/15000 [26:52<04:46,  6.77it/s]


 epoch: 13061 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 13062 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%


 87%|████████▋ | 13064/15000 [26:52<04:24,  7.33it/s]


 epoch: 13063 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13064 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 87%|████████▋ | 13066/15000 [26:52<04:10,  7.71it/s]


 epoch: 13065 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%

 epoch: 13066 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.20, test_acc: 97.3%


 87%|████████▋ | 13068/15000 [26:52<04:04,  7.90it/s]


 epoch: 13067 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 13068 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 87%|████████▋ | 13070/15000 [26:53<04:22,  7.34it/s]


 epoch: 13069 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.2%

input:       on antarctica as of update countries have this consultative status decisions are based on consensus instead of vote the

target:      on antarctica as of update countries have this consultative status decisions are based on consensus instead of vote the treaty

prediction:  on antarctica as of update countries have this consultative status decisions are based on consensus instead of vote the the

 epoch: 13070 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 87%|████████▋ | 13072/15000 [26:53<06:50,  4.70it/s]


 epoch: 13071 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13072 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.5%


 87%|████████▋ | 13075/15000 [26:54<04:32,  7.07it/s]


 epoch: 13073 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13074 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 13075 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 87%|████████▋ | 13077/15000 [26:54<04:01,  7.98it/s]


 epoch: 13076 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 13077 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 87%|████████▋ | 13079/15000 [26:54<03:40,  8.72it/s]


 epoch: 13078 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.0%

 epoch: 13079 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%

input:       is much older than the term itself british computer scientists maurice wilkes and david wheeler worked on modular software

target:      is much older than the term itself british computer scientists maurice wilkes and david wheeler worked on modular software library


 87%|████████▋ | 13081/15000 [26:54<03:46,  8.47it/s]


prediction:  is much older than the term itself british computer scientists maurice wilkes and david wheeler worked on modular software the

 epoch: 13080 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13081 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 87%|████████▋ | 13084/15000 [26:55<03:25,  9.31it/s]


 epoch: 13082 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13083 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 13084 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 87%|████████▋ | 13087/15000 [26:55<04:50,  6.58it/s]


 epoch: 13085 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13086 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 13087 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 87%|████████▋ | 13088/15000 [26:55<04:27,  7.14it/s]


 epoch: 13088 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13089 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

input:       and maximus tried to press the matter by settling succession as only legitimate emperor could do proclaiming his own

target:      and maximus tried to press the matter by settling succession as only legitimate emperor could do proclaiming his own infant

prediction:  and maximus tried to press the matter by settling succession as only legitimate emperor could do proclaiming his own the


 87%|████████▋ | 13092/15000 [26:56<03:38,  8.75it/s]


 epoch: 13090 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13091 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13092 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 87%|████████▋ | 13094/15000 [26:56<03:23,  9.35it/s]


 epoch: 13093 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.4%

 epoch: 13094 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 13095 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 87%|████████▋ | 13098/15000 [26:56<03:07, 10.15it/s]


 epoch: 13096 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13097 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13098 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 87%|████████▋ | 13100/15000 [26:57<05:28,  5.79it/s]


 epoch: 13099 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

input:       accumulated knowledge from one generation to the next this process helps the student to function in society as regular

target:      accumulated knowledge from one generation to the next this process helps the student to function in society as regular citizen

prediction:  accumulated knowledge from one generation to the next this process helps the student to function in society as regular the

 epoch: 13100 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 87%|████████▋ | 13102/15000 [26:57<04:39,  6.79it/s]


 epoch: 13101 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13102 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 96.9%

 epoch: 13103 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 87%|████████▋ | 13106/15000 [26:58<03:41,  8.53it/s]


 epoch: 13104 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13105 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%

 epoch: 13106 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 87%|████████▋ | 13108/15000 [26:58<03:25,  9.19it/s]


 epoch: 13107 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13108 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13109 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 87%|████████▋ | 13111/15000 [26:58<03:29,  9.04it/s]


input:       representation for each state many policy issues are decentralized at state or local level with widely differing laws by

target:      representation for each state many policy issues are decentralized at state or local level with widely differing laws by jurisdiction

prediction:  representation for each state many policy issues are decentralized at state or local level with widely differing laws by the

 epoch: 13110 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13111 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 87%|████████▋ | 13113/15000 [26:58<03:17,  9.57it/s]


 epoch: 13112 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13113 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 87%|████████▋ | 13115/15000 [26:59<05:28,  5.73it/s]


 epoch: 13114 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13115 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13116 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 87%|████████▋ | 13118/15000 [26:59<04:25,  7.10it/s]


 epoch: 13117 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 13118 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 87%|████████▋ | 13120/15000 [26:59<04:07,  7.59it/s]


 epoch: 13119 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

input:       composition is determined by various environmental factors that are interrelated variations of these factors will initiate dynamic modifications to

target:      composition is determined by various environmental factors that are interrelated variations of these factors will initiate dynamic modifications to the

prediction:  composition is determined by various environmental factors that are interrelated variations of these factors will initiate dynamic modifications to the

 epoch: 13120 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.0%


 87%|████████▋ | 13122/15000 [27:00<03:42,  8.44it/s]


 epoch: 13121 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 13122 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13123 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 88%|████████▊ | 13126/15000 [27:00<03:16,  9.53it/s]


 epoch: 13124 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.0%

 epoch: 13125 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.1%

 epoch: 13126 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13127 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 88%|████████▊ | 13128/15000 [27:01<05:28,  5.70it/s]


 epoch: 13128 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13129 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

input:       effects upon dependent variables these conditions are approximated best in laboratory settings in contrast human environments and genetic backgrounds

target:      effects upon dependent variables these conditions are approximated best in laboratory settings in contrast human environments and genetic backgrounds vary

prediction:  effects upon dependent variables these conditions are approximated best in laboratory settings in contrast human environments and genetic backgrounds the


 88%|████████▊ | 13131/15000 [27:01<04:30,  6.92it/s]


 epoch: 13130 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13131 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 88%|████████▊ | 13133/15000 [27:01<03:59,  7.80it/s]


 epoch: 13132 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13133 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.1%

 epoch: 13134 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 88%|████████▊ | 13136/15000 [27:01<03:30,  8.84it/s]



 epoch: 13135 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13136 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 88%|████████▊ | 13139/15000 [27:02<03:18,  9.35it/s]


 epoch: 13137 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13138 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13139 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 88%|████████▊ | 13141/15000 [27:02<03:33,  8.71it/s]


input:       with sediments or spill out of the basin containing them pond is body of standing water either natural or

target:      with sediments or spill out of the basin containing them pond is body of standing water either natural or human

prediction:  with sediments or spill out of the basin containing them pond is body of standing water either natural or the

 epoch: 13140 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.25, test_acc: 96.8%

 epoch: 13141 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%


 88%|████████▊ | 13143/15000 [27:03<06:12,  4.98it/s]


 epoch: 13142 | train_loss: 0.21, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13143 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 88%|████████▊ | 13145/15000 [27:03<05:04,  6.09it/s]


 epoch: 13144 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13145 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 88%|████████▊ | 13147/15000 [27:03<04:33,  6.78it/s]


 epoch: 13146 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%

 epoch: 13147 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%


 88%|████████▊ | 13149/15000 [27:03<04:17,  7.20it/s]


 epoch: 13148 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13149 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 88%|████████▊ | 13150/15000 [27:04<05:02,  6.13it/s]


input:       that does not disgust is fundamental to humour in ancient sanskrit drama bharata muni natya shastra defined humour syam

target:      that does not disgust is fundamental to humour in ancient sanskrit drama bharata muni natya shastra defined humour syam as

prediction:  that does not disgust is fundamental to humour in ancient sanskrit drama bharata muni natya shastra defined humour syam the

 epoch: 13150 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 88%|████████▊ | 13152/15000 [27:04<04:32,  6.77it/s]


 epoch: 13151 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13152 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 88%|████████▊ | 13154/15000 [27:04<04:00,  7.67it/s]


 epoch: 13153 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13154 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 88%|████████▊ | 13156/15000 [27:04<03:46,  8.14it/s]


 epoch: 13155 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13156 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 88%|████████▊ | 13158/15000 [27:05<04:26,  6.91it/s]


 epoch: 13157 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13158 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 88%|████████▊ | 13160/15000 [27:05<04:35,  6.69it/s]


 epoch: 13159 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

input:       alternative theory has been as successful as the cold dark matter proposal in explaining all extant observations the horizon

target:      alternative theory has been as successful as the cold dark matter proposal in explaining all extant observations the horizon problem

prediction:  alternative theory has been as successful as the cold dark matter proposal in explaining all extant observations the horizon the

 epoch: 13160 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 88%|████████▊ | 13162/15000 [27:05<04:05,  7.48it/s]


 epoch: 13161 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13162 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 88%|████████▊ | 13164/15000 [27:06<03:52,  7.88it/s]


 epoch: 13163 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13164 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 88%|████████▊ | 13166/15000 [27:06<03:45,  8.14it/s]


 epoch: 13165 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13166 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 88%|████████▊ | 13168/15000 [27:06<03:55,  7.80it/s]


 epoch: 13167 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13168 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.4%


 88%|████████▊ | 13170/15000 [27:06<04:15,  7.16it/s]


 epoch: 13169 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

input:       distinct area or school it stands for respect for the worth of persons respect for differences of approach open

target:      distinct area or school it stands for respect for the worth of persons respect for differences of approach open mindedness

prediction:  distinct area or school it stands for respect for the worth of persons respect for differences of approach open the

 epoch: 13170 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 88%|████████▊ | 13173/15000 [27:07<05:39,  5.38it/s]


 epoch: 13171 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.3%

 epoch: 13172 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13173 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 88%|████████▊ | 13175/15000 [27:07<04:33,  6.68it/s]


 epoch: 13174 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

 epoch: 13175 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13176 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 88%|████████▊ | 13178/15000 [27:08<03:47,  8.01it/s]


 epoch: 13177 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 13178 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.4%


 88%|████████▊ | 13180/15000 [27:08<03:52,  7.82it/s]


 epoch: 13179 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

input:       system cities were built with precise unmatched stonework constructed over many levels of mountain terrain terrace farming was useful

target:      system cities were built with precise unmatched stonework constructed over many levels of mountain terrain terrace farming was useful form

prediction:  system cities were built with precise unmatched stonework constructed over many levels of mountain terrain terrace farming was useful the

 epoch: 13180 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 88%|████████▊ | 13182/15000 [27:08<03:30,  8.63it/s]


 epoch: 13181 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13182 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13183 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 88%|████████▊ | 13185/15000 [27:08<03:16,  9.21it/s]


 epoch: 13184 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%

 epoch: 13185 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%


 88%|████████▊ | 13188/15000 [27:09<04:57,  6.09it/s]


 epoch: 13186 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13187 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.3%

 epoch: 13188 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 88%|████████▊ | 13190/15000 [27:09<04:24,  6.84it/s]


 epoch: 13189 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

input:       passed which significantly improved living conditions in many british cities europe population increased from about million in to million

target:      passed which significantly improved living conditions in many british cities europe population increased from about million in to million by

prediction:  passed which significantly improved living conditions in many british cities europe population increased from about million in to million the

 epoch: 13190 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 88%|████████▊ | 13193/15000 [27:10<03:38,  8.28it/s]


 epoch: 13191 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13192 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13193 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 88%|████████▊ | 13196/15000 [27:10<03:17,  9.12it/s]


 epoch: 13194 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 96.9%

 epoch: 13195 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%

 epoch: 13196 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 88%|████████▊ | 13199/15000 [27:10<03:06,  9.64it/s]


 epoch: 13197 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.9%

 epoch: 13198 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13199 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

input:       village of deir el medina has resulted in one of the most thoroughly documented accounts of community life in

target:      village of deir el medina has resulted in one of the most thoroughly documented accounts of community life in the

prediction:  village of deir el medina has resulted in one of the most thoroughly documented accounts of community life in the


 88%|████████▊ | 13202/15000 [27:11<05:01,  5.96it/s]


 epoch: 13200 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13201 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 13202 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%


 88%|████████▊ | 13204/15000 [27:11<04:14,  7.07it/s]


 epoch: 13203 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.25, test_acc: 96.8%

 epoch: 13204 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.5%

 epoch: 13205 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%


 88%|████████▊ | 13208/15000 [27:11<03:19,  9.00it/s]


 epoch: 13206 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13207 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 13208 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 88%|████████▊ | 13210/15000 [27:12<03:26,  8.67it/s]


 epoch: 13209 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.0%

input:       from the rest of gondwana in the late cretaceous time ma by ma zealandia was essentially separate from australia

target:      from the rest of gondwana in the late cretaceous time ma by ma zealandia was essentially separate from australia and

prediction:  from the rest of gondwana in the late cretaceous time ma by ma zealandia was essentially separate from australia the

 epoch: 13210 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 88%|████████▊ | 13213/15000 [27:12<03:10,  9.39it/s]


 epoch: 13211 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 13212 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 13213 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 88%|████████▊ | 13215/15000 [27:13<05:20,  5.57it/s]


 epoch: 13214 | train_loss: 0.19, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.3%

 epoch: 13215 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 13216 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 88%|████████▊ | 13218/15000 [27:13<04:04,  7.30it/s]


 epoch: 13217 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.26, test_acc: 97.1%

 epoch: 13218 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 13219 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 88%|████████▊ | 13221/15000 [27:13<03:39,  8.10it/s]


input:       and functionalist frameworks that propose theories for describing syntactic structures based on different assumptions about what language is and

target:      and functionalist frameworks that propose theories for describing syntactic structures based on different assumptions about what language is and how

prediction:  and functionalist frameworks that propose theories for describing syntactic structures based on different assumptions about what language is and the

 epoch: 13220 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13221 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 88%|████████▊ | 13223/15000 [27:13<03:16,  9.05it/s]


 epoch: 13222 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13223 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 13224 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 88%|████████▊ | 13227/15000 [27:14<02:56, 10.05it/s]


 epoch: 13225 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13226 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13227 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%


 88%|████████▊ | 13229/15000 [27:14<04:10,  7.06it/s]


 epoch: 13228 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 13229 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

input:       own just income inequality in the remains at record highs with the top fifth of earners taking home

target:      own just income inequality in the remains at record highs with the top fifth of earners taking home more

prediction:  own just income inequality in the remains at record highs with the top fifth of earners taking home the


 88%|████████▊ | 13232/15000 [27:15<03:40,  8.00it/s]


 epoch: 13230 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 13231 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13232 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 88%|████████▊ | 13234/15000 [27:15<03:19,  8.85it/s]


 epoch: 13233 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13234 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13235 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 88%|████████▊ | 13238/15000 [27:15<02:57,  9.95it/s]


 epoch: 13236 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13237 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 13238 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 88%|████████▊ | 13240/15000 [27:15<03:04,  9.54it/s]


 epoch: 13239 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

input:       made changes to global offensive loot box mechanics due to realization that nearly all of the trading on loot

target:      made changes to global offensive loot box mechanics due to realization that nearly all of the trading on loot box

prediction:  made changes to global offensive loot box mechanics due to realization that nearly all of the trading on loot the

 epoch: 13240 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 88%|████████▊ | 13241/15000 [27:15<03:03,  9.59it/s]


 epoch: 13241 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 13242 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%


 88%|████████▊ | 13245/15000 [27:16<02:56,  9.93it/s]


 epoch: 13243 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 13244 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13245 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 88%|████████▊ | 13247/15000 [27:16<02:51, 10.23it/s]


 epoch: 13246 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 13247 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13248 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 88%|████████▊ | 13249/15000 [27:16<02:46, 10.49it/s]


 epoch: 13249 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

input:       america sent delegates to the second continental congress in philadelphia who unanimously adopted the declaration of independence on july

target:      america sent delegates to the second continental congress in philadelphia who unanimously adopted the declaration of independence on july written

prediction:  america sent delegates to the second continental congress in philadelphia who unanimously adopted the declaration of independence on july the

 epoch: 13250 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 88%|████████▊ | 13251/15000 [27:16<02:51, 10.22it/s]


 epoch: 13251 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13252 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.4%


 88%|████████▊ | 13253/15000 [27:17<02:53, 10.07it/s]


 epoch: 13253 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 13254 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 88%|████████▊ | 13256/15000 [27:17<03:04,  9.46it/s]


 epoch: 13255 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13256 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 88%|████████▊ | 13258/15000 [27:18<05:01,  5.77it/s]


 epoch: 13257 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%

 epoch: 13258 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 88%|████████▊ | 13260/15000 [27:18<04:56,  5.86it/s]


 epoch: 13259 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%

input:       acting on entire species and affecting their rates of speciation and extinction common misconception is that evolution has goals

target:      acting on entire species and affecting their rates of speciation and extinction common misconception is that evolution has goals long

prediction:  acting on entire species and affecting their rates of speciation and extinction common misconception is that evolution has goals the

 epoch: 13260 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 88%|████████▊ | 13262/15000 [27:18<04:11,  6.90it/s]


 epoch: 13261 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13262 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 88%|████████▊ | 13264/15000 [27:18<03:53,  7.44it/s]


 epoch: 13263 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13264 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 88%|████████▊ | 13266/15000 [27:19<03:43,  7.75it/s]


 epoch: 13265 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13266 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 88%|████████▊ | 13268/15000 [27:19<03:53,  7.41it/s]


 epoch: 13267 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13268 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 88%|████████▊ | 13270/15000 [27:19<04:21,  6.61it/s]


 epoch: 13269 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

input:       plate aegean sea plate and anatolian plate to the north and the antarctic plate to the south the climate

target:      plate aegean sea plate and anatolian plate to the north and the antarctic plate to the south the climate of

prediction:  plate aegean sea plate and anatolian plate to the north and the antarctic plate to the south the climate the

 epoch: 13270 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 88%|████████▊ | 13272/15000 [27:20<03:58,  7.26it/s]


 epoch: 13271 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%

 epoch: 13272 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.8%


 88%|████████▊ | 13274/15000 [27:20<03:44,  7.69it/s]


 epoch: 13273 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13274 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 96.9%


 89%|████████▊ | 13276/15000 [27:20<03:36,  7.96it/s]


 epoch: 13275 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

 epoch: 13276 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 89%|████████▊ | 13278/15000 [27:20<03:41,  7.77it/s]


 epoch: 13277 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 13278 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 89%|████████▊ | 13280/15000 [27:21<04:07,  6.95it/s]


 epoch: 13279 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.2%

input:       addition of new rock units both depositionally and intrusively often occurs during deformation faulting and other deformational processes result

target:      addition of new rock units both depositionally and intrusively often occurs during deformation faulting and other deformational processes result in

prediction:  addition of new rock units both depositionally and intrusively often occurs during deformation faulting and other deformational processes result the

 epoch: 13280 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%


 89%|████████▊ | 13282/15000 [27:21<03:49,  7.50it/s]


 epoch: 13281 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13282 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 89%|████████▊ | 13284/15000 [27:21<03:37,  7.88it/s]


 epoch: 13283 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13284 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.21, test_acc: 97.1%


 89%|████████▊ | 13287/15000 [27:22<05:15,  5.43it/s]


 epoch: 13285 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13286 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 13287 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 89%|████████▊ | 13289/15000 [27:22<04:13,  6.74it/s]


 epoch: 13288 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.5%

 epoch: 13289 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       may at the famous golden spike event at promontory summit utah it created nationwide mechanized transportation network that revolutionized

target:      may at the famous golden spike event at promontory summit utah it created nationwide mechanized transportation network that revolutionized the


 89%|████████▊ | 13291/15000 [27:22<03:58,  7.16it/s]


prediction:  may at the famous golden spike event at promontory summit utah it created nationwide mechanized transportation network that revolutionized the

 epoch: 13290 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13291 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 89%|████████▊ | 13293/15000 [27:22<03:28,  8.18it/s]


 epoch: 13292 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13293 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.8%


 89%|████████▊ | 13295/15000 [27:23<03:20,  8.50it/s]


 epoch: 13294 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13295 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 89%|████████▊ | 13297/15000 [27:23<03:11,  8.90it/s]


 epoch: 13296 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 13297 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 89%|████████▊ | 13298/15000 [27:23<03:07,  9.08it/s]


 epoch: 13298 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.4%


 89%|████████▊ | 13300/15000 [27:24<05:52,  4.82it/s]


 epoch: 13299 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.4%

input:       phenomena such as fission and fusion are considered part of nuclear physics molecular physics focuses on multi atomic structures

target:      phenomena such as fission and fusion are considered part of nuclear physics molecular physics focuses on multi atomic structures and

prediction:  phenomena such as fission and fusion are considered part of nuclear physics molecular physics focuses on multi atomic structures the

 epoch: 13300 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 89%|████████▊ | 13302/15000 [27:24<04:27,  6.34it/s]


 epoch: 13301 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13302 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 89%|████████▊ | 13304/15000 [27:24<03:44,  7.55it/s]


 epoch: 13303 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13304 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.7%


 89%|████████▊ | 13307/15000 [27:24<03:06,  9.06it/s]


 epoch: 13305 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 13306 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13307 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%


 89%|████████▊ | 13309/15000 [27:25<02:56,  9.59it/s]


 epoch: 13308 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 13309 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

input:       in hawaiian and tapa elsewhere in the pacific and tattoos native hawaiians had neither metal nor woven cloth rugby

target:      in hawaiian and tapa elsewhere in the pacific and tattoos native hawaiians had neither metal nor woven cloth rugby union


 89%|████████▊ | 13310/15000 [27:25<03:16,  8.59it/s]


prediction:  in hawaiian and tapa elsewhere in the pacific and tattoos native hawaiians had neither metal nor woven cloth rugby the

 epoch: 13310 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13311 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 89%|████████▊ | 13312/15000 [27:25<03:03,  9.20it/s]


 epoch: 13312 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13313 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 89%|████████▉ | 13316/15000 [27:26<03:18,  8.50it/s]


 epoch: 13314 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 13315 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13316 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 89%|████████▉ | 13318/15000 [27:26<03:11,  8.77it/s]


 epoch: 13317 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13318 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 89%|████████▉ | 13320/15000 [27:26<03:22,  8.29it/s]


 epoch: 13319 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%

input:       and increasingly during the last years in ethiopia the domestication of cattle in africa preceded agriculture and seems to

target:      and increasingly during the last years in ethiopia the domestication of cattle in africa preceded agriculture and seems to have

prediction:  and increasingly during the last years in ethiopia the domestication of cattle in africa preceded agriculture and seems to the

 epoch: 13320 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 89%|████████▉ | 13323/15000 [27:26<03:00,  9.29it/s]


 epoch: 13321 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 13322 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13323 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 89%|████████▉ | 13325/15000 [27:26<02:53,  9.64it/s]


 epoch: 13324 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 13325 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13326 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.19, test_acc: 97.3%


 89%|████████▉ | 13327/15000 [27:27<02:50,  9.79it/s]


 epoch: 13327 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 89%|████████▉ | 13328/15000 [27:27<05:20,  5.22it/s]


 epoch: 13328 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 13329 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

input:       emerged around bc and led to large temples and artworks as well as sophisticated textiles gold silver and copper

target:      emerged around bc and led to large temples and artworks as well as sophisticated textiles gold silver and copper were

prediction:  emerged around bc and led to large temples and artworks as well as sophisticated textiles gold silver and copper the


 89%|████████▉ | 13332/15000 [27:28<03:49,  7.28it/s]


 epoch: 13330 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 13331 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13332 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 89%|████████▉ | 13335/15000 [27:28<03:15,  8.52it/s]


 epoch: 13333 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.4%

 epoch: 13334 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13335 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 89%|████████▉ | 13337/15000 [27:28<03:05,  8.96it/s]


 epoch: 13336 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13337 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 89%|████████▉ | 13338/15000 [27:28<03:04,  9.03it/s]


 epoch: 13338 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13339 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

input:       beginning of the war ferdinand vii returned to the throne and began the absolutist restoration as the royalists got

target:      beginning of the war ferdinand vii returned to the throne and began the absolutist restoration as the royalists got the


 89%|████████▉ | 13341/15000 [27:29<03:09,  8.75it/s]


prediction:  beginning of the war ferdinand vii returned to the throne and began the absolutist restoration as the royalists got the

 epoch: 13340 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13341 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.1%


 89%|████████▉ | 13343/15000 [27:29<05:12,  5.30it/s]


 epoch: 13342 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 13343 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 89%|████████▉ | 13346/15000 [27:30<03:43,  7.40it/s]


 epoch: 13344 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13345 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13346 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 89%|████████▉ | 13348/15000 [27:30<03:19,  8.29it/s]


 epoch: 13347 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13348 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 89%|████████▉ | 13350/15000 [27:30<03:31,  7.80it/s]


 epoch: 13349 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

input:       the russian orthodox holy trinity church at the bellingshausen station on king george island opened in it is manned

target:      the russian orthodox holy trinity church at the bellingshausen station on king george island opened in it is manned year

prediction:  the russian orthodox holy trinity church at the bellingshausen station on king george island opened in it is manned the

 epoch: 13350 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 89%|████████▉ | 13352/15000 [27:30<03:14,  8.48it/s]


 epoch: 13351 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%

 epoch: 13352 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 89%|████████▉ | 13355/15000 [27:31<02:55,  9.38it/s]


 epoch: 13353 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13354 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13355 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 89%|████████▉ | 13358/15000 [27:31<03:22,  8.10it/s]


 epoch: 13356 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 13357 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13358 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 89%|████████▉ | 13360/15000 [27:31<03:28,  7.88it/s]


 epoch: 13359 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

input:       particular on the elusive smile on the woman face its mysterious quality perhaps due to the subtly shadowed corners

target:      particular on the elusive smile on the woman face its mysterious quality perhaps due to the subtly shadowed corners of

prediction:  particular on the elusive smile on the woman face its mysterious quality perhaps due to the subtly shadowed corners the

 epoch: 13360 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.1%


 89%|████████▉ | 13362/15000 [27:31<03:21,  8.12it/s]


 epoch: 13361 | train_loss: 0.26, train_acc: 96.6% | test_loss: 0.19, test_acc: 97.3%

 epoch: 13362 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 89%|████████▉ | 13364/15000 [27:32<03:13,  8.46it/s]


 epoch: 13363 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13364 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 89%|████████▉ | 13366/15000 [27:32<03:11,  8.54it/s]


 epoch: 13365 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 13366 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 89%|████████▉ | 13368/15000 [27:32<03:13,  8.45it/s]


 epoch: 13367 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 13368 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 89%|████████▉ | 13369/15000 [27:32<03:20,  8.13it/s]


 epoch: 13369 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

input:       the mid the title dictator was never again used as the adopted heir of julius caesar octavian had taken

target:      the mid the title dictator was never again used as the adopted heir of julius caesar octavian had taken caesar

prediction:  the mid the title dictator was never again used as the adopted heir of julius caesar octavian had taken the


 89%|████████▉ | 13371/15000 [27:33<06:17,  4.32it/s]


 epoch: 13370 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 13371 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 89%|████████▉ | 13373/15000 [27:33<04:50,  5.59it/s]


 epoch: 13372 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13373 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 89%|████████▉ | 13375/15000 [27:34<04:07,  6.57it/s]


 epoch: 13374 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13375 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%


 89%|████████▉ | 13377/15000 [27:34<03:50,  7.04it/s]


 epoch: 13376 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13377 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%


 89%|████████▉ | 13379/15000 [27:34<03:38,  7.42it/s]


 epoch: 13378 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13379 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 89%|████████▉ | 13380/15000 [27:34<04:20,  6.21it/s]


input:       the last years in ethiopia the domestication of cattle in africa preceded agriculture and seems to have existed alongside

target:      the last years in ethiopia the domestication of cattle in africa preceded agriculture and seems to have existed alongside hunter

prediction:  the last years in ethiopia the domestication of cattle in africa preceded agriculture and seems to have existed alongside the

 epoch: 13380 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 89%|████████▉ | 13382/15000 [27:35<03:39,  7.38it/s]


 epoch: 13381 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.0%

 epoch: 13382 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 89%|████████▉ | 13384/15000 [27:35<03:27,  7.78it/s]


 epoch: 13383 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 13384 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 89%|████████▉ | 13386/15000 [27:35<04:02,  6.66it/s]


 epoch: 13385 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

 epoch: 13386 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%


 89%|████████▉ | 13388/15000 [27:35<03:31,  7.63it/s]


 epoch: 13387 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13388 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 13389 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.0%


 89%|████████▉ | 13390/15000 [27:36<03:21,  7.99it/s]


input:       or pacific islands are bullen added that asia europe and the maritime continent are not literal geographic continents the

target:      or pacific islands are bullen added that asia europe and the maritime continent are not literal geographic continents the asia

prediction:  or pacific islands are bullen added that asia europe and the maritime continent are not literal geographic continents the the

 epoch: 13390 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13391 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 89%|████████▉ | 13394/15000 [27:36<02:51,  9.35it/s]


 epoch: 13392 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13393 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13394 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 89%|████████▉ | 13396/15000 [27:36<02:47,  9.55it/s]


 epoch: 13395 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13396 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 89%|████████▉ | 13398/15000 [27:36<02:53,  9.24it/s]


 epoch: 13397 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13398 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 89%|████████▉ | 13400/15000 [27:37<05:29,  4.85it/s]


 epoch: 13399 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

input:       sections of chromosome becoming duplicated usually by genetic recombination which can introduce extra copies of gene into genome extra

target:      sections of chromosome becoming duplicated usually by genetic recombination which can introduce extra copies of gene into genome extra copies

prediction:  sections of chromosome becoming duplicated usually by genetic recombination which can introduce extra copies of gene into genome extra the

 epoch: 13400 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 89%|████████▉ | 13402/15000 [27:37<04:09,  6.39it/s]


 epoch: 13401 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 13402 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.3%

 epoch: 13403 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%


 89%|████████▉ | 13406/15000 [27:38<03:11,  8.33it/s]


 epoch: 13404 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13405 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13406 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 89%|████████▉ | 13409/15000 [27:38<02:55,  9.08it/s]


 epoch: 13407 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13408 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13409 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 96.9%


 89%|████████▉ | 13411/15000 [27:38<03:04,  8.61it/s]


input:       mansiones was determined by how far wagon could travel in day carts were usually pulled by mules travelling about

target:      mansiones was determined by how far wagon could travel in day carts were usually pulled by mules travelling about mph

prediction:  mansiones was determined by how far wagon could travel in day carts were usually pulled by mules travelling about the

 epoch: 13410 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 13411 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 89%|████████▉ | 13412/15000 [27:38<03:00,  8.78it/s]


 epoch: 13412 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 89%|████████▉ | 13414/15000 [27:39<05:13,  5.06it/s]


 epoch: 13413 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13414 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%


 89%|████████▉ | 13417/15000 [27:39<03:37,  7.29it/s]


 epoch: 13415 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13416 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 13417 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 89%|████████▉ | 13419/15000 [27:39<03:08,  8.38it/s]


 epoch: 13418 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13419 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

input:       the role of education in transitioning from an authoritarian regime to democracy the history of education examines the evolution

target:      the role of education in transitioning from an authoritarian regime to democracy the history of education examines the evolution of

prediction:  the role of education in transitioning from an authoritarian regime to democracy the history of education examines the evolution the


 89%|████████▉ | 13422/15000 [27:40<02:57,  8.89it/s]


 epoch: 13420 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13421 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 13422 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 89%|████████▉ | 13424/15000 [27:40<02:49,  9.28it/s]


 epoch: 13423 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13424 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 90%|████████▉ | 13425/15000 [27:40<02:50,  9.26it/s]


 epoch: 13425 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13426 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%


 90%|████████▉ | 13429/15000 [27:41<02:47,  9.36it/s]


 epoch: 13427 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 13428 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%

 epoch: 13429 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 90%|████████▉ | 13430/15000 [27:41<03:00,  8.69it/s]


input:       thought that leonardo never made painting from it the closest similarity being to the virgin and child with saint

target:      thought that leonardo never made painting from it the closest similarity being to the virgin and child with saint anne

prediction:  thought that leonardo never made painting from it the closest similarity being to the virgin and child with saint the

 epoch: 13430 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 13431 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%


 90%|████████▉ | 13434/15000 [27:41<02:37,  9.94it/s]


 epoch: 13432 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.24, test_acc: 96.7%

 epoch: 13433 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13434 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 90%|████████▉ | 13437/15000 [27:41<02:31, 10.29it/s]


 epoch: 13435 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13436 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13437 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 90%|████████▉ | 13439/15000 [27:42<02:31, 10.31it/s]


 epoch: 13438 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13439 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

input:       of waterloo napoleonic rule resulted in the further dissemination of the ideals of the french revolution including that of

target:      of waterloo napoleonic rule resulted in the further dissemination of the ideals of the french revolution including that of the

prediction:  of waterloo napoleonic rule resulted in the further dissemination of the ideals of the french revolution including that of the


 90%|████████▉ | 13441/15000 [27:42<04:33,  5.70it/s]


 epoch: 13440 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13441 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13442 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 90%|████████▉ | 13445/15000 [27:43<03:21,  7.73it/s]


 epoch: 13443 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13444 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13445 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 90%|████████▉ | 13447/15000 [27:43<03:04,  8.42it/s]


 epoch: 13446 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 13447 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13448 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 90%|████████▉ | 13449/15000 [27:43<02:50,  9.12it/s]


 epoch: 13449 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

input:       institution most notably those headquartered in per bastet during the new kingdom and at abydos and sa in the

target:      institution most notably those headquartered in per bastet during the new kingdom and at abydos and sa in the late

prediction:  institution most notably those headquartered in per bastet during the new kingdom and at abydos and sa in the the

 epoch: 13450 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 90%|████████▉ | 13452/15000 [27:43<02:50,  9.08it/s]


 epoch: 13451 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13452 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 90%|████████▉ | 13453/15000 [27:43<02:49,  9.14it/s]


 epoch: 13453 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 13454 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 90%|████████▉ | 13457/15000 [27:44<03:59,  6.44it/s]


 epoch: 13455 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13456 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13457 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.4%


 90%|████████▉ | 13458/15000 [27:44<03:46,  6.82it/s]


 epoch: 13458 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13459 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

input:       liz lee were identified by redditors and outside commentators as child pornography because the photos were taken when the

target:      liz lee were identified by redditors and outside commentators as child pornography because the photos were taken when the women

prediction:  liz lee were identified by redditors and outside commentators as child pornography because the photos were taken when the the


 90%|████████▉ | 13462/15000 [27:45<03:06,  8.26it/s]


 epoch: 13460 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13461 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13462 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.1%


 90%|████████▉ | 13465/15000 [27:45<02:48,  9.12it/s]


 epoch: 13463 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 13464 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 13465 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 90%|████████▉ | 13467/15000 [27:45<02:44,  9.33it/s]


 epoch: 13466 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13467 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13468 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 90%|████████▉ | 13470/15000 [27:46<03:21,  7.60it/s]


 epoch: 13469 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

input:       the history and present of the making or screening of films on the african continent and also refers to

target:      the history and present of the making or screening of films on the african continent and also refers to the

prediction:  the history and present of the making or screening of films on the african continent and also refers to the

 epoch: 13470 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 90%|████████▉ | 13472/15000 [27:46<03:14,  7.87it/s]


 epoch: 13471 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

 epoch: 13472 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 90%|████████▉ | 13474/15000 [27:46<03:14,  7.84it/s]


 epoch: 13473 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

 epoch: 13474 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 90%|████████▉ | 13476/15000 [27:46<03:00,  8.46it/s]


 epoch: 13475 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13476 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 90%|████████▉ | 13478/15000 [27:47<03:11,  7.97it/s]


 epoch: 13477 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 13478 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 90%|████████▉ | 13479/15000 [27:47<03:18,  7.65it/s]


 epoch: 13479 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

input:       complaints about apa members with the apa ethics committee members of the apa have three year window some of

target:      complaints about apa members with the apa ethics committee members of the apa have three year window some of the

prediction:  complaints about apa members with the apa ethics committee members of the apa have three year window some of the


 90%|████████▉ | 13481/15000 [27:47<03:44,  6.75it/s]


 epoch: 13480 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13481 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%


 90%|████████▉ | 13482/15000 [27:47<03:27,  7.32it/s]


 epoch: 13482 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 90%|████████▉ | 13484/15000 [27:48<05:19,  4.75it/s]


 epoch: 13483 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 13484 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.2%


 90%|████████▉ | 13486/15000 [27:48<04:10,  6.05it/s]


 epoch: 13485 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.5%

 epoch: 13486 | train_loss: 0.25, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%


 90%|████████▉ | 13488/15000 [27:48<03:34,  7.05it/s]


 epoch: 13487 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13488 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 90%|████████▉ | 13490/15000 [27:49<03:40,  6.86it/s]


 epoch: 13489 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

input:       only appearing more diverse because it is more noticeable indeed the evolution of microorganisms is particularly important to evolutionary

target:      only appearing more diverse because it is more noticeable indeed the evolution of microorganisms is particularly important to evolutionary research

prediction:  only appearing more diverse because it is more noticeable indeed the evolution of microorganisms is particularly important to evolutionary the

 epoch: 13490 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 90%|████████▉ | 13492/15000 [27:49<03:13,  7.78it/s]


 epoch: 13491 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13492 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%


 90%|████████▉ | 13494/15000 [27:49<03:12,  7.83it/s]


 epoch: 13493 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13494 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%


 90%|████████▉ | 13496/15000 [27:49<03:07,  8.04it/s]


 epoch: 13495 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13496 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 90%|████████▉ | 13499/15000 [27:50<02:55,  8.57it/s]


 epoch: 13497 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13498 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13499 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 90%|█████████ | 13500/15000 [27:50<03:07,  8.00it/s]


input:       are coffee cocoa and bananas mainly in brazil colombia and ecuador traditionally the countries producing sugar for export are

target:      are coffee cocoa and bananas mainly in brazil colombia and ecuador traditionally the countries producing sugar for export are peru

prediction:  are coffee cocoa and bananas mainly in brazil colombia and ecuador traditionally the countries producing sugar for export are the

 epoch: 13500 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13501 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 90%|█████████ | 13504/15000 [27:50<02:38,  9.47it/s]


 epoch: 13502 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13503 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13504 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 90%|█████████ | 13507/15000 [27:51<02:30,  9.90it/s]


 epoch: 13505 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 13506 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 13507 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 90%|█████████ | 13509/15000 [27:51<02:31,  9.86it/s]


 epoch: 13508 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13509 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

input:       to spread potentially dangerous misinformation the idea and initial development of reddit originated with college roommates steve huffman and

target:      to spread potentially dangerous misinformation the idea and initial development of reddit originated with college roommates steve huffman and alexis


 90%|█████████ | 13510/15000 [27:51<02:52,  8.63it/s]


prediction:  to spread potentially dangerous misinformation the idea and initial development of reddit originated with college roommates steve huffman and the

 epoch: 13510 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 90%|█████████ | 13513/15000 [27:51<03:18,  7.50it/s]


 epoch: 13511 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.0%

 epoch: 13512 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13513 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.1%


 90%|█████████ | 13515/15000 [27:52<02:56,  8.42it/s]


 epoch: 13514 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13515 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 13516 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 90%|█████████ | 13519/15000 [27:52<02:38,  9.36it/s]


 epoch: 13517 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13518 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 13519 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 90%|█████████ | 13520/15000 [27:52<02:59,  8.25it/s]


input:       outclassed its contemporaries martin gaston of videogamer com wrote that although he was too old to truly enjoy the

target:      outclassed its contemporaries martin gaston of videogamer com wrote that although he was too old to truly enjoy the game

prediction:  outclassed its contemporaries martin gaston of videogamer com wrote that although he was too old to truly enjoy the the

 epoch: 13520 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 90%|█████████ | 13522/15000 [27:52<02:48,  8.77it/s]


 epoch: 13521 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13522 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13523 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 90%|█████████ | 13525/15000 [27:53<04:26,  5.53it/s]


 epoch: 13524 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13525 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 90%|█████████ | 13528/15000 [27:53<03:17,  7.47it/s]


 epoch: 13526 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13527 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13528 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.26, test_acc: 96.9%


 90%|█████████ | 13530/15000 [27:54<03:05,  7.92it/s]


 epoch: 13529 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.8%

input:       steady state model whereby new matter would be created as the universe seemed to expand in this model the

target:      steady state model whereby new matter would be created as the universe seemed to expand in this model the universe

prediction:  steady state model whereby new matter would be created as the universe seemed to expand in this model the the

 epoch: 13530 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 90%|█████████ | 13533/15000 [27:54<02:42,  9.02it/s]


 epoch: 13531 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13532 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13533 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 90%|█████████ | 13536/15000 [27:54<02:33,  9.51it/s]


 epoch: 13534 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13535 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.0%

 epoch: 13536 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 90%|█████████ | 13538/15000 [27:54<02:31,  9.68it/s]


 epoch: 13537 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13538 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 90%|█████████ | 13540/15000 [27:55<02:59,  8.12it/s]


 epoch: 13539 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

input:       rd century when the instability of the empire disrupted production hydraulic mining allowed base and precious metals to be

target:      rd century when the instability of the empire disrupted production hydraulic mining allowed base and precious metals to be extracted

prediction:  rd century when the instability of the empire disrupted production hydraulic mining allowed base and precious metals to be the

 epoch: 13540 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 90%|█████████ | 13542/15000 [27:55<02:40,  9.08it/s]


 epoch: 13541 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13542 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

 epoch: 13543 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 90%|█████████ | 13545/15000 [27:55<02:29,  9.71it/s]


 epoch: 13544 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13545 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13546 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%


 90%|█████████ | 13549/15000 [27:56<02:17, 10.57it/s]


 epoch: 13547 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13548 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13549 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 90%|█████████ | 13551/15000 [27:56<02:29,  9.67it/s]


input:       ancient egyptian technology are indicated by set of artifacts and customs that lasted for thousands of years the egyptians

target:      ancient egyptian technology are indicated by set of artifacts and customs that lasted for thousands of years the egyptians invented

prediction:  ancient egyptian technology are indicated by set of artifacts and customs that lasted for thousands of years the egyptians the

 epoch: 13550 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13551 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 90%|█████████ | 13552/15000 [27:56<02:32,  9.51it/s]


 epoch: 13552 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%


 90%|█████████ | 13555/15000 [27:57<03:35,  6.69it/s]


 epoch: 13553 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 13554 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 13555 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 90%|█████████ | 13557/15000 [27:57<03:07,  7.71it/s]


 epoch: 13556 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13557 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13558 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 90%|█████████ | 13560/15000 [27:57<02:51,  8.38it/s]


 epoch: 13559 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

input:       distinguished by their distinct sounds which are result of their different articulations and can be either vowels or consonants

target:      distinguished by their distinct sounds which are result of their different articulations and can be either vowels or consonants suprasegmental

prediction:  distinguished by their distinct sounds which are result of their different articulations and can be either vowels or consonants the

 epoch: 13560 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 90%|█████████ | 13563/15000 [27:57<02:35,  9.24it/s]


 epoch: 13561 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%

 epoch: 13562 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.4%

 epoch: 13563 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 90%|█████████ | 13566/15000 [27:58<02:28,  9.65it/s]


 epoch: 13564 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13565 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 13566 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 90%|█████████ | 13569/15000 [27:58<03:44,  6.37it/s]


 epoch: 13567 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13568 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13569 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 90%|█████████ | 13570/15000 [27:59<03:42,  6.41it/s]


input:       exploit the deposit it is necessary capital technology and knowledge that can only come from offshore energy companies who

target:      exploit the deposit it is necessary capital technology and knowledge that can only come from offshore energy companies who view

prediction:  exploit the deposit it is necessary capital technology and knowledge that can only come from offshore energy companies who the

 epoch: 13570 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13571 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 90%|█████████ | 13574/15000 [27:59<02:46,  8.57it/s]


 epoch: 13572 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13573 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 13574 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.3%


 91%|█████████ | 13576/15000 [27:59<02:36,  9.08it/s]


 epoch: 13575 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13576 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13577 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 91%|█████████ | 13579/15000 [27:59<02:31,  9.39it/s]


 epoch: 13578 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13579 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%

input:       playstation were released on december and september respectively the playstation version was announced as launch title though it was


 91%|█████████ | 13580/15000 [28:00<02:54,  8.13it/s]


target:      playstation were released on december and september respectively the playstation version was announced as launch title though it was eventually

prediction:  playstation were released on december and september respectively the playstation version was announced as launch title though it was the

 epoch: 13580 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%


 91%|█████████ | 13582/15000 [28:00<02:54,  8.12it/s]


 epoch: 13581 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13582 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 91%|█████████ | 13584/15000 [28:00<02:49,  8.37it/s]


 epoch: 13583 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.1%

 epoch: 13584 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 91%|█████████ | 13586/15000 [28:00<02:49,  8.32it/s]


 epoch: 13585 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13586 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.0%


 91%|█████████ | 13588/15000 [28:01<02:52,  8.17it/s]


 epoch: 13587 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.4%

 epoch: 13588 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%


 91%|█████████ | 13589/15000 [28:01<02:53,  8.12it/s]


 epoch: 13589 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

input:       parthian war successful war and negotiated peace with the parthian empire the suppression of revolt led by boudica in

target:      parthian war successful war and negotiated peace with the parthian empire the suppression of revolt led by boudica in britannia

prediction:  parthian war successful war and negotiated peace with the parthian empire the suppression of revolt led by boudica in the

 epoch: 13590 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 91%|█████████ | 13592/15000 [28:01<03:13,  7.28it/s]


 epoch: 13591 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

 epoch: 13592 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 91%|█████████ | 13594/15000 [28:01<02:58,  7.87it/s]


 epoch: 13593 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13594 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 91%|█████████ | 13596/15000 [28:02<05:12,  4.49it/s]


 epoch: 13595 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13596 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 91%|█████████ | 13598/15000 [28:02<03:56,  5.93it/s]


 epoch: 13597 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%

 epoch: 13598 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 91%|█████████ | 13600/15000 [28:03<03:33,  6.54it/s]


 epoch: 13599 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

input:       of space exploration in the twentieth century geologists have begun to look at other planetary bodies in the same

target:      of space exploration in the twentieth century geologists have begun to look at other planetary bodies in the same ways

prediction:  of space exploration in the twentieth century geologists have begun to look at other planetary bodies in the same the

 epoch: 13600 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 91%|█████████ | 13602/15000 [28:03<03:18,  7.05it/s]


 epoch: 13601 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13602 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 91%|█████████ | 13604/15000 [28:03<03:01,  7.68it/s]


 epoch: 13603 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13604 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 91%|█████████ | 13606/15000 [28:03<03:01,  7.66it/s]


 epoch: 13605 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13606 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 91%|█████████ | 13608/15000 [28:04<02:58,  7.81it/s]


 epoch: 13607 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13608 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 91%|█████████ | 13610/15000 [28:04<05:32,  4.18it/s]


 epoch: 13609 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

input:       pigman like mobs called piglins and their zombified counterparts plus floating balloon like mobs called ghasts the piglins are

target:      pigman like mobs called piglins and their zombified counterparts plus floating balloon like mobs called ghasts the piglins are considered

prediction:  pigman like mobs called piglins and their zombified counterparts plus floating balloon like mobs called ghasts the piglins are the

 epoch: 13610 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.2%


 91%|█████████ | 13612/15000 [28:05<03:55,  5.91it/s]


 epoch: 13611 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13612 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%


 91%|█████████ | 13615/15000 [28:05<02:50,  8.12it/s]


 epoch: 13613 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.4%

 epoch: 13614 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13615 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 91%|█████████ | 13617/15000 [28:05<02:33,  9.02it/s]


 epoch: 13616 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13617 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 91%|█████████ | 13619/15000 [28:05<02:31,  9.12it/s]


 epoch: 13618 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13619 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

input:       the top languages spoken by more than million speakers each are spoken by of the world population in contrast

target:      the top languages spoken by more than million speakers each are spoken by of the world population in contrast many


 91%|█████████ | 13620/15000 [28:05<02:47,  8.23it/s]


prediction:  the top languages spoken by more than million speakers each are spoken by of the world population in contrast the

 epoch: 13620 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13621 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 91%|█████████ | 13623/15000 [28:06<02:34,  8.91it/s]


 epoch: 13622 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.4%

 epoch: 13623 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 91%|█████████ | 13626/15000 [28:06<03:37,  6.31it/s]


 epoch: 13624 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 13625 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 13626 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 91%|█████████ | 13629/15000 [28:07<02:49,  8.08it/s]


 epoch: 13627 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13628 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13629 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 91%|█████████ | 13631/15000 [28:07<02:49,  8.08it/s]


input:       that latin was the language of the military the last reference to gaulish was between and the emergent gallo

target:      that latin was the language of the military the last reference to gaulish was between and the emergent gallo romance

prediction:  that latin was the language of the military the last reference to gaulish was between and the emergent gallo the

 epoch: 13630 | train_loss: 0.19, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%

 epoch: 13631 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.0%


 91%|█████████ | 13633/15000 [28:07<02:39,  8.59it/s]


 epoch: 13632 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13633 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 91%|█████████ | 13635/15000 [28:07<02:35,  8.80it/s]


 epoch: 13634 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13635 | train_loss: 0.21, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%


 91%|█████████ | 13637/15000 [28:08<02:34,  8.82it/s]


 epoch: 13636 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13637 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 91%|█████████ | 13639/15000 [28:08<02:40,  8.49it/s]


 epoch: 13638 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13639 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

input:       rigid rules that governed its highly stylized and symbolic appearance ancient egyptian art served its political and religious purposes


 91%|█████████ | 13640/15000 [28:08<03:05,  7.33it/s]


target:      rigid rules that governed its highly stylized and symbolic appearance ancient egyptian art served its political and religious purposes with

prediction:  rigid rules that governed its highly stylized and symbolic appearance ancient egyptian art served its political and religious purposes the

 epoch: 13640 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 91%|█████████ | 13643/15000 [28:08<02:34,  8.77it/s]


 epoch: 13641 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13642 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13643 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 91%|█████████ | 13646/15000 [28:09<02:22,  9.53it/s]


 epoch: 13644 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13645 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13646 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 91%|█████████ | 13648/15000 [28:09<02:23,  9.41it/s]


 epoch: 13647 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.3%

 epoch: 13648 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.3%


 91%|█████████ | 13650/15000 [28:09<02:37,  8.58it/s]


 epoch: 13649 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

input:       develop on the earth were microbes and they remained the only form of life until about billion years ago

target:      develop on the earth were microbes and they remained the only form of life until about billion years ago when

prediction:  develop on the earth were microbes and they remained the only form of life until about billion years ago the

 epoch: 13650 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 91%|█████████ | 13651/15000 [28:09<02:39,  8.48it/s]


 epoch: 13651 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 91%|█████████ | 13653/15000 [28:10<03:46,  5.95it/s]


 epoch: 13652 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13653 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 91%|█████████ | 13655/15000 [28:10<03:07,  7.19it/s]


 epoch: 13654 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13655 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 91%|█████████ | 13657/15000 [28:10<02:41,  8.32it/s]


 epoch: 13656 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13657 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.27, test_acc: 97.0%

 epoch: 13658 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 91%|█████████ | 13660/15000 [28:10<02:40,  8.35it/s]


 epoch: 13659 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

input:       asia via the bering strait during the early middle holocene prior to the arrival of european explorers and colonists

target:      asia via the bering strait during the early middle holocene prior to the arrival of european explorers and colonists in

prediction:  asia via the bering strait during the early middle holocene prior to the arrival of european explorers and colonists the

 epoch: 13660 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 91%|█████████ | 13662/15000 [28:11<02:28,  9.04it/s]


 epoch: 13661 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 13662 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13663 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 91%|█████████ | 13665/15000 [28:11<02:22,  9.36it/s]


 epoch: 13664 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 13665 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 91%|█████████ | 13666/15000 [28:11<02:20,  9.47it/s]


 epoch: 13666 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.2%


 91%|█████████ | 13669/15000 [28:12<03:35,  6.16it/s]


 epoch: 13667 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 13668 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13669 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 91%|█████████ | 13670/15000 [28:12<03:36,  6.15it/s]


input:       speakers invented polished stone axes for clearing forest around bc the saharan climate started to become drier at an

target:      speakers invented polished stone axes for clearing forest around bc the saharan climate started to become drier at an exceedingly

prediction:  speakers invented polished stone axes for clearing forest around bc the saharan climate started to become drier at an the

 epoch: 13670 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13671 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 91%|█████████ | 13674/15000 [28:12<02:36,  8.48it/s]


 epoch: 13672 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 96.9%

 epoch: 13673 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13674 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 91%|█████████ | 13677/15000 [28:13<02:21,  9.35it/s]


 epoch: 13675 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 13676 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13677 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 91%|█████████ | 13679/15000 [28:13<02:20,  9.40it/s]


 epoch: 13678 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13679 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

input:       ample powers from the united states of america to spain to seek assistance in the revolutionary war effort the

target:      ample powers from the united states of america to spain to seek assistance in the revolutionary war effort the first

prediction:  ample powers from the united states of america to spain to seek assistance in the revolutionary war effort the the


 91%|█████████ | 13682/15000 [28:13<02:58,  7.37it/s]


 epoch: 13680 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13681 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13682 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 91%|█████████ | 13685/15000 [28:14<02:35,  8.43it/s]


 epoch: 13683 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13684 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%

 epoch: 13685 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 91%|█████████ | 13687/15000 [28:14<02:35,  8.43it/s]


 epoch: 13686 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13687 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 91%|█████████▏| 13689/15000 [28:14<02:38,  8.29it/s]


 epoch: 13688 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13689 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 91%|█████████▏| 13690/15000 [28:14<03:03,  7.14it/s]


input:       as receiving screenshots from students to show completion of lesson in september minecraftedu said that approximately students around the

target:      as receiving screenshots from students to show completion of lesson in september minecraftedu said that approximately students around the world

prediction:  as receiving screenshots from students to show completion of lesson in september minecraftedu said that approximately students around the the

 epoch: 13690 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%


 91%|█████████▏| 13692/15000 [28:15<02:51,  7.61it/s]


 epoch: 13691 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13692 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 91%|█████████▏| 13694/15000 [28:15<02:54,  7.47it/s]


 epoch: 13693 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13694 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 91%|█████████▏| 13696/15000 [28:15<02:49,  7.72it/s]


 epoch: 13695 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13696 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 91%|█████████▏| 13698/15000 [28:15<02:40,  8.11it/s]


 epoch: 13697 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13698 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%


 91%|█████████▏| 13699/15000 [28:15<02:38,  8.22it/s]


 epoch: 13699 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

input:       are evaluated by women no evidence was found to suggest men prefer women with sense of humour as partners

target:      are evaluated by women no evidence was found to suggest men prefer women with sense of humour as partners nor

prediction:  are evaluated by women no evidence was found to suggest men prefer women with sense of humour as partners the


 91%|█████████▏| 13701/15000 [28:16<02:54,  7.43it/s]


 epoch: 13700 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13701 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 91%|█████████▏| 13703/15000 [28:16<02:38,  8.17it/s]


 epoch: 13702 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13703 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 91%|█████████▏| 13705/15000 [28:16<02:36,  8.26it/s]


 epoch: 13704 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13705 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%


 91%|█████████▏| 13707/15000 [28:16<02:31,  8.51it/s]


 epoch: 13706 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.5%

 epoch: 13707 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 91%|█████████▏| 13709/15000 [28:17<03:37,  5.93it/s]


 epoch: 13708 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13709 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 91%|█████████▏| 13710/15000 [28:17<03:45,  5.72it/s]


input:       and the speciation process has been repeated in the laboratory which allows the study of the genetic mechanisms involved

target:      and the speciation process has been repeated in the laboratory which allows the study of the genetic mechanisms involved in

prediction:  and the speciation process has been repeated in the laboratory which allows the study of the genetic mechanisms involved the

 epoch: 13710 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 91%|█████████▏| 13712/15000 [28:17<03:09,  6.79it/s]


 epoch: 13711 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%

 epoch: 13712 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%


 91%|█████████▏| 13714/15000 [28:18<02:59,  7.17it/s]


 epoch: 13713 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.8%

 epoch: 13714 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 91%|█████████▏| 13717/15000 [28:18<02:24,  8.88it/s]


 epoch: 13715 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 13716 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13717 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 91%|█████████▏| 13718/15000 [28:18<02:20,  9.11it/s]


 epoch: 13718 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 13719 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

input:       have offered sufficient privacy for the illegitimate birth though it is still possible he was born in house in

target:      have offered sufficient privacy for the illegitimate birth though it is still possible he was born in house in florence

prediction:  have offered sufficient privacy for the illegitimate birth though it is still possible he was born in house in the


 91%|█████████▏| 13721/15000 [28:18<02:24,  8.85it/s]


 epoch: 13720 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13721 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13722 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 96.8%


 91%|█████████▏| 13724/15000 [28:19<03:20,  6.36it/s]


 epoch: 13723 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13724 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13725 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 92%|█████████▏| 13727/15000 [28:19<02:48,  7.56it/s]


 epoch: 13726 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13727 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 92%|█████████▏| 13728/15000 [28:19<02:40,  7.92it/s]


 epoch: 13728 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13729 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

input:       history of hawaii economy can be traced through succession of dominant industries sandalwood whaling sugarcane pineapple the military tourism

target:      history of hawaii economy can be traced through succession of dominant industries sandalwood whaling sugarcane pineapple the military tourism and


 92%|█████████▏| 13730/15000 [28:20<02:44,  7.71it/s]


prediction:  history of hawaii economy can be traced through succession of dominant industries sandalwood whaling sugarcane pineapple the military tourism the

 epoch: 13730 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13731 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 92%|█████████▏| 13734/15000 [28:20<02:19,  9.08it/s]


 epoch: 13732 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13733 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 13734 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 92%|█████████▏| 13736/15000 [28:20<02:12,  9.52it/s]


 epoch: 13735 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 13736 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 92%|█████████▏| 13738/15000 [28:21<03:01,  6.93it/s]


 epoch: 13737 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.0%

 epoch: 13738 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 96.9%

 epoch: 13739 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 92%|█████████▏| 13740/15000 [28:21<02:51,  7.35it/s]


input:       the ability to add new advancements dimensions functions loot tables predicates recipes structures tags world generation settings and biomes

target:      the ability to add new advancements dimensions functions loot tables predicates recipes structures tags world generation settings and biomes the

prediction:  the ability to add new advancements dimensions functions loot tables predicates recipes structures tags world generation settings and biomes the

 epoch: 13740 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.5%

 epoch: 13741 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 92%|█████████▏| 13744/15000 [28:21<02:22,  8.81it/s]


 epoch: 13742 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13743 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.5%

 epoch: 13744 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 92%|█████████▏| 13747/15000 [28:22<02:12,  9.44it/s]


 epoch: 13745 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13746 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13747 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 92%|█████████▏| 13748/15000 [28:22<02:12,  9.48it/s]


 epoch: 13748 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13749 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

input:       spoken in europe zuni of new mexico pur pecha of mexico ainu of japan burushaski of pakistan and many

target:      spoken in europe zuni of new mexico pur pecha of mexico ainu of japan burushaski of pakistan and many others


 92%|█████████▏| 13750/15000 [28:22<02:22,  8.78it/s]


prediction:  spoken in europe zuni of new mexico pur pecha of mexico ainu of japan burushaski of pakistan and many the

 epoch: 13750 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 92%|█████████▏| 13753/15000 [28:23<03:30,  5.93it/s]


 epoch: 13751 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13752 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 13753 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%


 92%|█████████▏| 13755/15000 [28:23<02:57,  7.01it/s]


 epoch: 13754 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13755 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 13756 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.1%


 92%|█████████▏| 13759/15000 [28:23<02:20,  8.83it/s]


 epoch: 13757 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 13758 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13759 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 92%|█████████▏| 13761/15000 [28:24<02:22,  8.67it/s]


input:       africa has the largest number of megafauna species as it was least affected by the extinction of the pleistocene

target:      africa has the largest number of megafauna species as it was least affected by the extinction of the pleistocene megafauna

prediction:  africa has the largest number of megafauna species as it was least affected by the extinction of the pleistocene the

 epoch: 13760 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13761 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 92%|█████████▏| 13764/15000 [28:24<02:12,  9.30it/s]


 epoch: 13762 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 13763 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 13764 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 92%|█████████▏| 13767/15000 [28:25<03:11,  6.43it/s]


 epoch: 13765 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13766 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13767 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 92%|█████████▏| 13769/15000 [28:25<02:44,  7.47it/s]


 epoch: 13768 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13769 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

input:       of disruptive or manipulative practices by its members from sources such as troll farms click farms and astroturfing another

target:      of disruptive or manipulative practices by its members from sources such as troll farms click farms and astroturfing another example

prediction:  of disruptive or manipulative practices by its members from sources such as troll farms click farms and astroturfing another the


 92%|█████████▏| 13771/15000 [28:25<02:41,  7.62it/s]


 epoch: 13770 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13771 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13772 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 92%|█████████▏| 13774/15000 [28:25<02:16,  8.98it/s]



 epoch: 13773 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13774 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 92%|█████████▏| 13776/15000 [28:26<02:10,  9.41it/s]


 epoch: 13775 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

 epoch: 13776 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13777 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 92%|█████████▏| 13778/15000 [28:26<02:04,  9.81it/s]


 epoch: 13778 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13779 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

input:       critical when developing public facing api common threats include sql injection denial of service attack dos broken authentication and

target:      critical when developing public facing api common threats include sql injection denial of service attack dos broken authentication and exposing

prediction:  critical when developing public facing api common threats include sql injection denial of service attack dos broken authentication and the


 92%|█████████▏| 13781/15000 [28:27<03:19,  6.11it/s]


 epoch: 13780 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13781 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13782 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 92%|█████████▏| 13784/15000 [28:27<02:39,  7.62it/s]


 epoch: 13783 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 13784 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 92%|█████████▏| 13786/15000 [28:27<02:24,  8.39it/s]


 epoch: 13785 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13786 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13787 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.1%


 92%|█████████▏| 13789/15000 [28:27<02:08,  9.45it/s]


 epoch: 13788 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 13789 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 92%|█████████▏| 13791/15000 [28:28<02:17,  8.81it/s]


input:       theorists claim that the term education is context dependent this implies that its meaning varies depending on the situation

target:      theorists claim that the term education is context dependent this implies that its meaning varies depending on the situation in

prediction:  theorists claim that the term education is context dependent this implies that its meaning varies depending on the situation the

 epoch: 13790 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13791 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 92%|█████████▏| 13793/15000 [28:28<02:14,  9.00it/s]


 epoch: 13792 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13793 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 92%|█████████▏| 13795/15000 [28:28<04:07,  4.88it/s]


 epoch: 13794 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13795 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 92%|█████████▏| 13797/15000 [28:29<03:09,  6.34it/s]


 epoch: 13796 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13797 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 92%|█████████▏| 13799/15000 [28:29<02:52,  6.96it/s]


 epoch: 13798 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 13799 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 92%|█████████▏| 13800/15000 [28:29<03:07,  6.39it/s]


input:       the movement of the plates on the surface and the convection of the mantle that is the heat transfer

target:      the movement of the plates on the surface and the convection of the mantle that is the heat transfer caused

prediction:  the movement of the plates on the surface and the convection of the mantle that is the heat transfer the

 epoch: 13800 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 92%|█████████▏| 13802/15000 [28:29<02:46,  7.18it/s]


 epoch: 13801 | train_loss: 0.22, train_acc: 96.7% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13802 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 92%|█████████▏| 13804/15000 [28:30<02:36,  7.64it/s]


 epoch: 13803 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13804 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%


 92%|█████████▏| 13806/15000 [28:30<02:25,  8.22it/s]


 epoch: 13805 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13806 | train_loss: 0.19, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%


 92%|█████████▏| 13808/15000 [28:30<02:33,  7.75it/s]


 epoch: 13807 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.4%

 epoch: 13808 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%


 92%|█████████▏| 13810/15000 [28:30<02:54,  6.80it/s]


 epoch: 13809 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

input:       recession in it impacted much of the region in fears of sovereign debt crisis developed concerning some countries in

target:      recession in it impacted much of the region in fears of sovereign debt crisis developed concerning some countries in europe

prediction:  recession in it impacted much of the region in fears of sovereign debt crisis developed concerning some countries in the

 epoch: 13810 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 92%|█████████▏| 13812/15000 [28:31<02:38,  7.49it/s]


 epoch: 13811 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%

 epoch: 13812 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 92%|█████████▏| 13814/15000 [28:31<02:36,  7.57it/s]


 epoch: 13813 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.3%

 epoch: 13814 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 92%|█████████▏| 13816/15000 [28:31<02:18,  8.53it/s]


 epoch: 13815 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.0%

 epoch: 13816 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 92%|█████████▏| 13818/15000 [28:31<02:17,  8.59it/s]


 epoch: 13817 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13818 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 92%|█████████▏| 13820/15000 [28:32<02:52,  6.84it/s]


 epoch: 13819 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       peacetime as the classicist clifford ando noted most of the cultural appurtenances popularly associated with imperial culture public cult

target:      peacetime as the classicist clifford ando noted most of the cultural appurtenances popularly associated with imperial culture public cult and

prediction:  peacetime as the classicist clifford ando noted most of the cultural appurtenances popularly associated with imperial culture public cult the

 epoch: 13820 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 92%|█████████▏| 13822/15000 [28:32<02:29,  7.89it/s]


 epoch: 13821 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 13822 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 92%|█████████▏| 13825/15000 [28:33<03:38,  5.38it/s]


 epoch: 13823 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13824 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13825 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 92%|█████████▏| 13827/15000 [28:33<02:59,  6.54it/s]


 epoch: 13826 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 13827 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 92%|█████████▏| 13828/15000 [28:33<02:46,  7.04it/s]


 epoch: 13828 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%

 epoch: 13829 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

input:       the natives and settlers came to develop mutual dependency settlers traded for food and animal pelts and native americans

target:      the natives and settlers came to develop mutual dependency settlers traded for food and animal pelts and native americans traded


 92%|█████████▏| 13830/15000 [28:33<02:38,  7.37it/s]


prediction:  the natives and settlers came to develop mutual dependency settlers traded for food and animal pelts and native americans the

 epoch: 13830 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13831 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 92%|█████████▏| 13833/15000 [28:34<02:17,  8.47it/s]


 epoch: 13832 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13833 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 92%|█████████▏| 13835/15000 [28:34<02:11,  8.84it/s]


 epoch: 13834 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13835 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 92%|█████████▏| 13837/15000 [28:34<03:44,  5.17it/s]


 epoch: 13836 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13837 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.3%


 92%|█████████▏| 13839/15000 [28:35<02:57,  6.53it/s]


 epoch: 13838 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 13839 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%

input:       territory the east central brazilian plateau has humid and warm tropical climate the northern and eastern parts of the

target:     

 92%|█████████▏| 13841/15000 [28:35<02:44,  7.04it/s]

 territory the east central brazilian plateau has humid and warm tropical climate the northern and eastern parts of the argentine

prediction:  territory the east central brazilian plateau has humid and warm tropical climate the northern and eastern parts of the the

 epoch: 13840 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13841 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%


 92%|█████████▏| 13844/15000 [28:35<02:13,  8.63it/s]


 epoch: 13842 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%

 epoch: 13843 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.8%

 epoch: 13844 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.0%


 92%|█████████▏| 13847/15000 [28:35<02:01,  9.51it/s]


 epoch: 13845 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 13846 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13847 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 92%|█████████▏| 13848/15000 [28:36<02:00,  9.54it/s]


 epoch: 13848 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.0%

 epoch: 13849 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

input:       replicating information found on the self replicating chromosome while the identification of memes as units conveys their nature to

target:      replicating information found on the self replicating chromosome while the identification of memes as units conveys their nature to replicate


 92%|█████████▏| 13850/15000 [28:36<02:10,  8.81it/s]


prediction:  replicating information found on the self replicating chromosome while the identification of memes as units conveys their nature to the

 epoch: 13850 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 92%|█████████▏| 13853/15000 [28:36<02:53,  6.61it/s]


 epoch: 13851 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%

 epoch: 13852 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

 epoch: 13853 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%


 92%|█████████▏| 13855/15000 [28:37<02:29,  7.65it/s]


 epoch: 13854 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13855 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13856 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 92%|█████████▏| 13858/15000 [28:37<02:10,  8.77it/s]


 epoch: 13857 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13858 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 96.9%

 epoch: 13859 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.9%


 92%|█████████▏| 13861/15000 [28:37<02:08,  8.88it/s]


input:       addition to sending an expeditionary force to fight in the italian campaign brief war was fought between argentina and

target:      addition to sending an expeditionary force to fight in the italian campaign brief war was fought between argentina and the

prediction:  addition to sending an expeditionary force to fight in the italian campaign brief war was fought between argentina and the

 epoch: 13860 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 96.9%

 epoch: 13861 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 92%|█████████▏| 13863/15000 [28:37<02:03,  9.17it/s]


 epoch: 13862 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13863 | train_loss: 0.19, train_acc: 97.5% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13864 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%


 92%|█████████▏| 13866/15000 [28:38<03:19,  5.67it/s]


 epoch: 13865 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13866 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 13867 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%


 92%|█████████▏| 13868/15000 [28:38<02:44,  6.90it/s]


 epoch: 13868 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13869 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

input:       critical thinking but in some cases obedience to an authority is required to ensure social stability by helping people

target:      critical thinking but in some cases obedience to an authority is required to ensure social stability by helping people become

prediction:  critical thinking but in some cases obedience to an authority is required to ensure social stability by helping people the


 92%|█████████▏| 13871/15000 [28:39<02:20,  8.04it/s]


 epoch: 13870 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13871 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13872 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 92%|█████████▏| 13874/15000 [28:39<02:09,  8.69it/s]


 epoch: 13873 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%

 epoch: 13874 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 93%|█████████▎| 13877/15000 [28:39<01:59,  9.36it/s]


 epoch: 13875 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 13876 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13877 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%


 93%|█████████▎| 13878/15000 [28:39<02:02,  9.14it/s]


 epoch: 13878 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 93%|█████████▎| 13880/15000 [28:40<03:21,  5.56it/s]


 epoch: 13879 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%

input:       explorer rge ousland became the first person to cross antarctica alone from coast to coast helped by kite on

target:      explorer rge ousland became the first person to cross antarctica alone from coast to coast helped by kite on parts

prediction:  explorer rge ousland became the first person to cross antarctica alone from coast to coast helped by kite on the

 epoch: 13880 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 93%|█████████▎| 13882/15000 [28:40<02:39,  6.99it/s]


 epoch: 13881 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%

 epoch: 13882 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13883 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 93%|█████████▎| 13886/15000 [28:41<02:05,  8.85it/s]


 epoch: 13884 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13885 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 13886 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.3%


 93%|█████████▎| 13888/15000 [28:41<02:02,  9.10it/s]


 epoch: 13887 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13888 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%


 93%|█████████▎| 13890/15000 [28:41<02:14,  8.25it/s]


 epoch: 13889 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

input:       standard model of particle physics was derived following the discovery of particle with properties consistent with the higgs boson

target:      standard model of particle physics was derived following the discovery of particle with properties consistent with the higgs boson at

prediction:  standard model of particle physics was derived following the discovery of particle with properties consistent with the higgs boson the

 epoch: 13890 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.4%


 93%|█████████▎| 13893/15000 [28:41<02:00,  9.22it/s]


 epoch: 13891 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 13892 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13893 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 93%|█████████▎| 13895/15000 [28:42<03:40,  5.00it/s]


 epoch: 13894 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13895 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 93%|█████████▎| 13897/15000 [28:42<02:59,  6.13it/s]


 epoch: 13896 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.0%

 epoch: 13897 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%


 93%|█████████▎| 13899/15000 [28:43<02:34,  7.14it/s]


 epoch: 13898 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13899 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 93%|█████████▎| 13901/15000 [28:43<02:31,  7.23it/s]


input:       was launched in may other ground and balloon based cosmic microwave background experiments are ongoing using big bang models

target:      was launched in may other ground and balloon based cosmic microwave background experiments are ongoing using big bang models it

prediction:  was launched in may other ground and balloon based cosmic microwave background experiments are ongoing using big bang models the

 epoch: 13900 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13901 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 93%|█████████▎| 13903/15000 [28:43<02:17,  7.96it/s]


 epoch: 13902 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 13903 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 93%|█████████▎| 13905/15000 [28:43<02:21,  7.73it/s]


 epoch: 13904 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13905 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.27, test_acc: 97.0%


 93%|█████████▎| 13907/15000 [28:44<02:19,  7.83it/s]


 epoch: 13906 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13907 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 93%|█████████▎| 13909/15000 [28:44<03:52,  4.69it/s]


 epoch: 13908 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.2%

 epoch: 13909 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.3%


 93%|█████████▎| 13910/15000 [28:45<03:54,  4.64it/s]


input:       magnificent propylaea the entrances built by pericles before the right wing of which was the small temple of athena

target:      magnificent propylaea the entrances built by pericles before the right wing of which was the small temple of athena nike

prediction:  magnificent propylaea the entrances built by pericles before the right wing of which was the small temple of athena the

 epoch: 13910 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 93%|█████████▎| 13912/15000 [28:45<03:06,  5.85it/s]


 epoch: 13911 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13912 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 93%|█████████▎| 13914/15000 [28:45<02:44,  6.62it/s]


 epoch: 13913 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13914 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 93%|█████████▎| 13917/15000 [28:45<02:14,  8.07it/s]


 epoch: 13915 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 13916 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 13917 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 93%|█████████▎| 13919/15000 [28:46<02:01,  8.91it/s]


 epoch: 13918 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13919 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%

input:       in the center and the marquesas islands the tuamotus mangareva islands and easter island to the east australasia comprises

target:      in the center and the marquesas islands the tuamotus mangareva islands and easter island to the east australasia comprises australia


 93%|█████████▎| 13920/15000 [28:46<02:12,  8.13it/s]


prediction:  in the center and the marquesas islands the tuamotus mangareva islands and easter island to the east australasia comprises the

 epoch: 13920 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13921 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 93%|█████████▎| 13923/15000 [28:46<02:04,  8.67it/s]


 epoch: 13922 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 13923 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%


 93%|█████████▎| 13925/15000 [28:46<01:56,  9.24it/s]


 epoch: 13924 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13925 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13926 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 93%|█████████▎| 13929/15000 [28:47<01:48,  9.87it/s]


 epoch: 13927 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%

 epoch: 13928 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13929 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 93%|█████████▎| 13930/15000 [28:47<01:59,  8.93it/s]


input:       complex jobs such as pilot and cosmonaut interdisciplinary studies became popular and scholars such as georgy shchedrovitsky developed systems

target:      complex jobs such as pilot and cosmonaut interdisciplinary studies became popular and scholars such as georgy shchedrovitsky developed systems theory

prediction:  complex jobs such as pilot and cosmonaut interdisciplinary studies became popular and scholars such as georgy shchedrovitsky developed systems the

 epoch: 13930 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 13931 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 93%|█████████▎| 13933/15000 [28:47<01:52,  9.51it/s]


 epoch: 13932 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 13933 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 93%|█████████▎| 13936/15000 [28:47<01:49,  9.75it/s]


 epoch: 13934 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13935 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13936 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%


 93%|█████████▎| 13938/15000 [28:48<03:08,  5.63it/s]


 epoch: 13937 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 13938 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13939 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 93%|█████████▎| 13940/15000 [28:48<02:44,  6.45it/s]



input:       workforce as of sugar exports and the growing tourist industry are the major sources of foreign exchange sugar cane

target:      workforce as of sugar exports and the growing tourist industry are the major sources of foreign exchange sugar cane processing

prediction:  workforce as of sugar exports and the growing tourist industry are the major sources of foreign exchange sugar cane the

 epoch: 13940 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 93%|█████████▎| 13942/15000 [28:48<02:18,  7.63it/s]


 epoch: 13941 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 13942 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%


 93%|█████████▎| 13946/15000 [28:49<01:51,  9.45it/s]


 epoch: 13943 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13944 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13945 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13946 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 93%|█████████▎| 13949/15000 [28:49<01:47,  9.79it/s]


 epoch: 13947 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

 epoch: 13948 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13949 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%


 93%|█████████▎| 13950/15000 [28:49<01:57,  8.91it/s]


input:       at achieving certain goals which include the transmission of knowledge skills and character traits however there is extensive debate

target:      at achieving certain goals which include the transmission of knowledge skills and character traits however there is extensive debate regarding

prediction:  at achieving certain goals which include the transmission of knowledge skills and character traits however there is extensive debate the

 epoch: 13950 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%


 93%|█████████▎| 13953/15000 [28:50<03:00,  5.79it/s]


 epoch: 13951 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13952 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 13953 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%


 93%|█████████▎| 13955/15000 [28:50<02:30,  6.95it/s]


 epoch: 13954 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13955 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 13956 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 93%|█████████▎| 13958/15000 [28:51<02:05,  8.28it/s]


 epoch: 13957 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 13958 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13959 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 93%|█████████▎| 13961/15000 [28:51<02:01,  8.58it/s]


input:       violated their policy of posting personal information of others triggering wave of criticism from users on the donald who

target:      violated their policy of posting personal information of others triggering wave of criticism from users on the donald who felt

prediction:  violated their policy of posting personal information of others triggering wave of criticism from users on the donald who the

 epoch: 13960 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 13961 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 93%|█████████▎| 13963/15000 [28:51<01:55,  8.97it/s]


 epoch: 13962 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13963 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%


 93%|█████████▎| 13964/15000 [28:51<01:52,  9.18it/s]


 epoch: 13964 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%


 93%|█████████▎| 13966/15000 [28:52<03:12,  5.37it/s]


 epoch: 13965 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13966 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%


 93%|█████████▎| 13968/15000 [28:52<02:30,  6.86it/s]


 epoch: 13967 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13968 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

 epoch: 13969 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.1%


 93%|█████████▎| 13970/15000 [28:52<02:17,  7.49it/s]


input:       as well as forum sites and social media has been and continues to be an integral part of internet

target:      as well as forum sites and social media has been and continues to be an integral part of internet culture

prediction:  as well as forum sites and social media has been and continues to be an integral part of internet the

 epoch: 13970 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 13971 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%


 93%|█████████▎| 13974/15000 [28:53<01:50,  9.26it/s]


 epoch: 13972 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 13973 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13974 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 93%|█████████▎| 13976/15000 [28:53<01:47,  9.49it/s]


 epoch: 13975 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13976 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 13977 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 93%|█████████▎| 13978/15000 [28:53<01:44,  9.74it/s]


 epoch: 13978 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%


 93%|█████████▎| 13980/15000 [28:54<03:00,  5.64it/s]


 epoch: 13979 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

input:       common coverage in the videos includes creations made by players walkthroughs of various tasks and parodies of works in

target:      common coverage in the videos includes creations made by players walkthroughs of various tasks and parodies of works in popular

prediction:  common coverage in the videos includes creations made by players walkthroughs of various tasks and parodies of works in the

 epoch: 13980 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 93%|█████████▎| 13982/15000 [28:54<02:35,  6.56it/s]


 epoch: 13981 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 13982 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 13983 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 93%|█████████▎| 13986/15000 [28:54<02:02,  8.25it/s]


 epoch: 13984 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 13985 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 13986 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 93%|█████████▎| 13988/15000 [28:54<01:55,  8.75it/s]


 epoch: 13987 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.25, test_acc: 97.1%

 epoch: 13988 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 93%|█████████▎| 13990/15000 [28:55<02:06,  7.99it/s]


 epoch: 13989 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

input:       as nation state from smaller principalities in the austro hungarian empire was formed saw the unifications of both italy

target:      as nation state from smaller principalities in the austro hungarian empire was formed saw the unifications of both italy and

prediction:  as nation state from smaller principalities in the austro hungarian empire was formed saw the unifications of both italy the

 epoch: 13990 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.4%


 93%|█████████▎| 13992/15000 [28:55<01:58,  8.53it/s]


 epoch: 13991 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 13992 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 93%|█████████▎| 13994/15000 [28:55<03:01,  5.53it/s]


 epoch: 13993 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 13994 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 93%|█████████▎| 13996/15000 [28:56<02:34,  6.50it/s]


 epoch: 13995 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 13996 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 93%|█████████▎| 13998/15000 [28:56<02:21,  7.09it/s]


 epoch: 13997 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.0%

 epoch: 13998 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 96.9%


 93%|█████████▎| 14000/15000 [28:56<02:19,  7.18it/s]


 epoch: 13999 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

input:       survived examples of roman sculpture survive abundantly though often in damaged or fragmentary condition including freestanding statuary in marble

target:      survived examples of roman sculpture survive abundantly though often in damaged or fragmentary condition including freestanding statuary in marble bronze

prediction:  survived examples of roman sculpture survive abundantly though often in damaged or fragmentary condition including freestanding statuary in marble the

 epoch: 14000 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.3%


 93%|█████████▎| 14002/15000 [28:56<02:03,  8.10it/s]


 epoch: 14001 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.3%

 epoch: 14002 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 93%|█████████▎| 14004/15000 [28:57<02:03,  8.07it/s]


 epoch: 14003 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14004 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 93%|█████████▎| 14006/15000 [28:57<02:09,  7.68it/s]


 epoch: 14005 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14006 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 93%|█████████▎| 14007/15000 [28:57<02:07,  7.76it/s]


 epoch: 14007 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 93%|█████████▎| 14009/15000 [28:58<03:25,  4.82it/s]


 epoch: 14008 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14009 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 93%|█████████▎| 14010/15000 [28:58<03:19,  4.97it/s]


input:       while mathematics is concerned with abstract patterns even beyond the real world thus physics statements are synthetic while mathematical

target:      while mathematics is concerned with abstract patterns even beyond the real world thus physics statements are synthetic while mathematical statements

prediction:  while mathematics is concerned with abstract patterns even beyond the real world thus physics statements are synthetic while mathematical the

 epoch: 14010 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 93%|█████████▎| 14012/15000 [28:58<02:42,  6.09it/s]


 epoch: 14011 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14012 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 93%|█████████▎| 14014/15000 [28:58<02:20,  7.00it/s]


 epoch: 14013 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14014 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.9%


 93%|█████████▎| 14016/15000 [28:59<02:06,  7.77it/s]


 epoch: 14015 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14016 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 93%|█████████▎| 14018/15000 [28:59<02:06,  7.79it/s]


 epoch: 14017 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14018 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%


 93%|█████████▎| 14020/15000 [28:59<02:08,  7.65it/s]


 epoch: 14019 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

input:       logo to represent the subreddit redditors can also submit their own logos which sometimes appear on the site front

target:      logo to represent the subreddit redditors can also submit their own logos which sometimes appear on the site front page

prediction:  logo to represent the subreddit redditors can also submit their own logos which sometimes appear on the site front the

 epoch: 14020 | train_loss: 0.19, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 93%|█████████▎| 14021/15000 [28:59<01:59,  8.19it/s]


 epoch: 14021 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 93%|█████████▎| 14023/15000 [29:00<03:22,  4.82it/s]


 epoch: 14022 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14023 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 94%|█████████▎| 14026/15000 [29:00<02:15,  7.20it/s]


 epoch: 14024 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14025 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14026 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 94%|█████████▎| 14029/15000 [29:01<01:52,  8.65it/s]


 epoch: 14027 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14028 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 14029 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 94%|█████████▎| 14030/15000 [29:01<01:59,  8.13it/s]


input:       income citation needed in the second half of the th century bc athens fell under the tyranny of pisistratus

target:      income citation needed in the second half of the th century bc athens fell under the tyranny of pisistratus followed

prediction:  income citation needed in the second half of the th century bc athens fell under the tyranny of pisistratus the

 epoch: 14030 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14031 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 94%|█████████▎| 14033/15000 [29:01<01:49,  8.81it/s]


 epoch: 14032 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.26, test_acc: 96.8%

 epoch: 14033 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 94%|█████████▎| 14034/15000 [29:01<01:48,  8.93it/s]


 epoch: 14034 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 94%|█████████▎| 14037/15000 [29:02<02:18,  6.97it/s]


 epoch: 14035 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14036 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 14037 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 94%|█████████▎| 14039/15000 [29:02<02:02,  7.82it/s]


 epoch: 14038 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14039 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

input:       the monsoon circulation dominates across southern and eastern sections due to the presence of the himalayas forcing the formation


 94%|█████████▎| 14040/15000 [29:02<02:11,  7.32it/s]


target:      the monsoon circulation dominates across southern and eastern sections due to the presence of the himalayas forcing the formation of

prediction:  the monsoon circulation dominates across southern and eastern sections due to the presence of the himalayas forcing the formation the

 epoch: 14040 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

 epoch: 14041 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 94%|█████████▎| 14044/15000 [29:02<01:46,  9.02it/s]


 epoch: 14042 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 14043 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.9%

 epoch: 14044 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 96.9%


 94%|█████████▎| 14047/15000 [29:03<01:40,  9.49it/s]


 epoch: 14045 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14046 | train_loss: 0.21, train_acc: 97.5% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14047 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14048 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 94%|█████████▎| 14050/15000 [29:04<02:50,  5.56it/s]


 epoch: 14049 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

input:       colosseum became the regular arena for blood sports in rome many roman amphitheatres circuses and theatres built in cities

target:      colosseum became the regular arena for blood sports in rome many roman amphitheatres circuses and theatres built in cities outside

prediction:  colosseum became the regular arena for blood sports in rome many roman amphitheatres circuses and theatres built in cities the

 epoch: 14050 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 94%|█████████▎| 14052/15000 [29:04<02:20,  6.77it/s]


 epoch: 14051 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14052 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14053 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 94%|█████████▎| 14055/15000 [29:04<01:58,  7.98it/s]


 epoch: 14054 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14055 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14056 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 94%|█████████▎| 14059/15000 [29:04<01:38,  9.51it/s]


 epoch: 14057 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14058 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14059 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 94%|█████████▎| 14061/15000 [29:05<01:42,  9.19it/s]


input:       users who earn the title by creating subreddit or being promoted by current moderator reddit users may also request

target:      users who earn the title by creating subreddit or being promoted by current moderator reddit users may also request to

prediction:  users who earn the title by creating subreddit or being promoted by current moderator reddit users may also request the

 epoch: 14060 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14061 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 94%|█████████▎| 14062/15000 [29:05<01:41,  9.22it/s]


 epoch: 14062 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14063 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 94%|█████████▍| 14065/15000 [29:05<02:39,  5.87it/s]


 epoch: 14064 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.27, test_acc: 96.8%

 epoch: 14065 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 94%|█████████▍| 14068/15000 [29:06<02:02,  7.60it/s]


 epoch: 14066 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14067 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14068 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.0%


 94%|█████████▍| 14070/15000 [29:06<01:58,  7.82it/s]


 epoch: 14069 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

input:       the east antarctic ice sheet as whole range from slightly positive to slightly negative increased ice outflow has been

target:      the east antarctic ice sheet as whole range from slightly positive to slightly negative increased ice outflow has been observed

prediction:  the east antarctic ice sheet as whole range from slightly positive to slightly negative increased ice outflow has been the

 epoch: 14070 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 94%|█████████▍| 14073/15000 [29:06<01:45,  8.77it/s]


 epoch: 14071 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%

 epoch: 14072 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14073 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.26, test_acc: 96.9%


 94%|█████████▍| 14075/15000 [29:07<01:42,  9.05it/s]


 epoch: 14074 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 14075 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 94%|█████████▍| 14077/15000 [29:07<01:38,  9.35it/s]


 epoch: 14076 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14077 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 94%|█████████▍| 14078/15000 [29:07<03:24,  4.50it/s]


 epoch: 14078 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14079 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

input:       in asia but is considered european nation both culturally and politically the gobi desert is in mongolia and the

target:      in asia but is considered european nation both culturally and politically the gobi desert is in mongolia and the arabian

prediction:  in asia but is considered european nation both culturally and politically the gobi desert is in mongolia and the the


 94%|█████████▍| 14082/15000 [29:08<02:08,  7.12it/s]


 epoch: 14080 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14081 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14082 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.26, test_acc: 97.0%


 94%|█████████▍| 14084/15000 [29:08<01:53,  8.06it/s]


 epoch: 14083 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14084 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 96.9%

 epoch: 14085 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 94%|█████████▍| 14087/15000 [29:08<01:41,  8.99it/s]


 epoch: 14086 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14087 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 94%|█████████▍| 14088/15000 [29:08<01:40,  9.04it/s]


 epoch: 14088 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 14089 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

input:       recent took place in santiago in south american cricket championship is an international one day cricket tournament played since

target:      recent took place in santiago in south american cricket championship is an international one day cricket tournament played since featuring

prediction:  recent took place in santiago in south american cricket championship is an international one day cricket tournament played since the


 94%|█████████▍| 14091/15000 [29:09<01:41,  8.98it/s]


 epoch: 14090 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.8%

 epoch: 14091 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 94%|█████████▍| 14093/15000 [29:09<02:54,  5.21it/s]


 epoch: 14092 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

 epoch: 14093 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 94%|█████████▍| 14095/15000 [29:10<02:27,  6.15it/s]


 epoch: 14094 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14095 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 94%|█████████▍| 14098/15000 [29:10<01:51,  8.07it/s]


 epoch: 14096 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14097 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14098 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 94%|█████████▍| 14100/15000 [29:10<01:58,  7.62it/s]


 epoch: 14099 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

input:       each other in sequences which are usually represented by distinct letters in alphabetic scripts such as the roman script

target:      each other in sequences which are usually represented by distinct letters in alphabetic scripts such as the roman script in

prediction:  each other in sequences which are usually represented by distinct letters in alphabetic scripts such as the roman script the

 epoch: 14100 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 94%|█████████▍| 14102/15000 [29:10<01:47,  8.38it/s]


 epoch: 14101 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14102 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 94%|█████████▍| 14104/15000 [29:11<01:53,  7.90it/s]


 epoch: 14103 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 14104 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 94%|█████████▍| 14106/15000 [29:11<01:55,  7.71it/s]


 epoch: 14105 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 14106 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%


 94%|█████████▍| 14108/15000 [29:11<01:44,  8.50it/s]


 epoch: 14107 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 14108 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 94%|█████████▍| 14110/15000 [29:11<02:00,  7.36it/s]


 epoch: 14109 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.2%

input:       branches of mathematics were part of natural philosophy but during the scientific revolution in the th century these natural

target:      branches of mathematics were part of natural philosophy but during the scientific revolution in the th century these natural sciences

prediction:  branches of mathematics were part of natural philosophy but during the scientific revolution in the th century these natural the

 epoch: 14110 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%


 94%|█████████▍| 14112/15000 [29:12<01:52,  7.93it/s]


 epoch: 14111 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14112 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 94%|█████████▍| 14114/15000 [29:12<01:50,  7.98it/s]


 epoch: 14113 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14114 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 94%|█████████▍| 14116/15000 [29:12<01:50,  7.99it/s]


 epoch: 14115 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14116 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.4%


 94%|█████████▍| 14118/15000 [29:12<01:46,  8.29it/s]


 epoch: 14117 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14118 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 94%|█████████▍| 14120/15000 [29:13<01:59,  7.36it/s]


 epoch: 14119 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

input:       part of the pacific particularly the western pacific is variously delimited adding that majority viewpoint accepts the landmasses of

target:      part of the pacific particularly the western pacific is variously delimited adding that majority viewpoint accepts the landmasses of japan

prediction:  part of the pacific particularly the western pacific is variously delimited adding that majority viewpoint accepts the landmasses of the

 epoch: 14120 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%


 94%|█████████▍| 14123/15000 [29:13<02:38,  5.53it/s]


 epoch: 14121 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 14122 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 14123 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 94%|█████████▍| 14126/15000 [29:14<01:58,  7.40it/s]


 epoch: 14124 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14125 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

 epoch: 14126 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 94%|█████████▍| 14128/15000 [29:14<01:44,  8.31it/s]


 epoch: 14127 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.3%

 epoch: 14128 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14129 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 94%|█████████▍| 14131/15000 [29:14<01:42,  8.49it/s]


input:       observed leonardo at work and wrote that some days he would paint from dawn till dusk without stopping to

target:      observed leonardo at work and wrote that some days he would paint from dawn till dusk without stopping to eat

prediction:  observed leonardo at work and wrote that some days he would paint from dawn till dusk without stopping to the

 epoch: 14130 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14131 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 94%|█████████▍| 14133/15000 [29:14<01:35,  9.05it/s]


 epoch: 14132 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14133 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 94%|█████████▍| 14134/15000 [29:14<01:36,  8.96it/s]


 epoch: 14134 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%


 94%|█████████▍| 14136/15000 [29:15<02:39,  5.41it/s]


 epoch: 14135 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14136 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 94%|█████████▍| 14139/15000 [29:15<01:56,  7.36it/s]


 epoch: 14137 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14138 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14139 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%


 94%|█████████▍| 14140/15000 [29:16<02:02,  6.99it/s]


input:       unable to retaliate when ridiculed later in greek philosophy aristotle in the poetics pp suggested that an ugliness that

target:      unable to retaliate when ridiculed later in greek philosophy aristotle in the poetics pp suggested that an ugliness that does

prediction:  unable to retaliate when ridiculed later in greek philosophy aristotle in the poetics pp suggested that an ugliness that the

 epoch: 14140 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 96.9%

 epoch: 14141 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 94%|█████████▍| 14144/15000 [29:16<01:38,  8.69it/s]


 epoch: 14142 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 14143 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14144 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.1%


 94%|█████████▍| 14147/15000 [29:16<01:30,  9.39it/s]


 epoch: 14145 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14146 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14147 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14148 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 94%|█████████▍| 14150/15000 [29:17<02:10,  6.52it/s]


 epoch: 14149 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

input:       interface the separation of the api from its implementation can allow programs written in one language to use library

target:      interface the separation of the api from its implementation can allow programs written in one language to use library written

prediction:  interface the separation of the api from its implementation can allow programs written in one language to use library the

 epoch: 14150 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.8%


 94%|█████████▍| 14153/15000 [29:17<01:45,  8.05it/s]


 epoch: 14151 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.20, test_acc: 97.3%

 epoch: 14152 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14153 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.1%


 94%|█████████▍| 14155/15000 [29:17<01:35,  8.85it/s]


 epoch: 14154 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14155 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14156 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%


 94%|█████████▍| 14159/15000 [29:18<01:25,  9.79it/s]


 epoch: 14157 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14158 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14159 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.3%


 94%|█████████▍| 14161/15000 [29:18<01:31,  9.13it/s]


input:       not safe for work communities and others that are most commonly filtered out by users even if they are

target:      not safe for work communities and others that are most commonly filtered out by users even if they are safe

prediction:  not safe for work communities and others that are most commonly filtered out by users even if they are the

 epoch: 14160 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14161 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 94%|█████████▍| 14162/15000 [29:18<01:30,  9.27it/s]


 epoch: 14162 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 94%|█████████▍| 14165/15000 [29:19<02:08,  6.50it/s]


 epoch: 14163 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14164 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 14165 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 94%|█████████▍| 14168/15000 [29:19<01:43,  8.06it/s]


 epoch: 14166 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 14167 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14168 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 94%|█████████▍| 14170/15000 [29:19<01:39,  8.31it/s]


 epoch: 14169 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

input:       the light speed invariance and temperatures dropped by factor of this concept is motivated by the flatness problem where

target:      the light speed invariance and temperatures dropped by factor of this concept is motivated by the flatness problem where the

prediction:  the light speed invariance and temperatures dropped by factor of this concept is motivated by the flatness problem where the

 epoch: 14170 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.2%


 94%|█████████▍| 14173/15000 [29:20<01:31,  9.04it/s]


 epoch: 14171 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%

 epoch: 14172 | train_loss: 0.22, train_acc: 96.8% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14173 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 94%|█████████▍| 14175/15000 [29:20<01:29,  9.21it/s]


 epoch: 14174 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%

 epoch: 14175 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 95%|█████████▍| 14176/15000 [29:20<01:28,  9.28it/s]


 epoch: 14176 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 95%|█████████▍| 14179/15000 [29:21<02:19,  5.88it/s]


 epoch: 14177 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.19, test_acc: 97.4%

 epoch: 14178 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14179 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 95%|█████████▍| 14180/15000 [29:21<02:13,  6.13it/s]


input:       and selection of military personnel mental testing also became popular in the where it was applied to schoolchildren

target:      and selection of military personnel mental testing also became popular in the where it was applied to schoolchildren the

prediction:  and selection of military personnel mental testing also became popular in the where it was applied to schoolchildren the

 epoch: 14180 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14181 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 95%|█████████▍| 14184/15000 [29:21<01:36,  8.46it/s]


 epoch: 14182 | train_loss: 0.21, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14183 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.9%

 epoch: 14184 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 95%|█████████▍| 14187/15000 [29:21<01:27,  9.27it/s]


 epoch: 14185 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.8%

 epoch: 14186 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14187 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%


 95%|█████████▍| 14189/15000 [29:22<01:25,  9.51it/s]


 epoch: 14188 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14189 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

input:       lower egypt in the north while rival clan based in thebes the intef family took control of upper egypt

target:      lower egypt in the north while rival clan based in thebes the intef family took control of upper egypt in

prediction:  lower egypt in the north while rival clan based in thebes the intef family took control of upper egypt the


 95%|█████████▍| 14190/15000 [29:22<01:31,  8.82it/s]


 epoch: 14190 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%


 95%|█████████▍| 14193/15000 [29:23<02:12,  6.10it/s]


 epoch: 14191 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14192 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 14193 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 95%|█████████▍| 14195/15000 [29:23<01:52,  7.16it/s]


 epoch: 14194 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14195 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14196 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 95%|█████████▍| 14199/15000 [29:23<01:29,  8.96it/s]


 epoch: 14197 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14198 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14199 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 95%|█████████▍| 14200/15000 [29:23<01:38,  8.14it/s]


input:       poorly represented by extant fragments literacy began to decline during the crisis of the third century the emperor julian

target:      poorly represented by extant fragments literacy began to decline during the crisis of the third century the emperor julian banned

prediction:  poorly represented by extant fragments literacy began to decline during the crisis of the third century the emperor julian the

 epoch: 14200 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 95%|█████████▍| 14202/15000 [29:24<01:40,  7.96it/s]


 epoch: 14201 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14202 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 95%|█████████▍| 14204/15000 [29:24<01:40,  7.91it/s]


 epoch: 14203 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 14204 | train_loss: 0.21, train_acc: 97.5% | test_loss: 0.25, test_acc: 97.1%


 95%|█████████▍| 14205/15000 [29:24<01:39,  7.95it/s]


 epoch: 14205 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.0%


 95%|█████████▍| 14207/15000 [29:25<02:45,  4.80it/s]


 epoch: 14206 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14207 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 95%|█████████▍| 14209/15000 [29:25<02:08,  6.16it/s]


 epoch: 14208 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14209 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%


 95%|█████████▍| 14210/15000 [29:25<02:16,  5.80it/s]


input:       for work the subreddit all originally did not filter topics but as of it does not include not safe

target:      for work the subreddit all originally did not filter topics but as of it does not include not safe for

prediction:  for work the subreddit all originally did not filter topics but as of it does not include not safe the

 epoch: 14210 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 95%|█████████▍| 14212/15000 [29:25<01:53,  6.94it/s]


 epoch: 14211 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%

 epoch: 14212 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 95%|█████████▍| 14214/15000 [29:26<01:46,  7.38it/s]


 epoch: 14213 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14214 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 95%|█████████▍| 14216/15000 [29:26<01:46,  7.38it/s]


 epoch: 14215 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 14216 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 95%|█████████▍| 14218/15000 [29:26<01:37,  8.05it/s]


 epoch: 14217 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14218 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 95%|█████████▍| 14219/15000 [29:26<01:34,  8.25it/s]


 epoch: 14219 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%

input:       the censorship of skulls and other symbols some other changes were in the cosmetics in certain maps for example

target:      the censorship of skulls and other symbols some other changes were in the cosmetics in certain maps for example the

prediction:  the censorship of skulls and other symbols some other changes were in the cosmetics in certain maps for example the


 95%|█████████▍| 14221/15000 [29:27<02:08,  6.07it/s]


 epoch: 14220 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14221 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


 95%|█████████▍| 14223/15000 [29:27<01:48,  7.13it/s]


 epoch: 14222 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%

 epoch: 14223 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%


 95%|█████████▍| 14225/15000 [29:27<01:36,  8.05it/s]


 epoch: 14224 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14225 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14226 | train_loss: 0.19, train_acc: 97.3% | test_loss: 0.24, test_acc: 96.8%


 95%|█████████▍| 14229/15000 [29:27<01:19,  9.65it/s]


 epoch: 14227 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.4%

 epoch: 14228 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14229 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%


 95%|█████████▍| 14231/15000 [29:28<01:26,  8.91it/s]


input:       citizen led interviews with famous people on the popular ama subreddit organizers of the blackout also expressed resentment about

target:      citizen led interviews with famous people on the popular ama subreddit organizers of the blackout also expressed resentment about the

prediction:  citizen led interviews with famous people on the popular ama subreddit organizers of the blackout also expressed resentment about the

 epoch: 14230 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14231 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 95%|█████████▍| 14232/15000 [29:28<01:24,  9.07it/s]


 epoch: 14232 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14233 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.3%


 95%|█████████▍| 14236/15000 [29:29<02:04,  6.16it/s]


 epoch: 14234 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14235 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 14236 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 95%|█████████▍| 14238/15000 [29:29<01:45,  7.21it/s]


 epoch: 14237 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14238 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14239 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 95%|█████████▍| 14240/15000 [29:29<01:40,  7.55it/s]


input:       parents care about their educational efforts this tends to lead to increased self esteem better attendance rates and more

target:      parents care about their educational efforts this tends to lead to increased self esteem better attendance rates and more constructive

prediction:  parents care about their educational efforts this tends to lead to increased self esteem better attendance rates and more the

 epoch: 14240 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 14241 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.1%


 95%|█████████▍| 14244/15000 [29:29<01:24,  8.97it/s]


 epoch: 14242 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14243 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

 epoch: 14244 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.2%


 95%|█████████▍| 14246/15000 [29:30<01:21,  9.29it/s]


 epoch: 14245 | train_loss: 0.21, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14246 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.0%

 epoch: 14247 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 95%|█████████▍| 14248/15000 [29:30<02:42,  4.62it/s]


 epoch: 14248 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14249 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

input:       his feeling for line and for light and shade forever transmuted it into life communicating values the interest in

target:      his feeling for line and for light and shade forever transmuted it into life communicating values the interest in leonardo

prediction:  his feeling for line and for light and shade forever transmuted it into life communicating values the interest in the


 95%|█████████▌| 14252/15000 [29:31<01:45,  7.09it/s]


 epoch: 14250 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14251 | train_loss: 0.18, train_acc: 97.5% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14252 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 95%|█████████▌| 14254/15000 [29:31<01:32,  8.05it/s]


 epoch: 14253 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%

 epoch: 14254 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14255 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.19, test_acc: 97.4%


 95%|█████████▌| 14258/15000 [29:31<01:19,  9.37it/s]


 epoch: 14256 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14257 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14258 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 95%|█████████▌| 14260/15000 [29:31<01:20,  9.15it/s]


 epoch: 14259 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

input:       culture the hacker ethic and gamer culture which to varying degrees embrace and amplify cultural values such as curious

target:      culture the hacker ethic and gamer culture which to varying degrees embrace and amplify cultural values such as curious playfulness

prediction:  culture the hacker ethic and gamer culture which to varying degrees embrace and amplify cultural values such as curious the

 epoch: 14260 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 96.8%

 epoch: 14261 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 95%|█████████▌| 14263/15000 [29:32<01:47,  6.83it/s]


 epoch: 14262 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14263 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14264 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 95%|█████████▌| 14267/15000 [29:32<01:25,  8.59it/s]


 epoch: 14265 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14266 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14267 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 95%|█████████▌| 14268/15000 [29:33<01:23,  8.78it/s]


 epoch: 14268 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14269 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.4%

input:       an applied discipline like geology or electrical engineering it usually differs from engineering in that an applied physicist may

target:      an applied discipline like geology or electrical engineering it usually differs from engineering in that an applied physicist may not

prediction:  an applied discipline like geology or electrical engineering it usually differs from engineering in that an applied physicist may the


 95%|█████████▌| 14272/15000 [29:33<01:19,  9.12it/s]


 epoch: 14270 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 14271 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14272 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 95%|█████████▌| 14275/15000 [29:33<01:16,  9.50it/s]


 epoch: 14273 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14274 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 96.9%

 epoch: 14275 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 95%|█████████▌| 14278/15000 [29:34<01:48,  6.68it/s]


 epoch: 14276 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14277 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 14278 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 95%|█████████▌| 14280/15000 [29:34<01:45,  6.83it/s]


 epoch: 14279 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

input:       beautiful and harkens back to the saint jerome with the figure set at an oblique angle what makes this

target:      beautiful and harkens back to the saint jerome with the figure set at an oblique angle what makes this painting

prediction:  beautiful and harkens back to the saint jerome with the figure set at an oblique angle what makes this the

 epoch: 14280 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%


 95%|█████████▌| 14282/15000 [29:34<01:29,  7.99it/s]


 epoch: 14281 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14282 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14283 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%


 95%|█████████▌| 14286/15000 [29:35<01:16,  9.29it/s]


 epoch: 14284 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14285 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14286 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 95%|█████████▌| 14288/15000 [29:35<01:16,  9.31it/s]


 epoch: 14287 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14288 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 95%|█████████▌| 14289/15000 [29:35<01:15,  9.39it/s]


 epoch: 14289 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

input:       may develop false ideas and inaccurately assess their learning progress it is closely related to lifelong education which is

target:      may develop false ideas and inaccurately assess their learning progress it is closely related to lifelong education which is an

prediction:  may develop false ideas and inaccurately assess their learning progress it is closely related to lifelong education which is the


 95%|█████████▌| 14292/15000 [29:36<02:04,  5.70it/s]


 epoch: 14290 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14291 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14292 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 95%|█████████▌| 14295/15000 [29:36<01:36,  7.32it/s]


 epoch: 14293 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.4%

 epoch: 14294 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14295 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 95%|█████████▌| 14297/15000 [29:36<01:24,  8.28it/s]


 epoch: 14296 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14297 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.22, test_acc: 96.9%

 epoch: 14298 | train_loss: 0.22, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.1%


 95%|█████████▌| 14300/15000 [29:37<01:21,  8.60it/s]


 epoch: 14299 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

input:       cultural analogue to gene meme theory originated as an attempt to apply biological evolutionary principles to cultural information transfer

target:      cultural analogue to gene meme theory originated as an attempt to apply biological evolutionary principles to cultural information transfer and

prediction:  cultural analogue to gene meme theory originated as an attempt to apply biological evolutionary principles to cultural information transfer the

 epoch: 14300 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 95%|█████████▌| 14303/15000 [29:37<01:14,  9.35it/s]


 epoch: 14301 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14302 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%

 epoch: 14303 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 95%|█████████▌| 14305/15000 [29:38<01:51,  6.22it/s]


 epoch: 14304 | train_loss: 0.25, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14305 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.0%


 95%|█████████▌| 14307/15000 [29:38<01:37,  7.13it/s]


 epoch: 14306 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14307 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 95%|█████████▌| 14309/15000 [29:38<01:30,  7.62it/s]


 epoch: 14308 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.8%

 epoch: 14309 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.0%


 95%|█████████▌| 14310/15000 [29:38<01:40,  6.83it/s]


input:       of china has built increasingly stronger ties with african nations and is africa largest trading partner in chinese companies

target:      of china has built increasingly stronger ties with african nations and is africa largest trading partner in chinese companies invested

prediction:  of china has built increasingly stronger ties with african nations and is africa largest trading partner in chinese companies the

 epoch: 14310 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 95%|█████████▌| 14312/15000 [29:39<01:32,  7.42it/s]


 epoch: 14311 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14312 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 95%|█████████▌| 14314/15000 [29:39<01:30,  7.55it/s]


 epoch: 14313 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14314 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 95%|█████████▌| 14316/15000 [29:39<01:34,  7.27it/s]


 epoch: 14315 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.23, test_acc: 97.3%

 epoch: 14316 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 95%|█████████▌| 14318/15000 [29:39<01:28,  7.74it/s]


 epoch: 14317 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14318 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 95%|█████████▌| 14320/15000 [29:40<02:37,  4.31it/s]


 epoch: 14319 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

input:       george iii acknowledged britain defeat in the war leading to the signing of the treaty of paris on september

target:      george iii acknowledged britain defeat in the war leading to the signing of the treaty of paris on september which

prediction:  george iii acknowledged britain defeat in the war leading to the signing of the treaty of paris on september the

 epoch: 14320 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 95%|█████████▌| 14322/15000 [29:40<02:02,  5.54it/s]


 epoch: 14321 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%

 epoch: 14322 | train_loss: 0.19, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 95%|█████████▌| 14324/15000 [29:41<01:41,  6.63it/s]


 epoch: 14323 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14324 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 96%|█████████▌| 14327/15000 [29:41<01:23,  8.07it/s]


 epoch: 14325 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%

 epoch: 14326 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.26, test_acc: 96.9%

 epoch: 14327 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 96%|█████████▌| 14329/15000 [29:41<01:15,  8.92it/s]


 epoch: 14328 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14329 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

input:       mediterranean black and caspian seas to the south land relief in europe shows great variation within relatively small areas

target:      mediterranean black and caspian seas to the south land relief in europe shows great variation within relatively small areas the

prediction:  mediterranean black and caspian seas to the south land relief in europe shows great variation within relatively small areas the


 96%|█████████▌| 14332/15000 [29:41<01:12,  9.18it/s]


 epoch: 14330 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14331 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14332 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 96%|█████████▌| 14335/15000 [29:42<01:20,  8.31it/s]


 epoch: 14333 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14334 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14335 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 96%|█████████▌| 14338/15000 [29:42<01:12,  9.09it/s]


 epoch: 14336 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14337 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14338 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.4%


 96%|█████████▌| 14340/15000 [29:42<01:19,  8.32it/s]


 epoch: 14339 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

input:       reddit hug of death users have used reddit as platform for their charitable and philanthropic efforts redditors raised more

target:      reddit hug of death users have used reddit as platform for their charitable and philanthropic efforts redditors raised more than

prediction:  reddit hug of death users have used reddit as platform for their charitable and philanthropic efforts redditors raised more the

 epoch: 14340 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 96%|█████████▌| 14342/15000 [29:43<01:13,  8.95it/s]


 epoch: 14341 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14342 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 96%|█████████▌| 14345/15000 [29:43<01:09,  9.36it/s]


 epoch: 14343 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14344 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%

 epoch: 14345 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.2%


 96%|█████████▌| 14348/15000 [29:44<01:45,  6.15it/s]


 epoch: 14346 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14347 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 14348 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 96.9%


 96%|█████████▌| 14350/15000 [29:44<01:36,  6.76it/s]


 epoch: 14349 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

input:       audiophiles who prefer the sound of analog vinyl records to digital recordings in for the first time since the

target:      audiophiles who prefer the sound of analog vinyl records to digital recordings in for the first time since the vinyl

prediction:  audiophiles who prefer the sound of analog vinyl records to digital recordings in for the first time since the the

 epoch: 14350 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 96%|█████████▌| 14352/15000 [29:44<01:24,  7.67it/s]


 epoch: 14351 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14352 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14353 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 96%|█████████▌| 14356/15000 [29:44<01:10,  9.09it/s]


 epoch: 14354 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%

 epoch: 14355 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 14356 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 96%|█████████▌| 14358/15000 [29:45<01:10,  9.10it/s]


 epoch: 14357 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14358 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%

 epoch: 14359 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

input:       platform later five day testing period began during the testing period streaming was for select group of users allowing

target:      platform later five day testing period began during the testing period streaming was for select group of users allowing minutes

prediction:  platform later five day testing period began during the testing period streaming was for select group of users allowing the


 96%|█████████▌| 14362/15000 [29:46<01:43,  6.16it/s]


 epoch: 14360 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14361 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 14362 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 96%|█████████▌| 14365/15000 [29:46<01:21,  7.75it/s]


 epoch: 14363 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14364 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14365 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%


 96%|█████████▌| 14367/15000 [29:46<01:13,  8.57it/s]


 epoch: 14366 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.0%

 epoch: 14367 | train_loss: 0.23, train_acc: 96.7% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14368 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 96%|█████████▌| 14369/15000 [29:46<01:08,  9.16it/s]


 epoch: 14369 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

input:       polygon blockchain powered digital wallet the vault richard lawler of the verge described them as non fungible tokens nfts

target:      polygon blockchain powered digital wallet the vault richard lawler of the verge described them as non fungible tokens nfts that

prediction:  polygon blockchain powered digital wallet the vault richard lawler of the verge described them as non fungible tokens nfts the

 epoch: 14370 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%


 96%|█████████▌| 14373/15000 [29:47<01:06,  9.44it/s]


 epoch: 14371 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14372 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 14373 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 96%|█████████▌| 14375/15000 [29:47<01:08,  9.07it/s]


 epoch: 14374 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.20, test_acc: 97.4%

 epoch: 14375 | train_loss: 0.19, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%


 96%|█████████▌| 14377/15000 [29:47<01:05,  9.45it/s]


 epoch: 14376 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.7%

 epoch: 14377 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14378 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 96%|█████████▌| 14380/15000 [29:47<01:08,  9.09it/s]


 epoch: 14379 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

input:       prosperity antoninus pius reign was comparatively peaceful there were several military disturbances throughout the empire in his time in

target:      prosperity antoninus pius reign was comparatively peaceful there were several military disturbances throughout the empire in his time in mauretania

prediction:  prosperity antoninus pius reign was comparatively peaceful there were several military disturbances throughout the empire in his time in the

 epoch: 14380 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


 96%|█████████▌| 14382/15000 [29:48<01:03,  9.69it/s]


 epoch: 14381 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.2%

 epoch: 14382 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 14383 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 96%|█████████▌| 14386/15000 [29:48<01:00, 10.13it/s]


 epoch: 14384 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 14385 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.0%

 epoch: 14386 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.0%


 96%|█████████▌| 14388/15000 [29:48<01:00, 10.14it/s]


 epoch: 14387 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14388 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 96%|█████████▌| 14390/15000 [29:49<01:45,  5.80it/s]


 epoch: 14389 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.2%

input:       models to explain the apparent motion of the planets were developed in the th century bc by eudoxus of

target:      models to explain the apparent motion of the planets were developed in the th century bc by eudoxus of cnidus

prediction:  models to explain the apparent motion of the planets were developed in the th century bc by eudoxus of the

 epoch: 14390 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.1%


 96%|█████████▌| 14392/15000 [29:49<01:29,  6.76it/s]


 epoch: 14391 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14392 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14393 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 96%|█████████▌| 14396/15000 [29:49<01:10,  8.51it/s]


 epoch: 14394 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14395 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14396 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 96%|█████████▌| 14398/15000 [29:50<01:06,  9.08it/s]


 epoch: 14397 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14398 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14399 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 96%|█████████▌| 14400/15000 [29:50<01:08,  8.72it/s]


input:       led to australian variants of their staple foods such as the chinese inspired dim sim and chiko roll the

target:      led to australian variants of their staple foods such as the chinese inspired dim sim and chiko roll the music

prediction:  led to australian variants of their staple foods such as the chinese inspired dim sim and chiko roll the the

 epoch: 14400 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14401 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 96%|█████████▌| 14402/15000 [29:50<01:04,  9.23it/s]


 epoch: 14402 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 96%|█████████▌| 14405/15000 [29:51<01:31,  6.53it/s]


 epoch: 14403 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14404 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14405 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 96%|█████████▌| 14407/15000 [29:51<01:23,  7.11it/s]


 epoch: 14406 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14407 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.19, test_acc: 97.2%


 96%|█████████▌| 14409/15000 [29:51<01:16,  7.72it/s]


 epoch: 14408 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14409 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

input:       bc to possibly million by the st millennium bc before growing significantly towards the end of that millennium according


 96%|█████████▌| 14411/15000 [29:51<01:19,  7.43it/s]


target:      bc to possibly million by the st millennium bc before growing significantly towards the end of that millennium according to

prediction:  bc to possibly million by the st millennium bc before growing significantly towards the end of that millennium according the

 epoch: 14410 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14411 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


 96%|█████████▌| 14413/15000 [29:52<01:10,  8.38it/s]


 epoch: 14412 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14413 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 96%|█████████▌| 14415/15000 [29:52<01:10,  8.32it/s]


 epoch: 14414 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.3%

 epoch: 14415 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 96%|█████████▌| 14416/15000 [29:52<01:08,  8.47it/s]


 epoch: 14416 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 96%|█████████▌| 14418/15000 [29:53<01:59,  4.87it/s]


 epoch: 14417 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 14418 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 96%|█████████▌| 14420/15000 [29:53<01:43,  5.61it/s]


 epoch: 14419 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

input:       available for windows sp windows server sp and windows server new features in powershell include windows management framework wmf

target:      available for windows sp windows server sp and windows server new features in powershell include windows management framework wmf rtm

prediction:  available for windows sp windows server sp and windows server new features in powershell include windows management framework wmf the

 epoch: 14420 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%


 96%|█████████▌| 14422/15000 [29:53<01:22,  6.99it/s]


 epoch: 14421 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14422 | train_loss: 0.26, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 96%|█████████▌| 14424/15000 [29:53<01:10,  8.13it/s]


 epoch: 14423 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14424 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 96%|█████████▌| 14426/15000 [29:54<01:09,  8.27it/s]


 epoch: 14425 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14426 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 96%|█████████▌| 14428/15000 [29:54<01:09,  8.26it/s]


 epoch: 14427 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 14428 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%


 96%|█████████▌| 14430/15000 [29:54<01:17,  7.36it/s]


 epoch: 14429 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

input:       changes and successfully face new challenges for example it can help raise awareness and contribute to the solution of

target:      changes and successfully face new challenges for example it can help raise awareness and contribute to the solution of contemporary

prediction:  changes and successfully face new challenges for example it can help raise awareness and contribute to the solution of the

 epoch: 14430 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 96%|█████████▌| 14431/15000 [29:54<01:16,  7.47it/s]


 epoch: 14431 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%


 96%|█████████▌| 14433/15000 [29:55<01:58,  4.80it/s]


 epoch: 14432 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14433 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 96%|█████████▌| 14435/15000 [29:55<01:27,  6.47it/s]


 epoch: 14434 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 14435 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%

 epoch: 14436 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 96%|█████████▋| 14438/15000 [29:55<01:07,  8.31it/s]


 epoch: 14437 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14438 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%


 96%|█████████▋| 14440/15000 [29:56<01:10,  7.96it/s]


 epoch: 14439 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

input:       almost metres ft antarctica holds the record for the lowest measured temperature on earth the coastal regions can

target:      almost metres ft antarctica holds the record for the lowest measured temperature on earth the coastal regions can reach

prediction:  almost metres ft antarctica holds the record for the lowest measured temperature on earth the coastal regions can the

 epoch: 14440 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 14441 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 96%|█████████▋| 14443/15000 [29:56<01:00,  9.14it/s]


 epoch: 14442 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 14443 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14444 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 96%|█████████▋| 14445/15000 [29:56<00:57,  9.59it/s]


 epoch: 14445 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 96%|█████████▋| 14448/15000 [29:57<01:04,  8.57it/s]


 epoch: 14446 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14447 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 14448 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 96%|█████████▋| 14450/15000 [29:57<01:04,  8.52it/s]


 epoch: 14449 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

input:       and pervasive examples are variations of the native dish called succotash early settlers and later immigrants combined these with

target:      and pervasive examples are variations of the native dish called succotash early settlers and later immigrants combined these with foods

prediction:  and pervasive examples are variations of the native dish called succotash early settlers and later immigrants combined these with the

 epoch: 14450 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.8%


 96%|█████████▋| 14453/15000 [29:57<00:59,  9.26it/s]


 epoch: 14451 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14452 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 96.9%

 epoch: 14453 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 96%|█████████▋| 14455/15000 [29:57<00:55,  9.82it/s]


 epoch: 14454 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.0%

 epoch: 14455 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 14456 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 96%|█████████▋| 14459/15000 [29:58<00:53, 10.04it/s]


 epoch: 14457 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14458 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14459 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

input:       and multifaceted economic systems the has the largest economy of all three countries and in the world in

target:      and multifaceted economic systems the has the largest economy of all three countries and in the world in the

prediction:  and multifaceted economic systems the has the largest economy of all three countries and in the world in the


 96%|█████████▋| 14461/15000 [29:58<01:27,  6.13it/s]


 epoch: 14460 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14461 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.2%

 epoch: 14462 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 96%|█████████▋| 14465/15000 [29:59<01:08,  7.83it/s]


 epoch: 14463 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 14464 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 96.9%

 epoch: 14465 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.4%


 96%|█████████▋| 14467/15000 [29:59<01:03,  8.44it/s]


 epoch: 14466 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14467 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14468 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%


 96%|█████████▋| 14470/15000 [29:59<01:04,  8.18it/s]


 epoch: 14469 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%

input:       bacteria to grow on the by products of nylon manufacturing and the soil bacterium sphingobium evolving an entirely new

target:      bacteria to grow on the by products of nylon manufacturing and the soil bacterium sphingobium evolving an entirely new metabolic

prediction:  bacteria to grow on the by products of nylon manufacturing and the soil bacterium sphingobium evolving an entirely new the

 epoch: 14470 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 96%|█████████▋| 14472/15000 [29:59<01:00,  8.75it/s]


 epoch: 14471 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14472 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 96%|█████████▋| 14473/15000 [30:00<01:00,  8.74it/s]


 epoch: 14473 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 96%|█████████▋| 14475/15000 [30:00<01:35,  5.48it/s]


 epoch: 14474 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14475 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 97%|█████████▋| 14478/15000 [30:00<01:08,  7.60it/s]


 epoch: 14476 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14477 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 14478 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%


 97%|█████████▋| 14480/15000 [30:01<01:08,  7.62it/s]


 epoch: 14479 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

input:       to schools where children are taught under tree with small no campus and are free of cost there are

target:      to schools where children are taught under tree with small no campus and are free of cost there are various

prediction:  to schools where children are taught under tree with small no campus and are free of cost there are the

 epoch: 14480 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 97%|█████████▋| 14483/15000 [30:01<00:57,  9.04it/s]


 epoch: 14481 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14482 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14483 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.2%


 97%|█████████▋| 14485/15000 [30:01<00:53,  9.61it/s]


 epoch: 14484 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 14485 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14486 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.7%


 97%|█████████▋| 14487/15000 [30:01<00:51,  9.97it/s]


 epoch: 14487 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 97%|█████████▋| 14489/15000 [30:02<01:10,  7.22it/s]


 epoch: 14488 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14489 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

input:       the man page describes git as the stupid content tracker the read me file of the source code elaborates

target:      the man page describes git as the stupid content tracker the read me file of the source code elaborates further

prediction:  the man page describes git as the stupid content tracker the read me file of the source code elaborates the


 97%|█████████▋| 14492/15000 [30:02<01:02,  8.17it/s]


 epoch: 14490 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14491 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14492 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 97%|█████████▋| 14494/15000 [30:02<00:57,  8.81it/s]


 epoch: 14493 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14494 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14495 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 97%|█████████▋| 14498/15000 [30:03<00:51,  9.78it/s]


 epoch: 14496 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14497 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14498 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.8%


 97%|█████████▋| 14500/15000 [30:03<00:54,  9.15it/s]


 epoch: 14499 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

input:       large as the new kingdom twenty fifth dynasty pharaohs built or restored temples and monuments throughout the nile valley

target:      large as the new kingdom twenty fifth dynasty pharaohs built or restored temples and monuments throughout the nile valley including

prediction:  large as the new kingdom twenty fifth dynasty pharaohs built or restored temples and monuments throughout the nile valley the

 epoch: 14500 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%


 97%|█████████▋| 14501/15000 [30:03<00:54,  9.20it/s]


 epoch: 14501 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 97%|█████████▋| 14504/15000 [30:04<01:21,  6.07it/s]


 epoch: 14502 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14503 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14504 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%


 97%|█████████▋| 14506/15000 [30:04<01:09,  7.14it/s]


 epoch: 14505 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14506 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 14507 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 97%|█████████▋| 14508/15000 [30:04<01:00,  8.12it/s]


 epoch: 14508 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14509 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

input:       in gist builds on the traditional simple concept of pastebin by adding version control for code snippets easy forking

target:      in gist builds on the traditional simple concept of pastebin by adding version control for code snippets easy forking and

prediction:  in gist builds on the traditional simple concept of pastebin by adding version control for code snippets easy forking the


 97%|█████████▋| 14511/15000 [30:04<00:56,  8.70it/s]


 epoch: 14510 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%

 epoch: 14511 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 96.9%

 epoch: 14512 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%


 97%|█████████▋| 14514/15000 [30:05<00:53,  9.12it/s]


 epoch: 14513 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14514 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 97%|█████████▋| 14516/15000 [30:05<00:55,  8.66it/s]


 epoch: 14515 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14516 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 97%|█████████▋| 14518/15000 [30:06<01:28,  5.43it/s]


 epoch: 14517 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14518 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%


 97%|█████████▋| 14520/15000 [30:06<01:24,  5.71it/s]


 epoch: 14519 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.4%

input:       the severan dynasty was tumultuous an emperor reign was ended routinely by his murder or execution and following its

target:      the severan dynasty was tumultuous an emperor reign was ended routinely by his murder or execution and following its collapse

prediction:  the severan dynasty was tumultuous an emperor reign was ended routinely by his murder or execution and following its the

 epoch: 14520 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%


 97%|█████████▋| 14522/15000 [30:06<01:11,  6.67it/s]


 epoch: 14521 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 14522 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.8%


 97%|█████████▋| 14524/15000 [30:06<01:02,  7.67it/s]


 epoch: 14523 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14524 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 97%|█████████▋| 14526/15000 [30:07<00:57,  8.30it/s]


 epoch: 14525 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14526 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%


 97%|█████████▋| 14528/15000 [30:07<00:57,  8.28it/s]


 epoch: 14527 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14528 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%


 97%|█████████▋| 14530/15000 [30:07<01:05,  7.22it/s]


 epoch: 14529 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       raised for charity to gain the attention of colbert the campaign was mentioned on air several times and when

target:      raised for charity to gain the attention of colbert the campaign was mentioned on air several times and when the

prediction:  raised for charity to gain the attention of colbert the campaign was mentioned on air several times and when the

 epoch: 14530 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%


 97%|█████████▋| 14532/15000 [30:08<01:39,  4.68it/s]


 epoch: 14531 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14532 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 97%|█████████▋| 14534/15000 [30:08<01:15,  6.14it/s]


 epoch: 14533 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 14534 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 96.9%


 97%|█████████▋| 14536/15000 [30:08<01:03,  7.25it/s]


 epoch: 14535 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14536 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%


 97%|█████████▋| 14538/15000 [30:09<00:59,  7.72it/s]


 epoch: 14537 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 14538 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


 97%|█████████▋| 14540/15000 [30:09<01:02,  7.40it/s]


 epoch: 14539 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

input:       only by indigenous traditions but also by foreign humor circulated via print culture cinema television and the internet during

target:      only by indigenous traditions but also by foreign humor circulated via print culture cinema television and the internet during the

prediction:  only by indigenous traditions but also by foreign humor circulated via print culture cinema television and the internet during the

 epoch: 14540 | train_loss: 0.20, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 97%|█████████▋| 14542/15000 [30:09<00:55,  8.23it/s]


 epoch: 14541 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.4%

 epoch: 14542 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 97%|█████████▋| 14544/15000 [30:09<00:51,  8.86it/s]


 epoch: 14543 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14544 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 97%|█████████▋| 14547/15000 [30:10<01:12,  6.26it/s]


 epoch: 14545 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.26, test_acc: 97.0%

 epoch: 14546 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.3%

 epoch: 14547 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%


 97%|█████████▋| 14549/15000 [30:10<01:00,  7.42it/s]


 epoch: 14548 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14549 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.2%

input:       nauru and palau all have no presence primarily due to lack of infrastructure and logistical difficulties related to micronesia

target:      nauru and palau all have no presence primarily due to lack of infrastructure and logistical difficulties related to micronesia remoteness

prediction:  nauru and palau all have no presence primarily due to lack of infrastructure and logistical difficulties related to micronesia the


 97%|█████████▋| 14552/15000 [30:10<00:54,  8.25it/s]


 epoch: 14550 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 14551 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14552 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%


 97%|█████████▋| 14554/15000 [30:11<00:50,  8.80it/s]


 epoch: 14553 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14554 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 14555 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 97%|█████████▋| 14557/15000 [30:11<00:47,  9.34it/s]


 epoch: 14556 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%

 epoch: 14557 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 14558 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 97%|█████████▋| 14560/15000 [30:11<00:49,  8.93it/s]


 epoch: 14559 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       where the earth changes at slow rate and that these changes cannot be observed during one person lifetime aristotle

target:      where the earth changes at slow rate and that these changes cannot be observed during one person lifetime aristotle developed

prediction:  where the earth changes at slow rate and that these changes cannot be observed during one person lifetime aristotle the

 epoch: 14560 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 97%|█████████▋| 14562/15000 [30:11<00:46,  9.51it/s]


 epoch: 14561 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 14562 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 14563 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 97%|█████████▋| 14566/15000 [30:12<00:43, 10.08it/s]


 epoch: 14564 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 14565 | train_loss: 0.26, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14566 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 97%|█████████▋| 14568/15000 [30:12<00:42, 10.25it/s]


 epoch: 14567 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14568 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14569 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 97%|█████████▋| 14571/15000 [30:12<00:45,  9.49it/s]


input:       school which in the united states informally refers to primary school but in the united kingdom means school that

target:      school which in the united states informally refers to primary school but in the united kingdom means school that selects

prediction:  school which in the united states informally refers to primary school but in the united kingdom means school that the

 epoch: 14570 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 14571 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%


 97%|█████████▋| 14572/15000 [30:12<00:45,  9.50it/s]


 epoch: 14572 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%


 97%|█████████▋| 14574/15000 [30:13<01:20,  5.29it/s]


 epoch: 14573 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14574 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 97%|█████████▋| 14577/15000 [30:13<00:57,  7.40it/s]


 epoch: 14575 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14576 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14577 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 97%|█████████▋| 14579/15000 [30:14<00:50,  8.38it/s]


 epoch: 14578 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14579 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.27, test_acc: 96.8%

input:       discovered by europeans during the th century approximately half of the population on these islands are european australian mainlanders

target:      discovered by europeans during the th century approximately half of the population on these islands are european australian mainlanders with


 97%|█████████▋| 14580/15000 [30:14<00:53,  7.90it/s]


prediction:  discovered by europeans during the th century approximately half of the population on these islands are european australian mainlanders the

 epoch: 14580 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14581 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 97%|█████████▋| 14582/15000 [30:14<00:47,  8.77it/s]


 epoch: 14582 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

 epoch: 14583 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.8%


 97%|█████████▋| 14585/15000 [30:14<00:45,  9.22it/s]


 epoch: 14584 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14585 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14586 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 97%|█████████▋| 14589/15000 [30:15<00:51,  8.01it/s]


 epoch: 14587 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14588 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14589 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 97%|█████████▋| 14590/15000 [30:15<00:53,  7.73it/s]


input:       shifman cites examples of how the meme mutated itself into the cultural sphere mixing with other things going on

target:      shifman cites examples of how the meme mutated itself into the cultural sphere mixing with other things going on at

prediction:  shifman cites examples of how the meme mutated itself into the cultural sphere mixing with other things going on the

 epoch: 14590 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 14591 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.1%


 97%|█████████▋| 14594/15000 [30:15<00:43,  9.26it/s]


 epoch: 14592 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 14593 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14594 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.8%


 97%|█████████▋| 14596/15000 [30:16<00:41,  9.70it/s]


 epoch: 14595 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14596 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 14597 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 97%|█████████▋| 14598/15000 [30:16<00:41,  9.58it/s]


 epoch: 14598 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 97%|█████████▋| 14600/15000 [30:16<00:46,  8.60it/s]


 epoch: 14599 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 96.8%

input:       accomplishment for trajan who ordered days of celebration throughout the empire he also constructed trajan column in the middle

target:      accomplishment for trajan who ordered days of celebration throughout the empire he also constructed trajan column in the middle of

prediction:  accomplishment for trajan who ordered days of celebration throughout the empire he also constructed trajan column in the middle the

 epoch: 14600 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.0%


 97%|█████████▋| 14603/15000 [30:16<00:45,  8.76it/s]


 epoch: 14601 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.0%

 epoch: 14602 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%

 epoch: 14603 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.2%


 97%|█████████▋| 14606/15000 [30:17<00:40,  9.67it/s]


 epoch: 14604 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14605 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14606 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%


 97%|█████████▋| 14608/15000 [30:17<00:39,  9.90it/s]


 epoch: 14607 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14608 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14609 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%


 97%|█████████▋| 14610/15000 [30:17<00:41,  9.39it/s]


input:       work was the first narrative of modern geology based on the unity of processes in time and explanation of

target:      work was the first narrative of modern geology based on the unity of processes in time and explanation of the

prediction:  work was the first narrative of modern geology based on the unity of processes in time and explanation of the

 epoch: 14610 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14611 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 97%|█████████▋| 14613/15000 [30:17<00:40,  9.66it/s]


 epoch: 14612 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.1%

 epoch: 14613 | train_loss: 0.20, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 97%|█████████▋| 14614/15000 [30:18<00:39,  9.71it/s]


 epoch: 14614 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 97%|█████████▋| 14617/15000 [30:18<01:02,  6.14it/s]


 epoch: 14615 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14616 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%

 epoch: 14617 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


 97%|█████████▋| 14618/15000 [30:18<00:56,  6.71it/s]


 epoch: 14618 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 14619 | train_loss: 0.21, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

input:       one of the largest producers in the world niobium concentrates of reserves known to the world and nickel in

target:      one of the largest producers in the world niobium concentrates of reserves known to the world and nickel in terms

prediction:  one of the largest producers in the world niobium concentrates of reserves known to the world and nickel in the


 97%|█████████▋| 14621/15000 [30:19<00:50,  7.54it/s]


 epoch: 14620 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.25, test_acc: 97.1%

 epoch: 14621 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%


 97%|█████████▋| 14623/15000 [30:19<00:47,  7.92it/s]


 epoch: 14622 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14623 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 98%|█████████▊| 14625/15000 [30:19<00:45,  8.23it/s]


 epoch: 14624 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14625 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%


 98%|█████████▊| 14627/15000 [30:19<00:44,  8.47it/s]


 epoch: 14626 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14627 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%


 98%|█████████▊| 14628/15000 [30:20<00:42,  8.68it/s]


 epoch: 14628 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%


 98%|█████████▊| 14630/15000 [30:20<01:19,  4.67it/s]


 epoch: 14629 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

input:       of space during the earliest moments however physics currently lacks widely accepted theory of quantum gravity that can successfully

target:      of space during the earliest moments however physics currently lacks widely accepted theory of quantum gravity that can successfully model

prediction:  of space during the earliest moments however physics currently lacks widely accepted theory of quantum gravity that can successfully the

 epoch: 14630 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 98%|█████████▊| 14632/15000 [30:20<01:04,  5.74it/s]


 epoch: 14631 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14632 | train_loss: 0.22, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.0%


 98%|█████████▊| 14634/15000 [30:21<00:53,  6.81it/s]


 epoch: 14633 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14634 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.1%


 98%|█████████▊| 14636/15000 [30:21<00:49,  7.31it/s]


 epoch: 14635 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14636 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 98%|█████████▊| 14638/15000 [30:21<00:47,  7.68it/s]


 epoch: 14637 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14638 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 96.9%


 98%|█████████▊| 14640/15000 [30:22<00:52,  6.87it/s]


 epoch: 14639 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.3%

input:       lava flows and are incorporated later to cool in the matrix as result xenoliths are older than the rock

target:      lava flows and are incorporated later to cool in the matrix as result xenoliths are older than the rock that

prediction:  lava flows and are incorporated later to cool in the matrix as result xenoliths are older than the rock the

 epoch: 14640 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.3%


 98%|█████████▊| 14642/15000 [30:22<00:46,  7.64it/s]


 epoch: 14641 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14642 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 98%|█████████▊| 14643/15000 [30:22<00:45,  7.85it/s]


 epoch: 14643 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%


 98%|█████████▊| 14645/15000 [30:22<00:59,  5.93it/s]


 epoch: 14644 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14645 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.8%


 98%|█████████▊| 14647/15000 [30:23<00:52,  6.76it/s]


 epoch: 14646 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14647 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%


 98%|█████████▊| 14648/15000 [30:23<00:47,  7.44it/s]


 epoch: 14648 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14649 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

input:       known as the great european plain and at its heart lies the north german plain an arc of uplands

target:      known as the great european plain and at its heart lies the north german plain an arc of uplands also

prediction:  known as the great european plain and at its heart lies the north german plain an arc of uplands the


 98%|█████████▊| 14651/15000 [30:23<00:42,  8.12it/s]


 epoch: 14650 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.8%

 epoch: 14651 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 98%|█████████▊| 14654/15000 [30:23<00:37,  9.17it/s]


 epoch: 14652 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14653 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14654 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%


 98%|█████████▊| 14656/15000 [30:24<00:36,  9.48it/s]


 epoch: 14655 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14656 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 98%|█████████▊| 14657/15000 [30:24<00:36,  9.42it/s]


 epoch: 14657 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 98%|█████████▊| 14658/15000 [30:24<01:18,  4.36it/s]


 epoch: 14658 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 14659 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

input:       all my projects after myself first linux now git the man page describes git as the stupid content tracker

target:      all my projects after myself first linux now git the man page describes git as the stupid content tracker the


 98%|█████████▊| 14660/15000 [30:24<01:02,  5.41it/s]


prediction:  all my projects after myself first linux now git the man page describes git as the stupid content tracker the

 epoch: 14660 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.3%

 epoch: 14661 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 98%|█████████▊| 14664/15000 [30:25<00:44,  7.58it/s]


 epoch: 14662 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14663 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14664 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%


 98%|█████████▊| 14667/15000 [30:25<00:38,  8.57it/s]


 epoch: 14665 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14666 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14667 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 98%|█████████▊| 14668/15000 [30:25<00:37,  8.76it/s]


 epoch: 14668 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 14669 | train_loss: 0.18, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.1%

input:       in various developing countries and many efforts are made to address it like the one laptop per child initiative

target:      in various developing countries and many efforts are made to address it like the one laptop per child initiative closely

prediction:  in various developing countries and many efforts are made to address it like the one laptop per child initiative the


 98%|█████████▊| 14670/15000 [30:25<00:38,  8.52it/s]


 epoch: 14670 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14671 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.3%


 98%|█████████▊| 14674/15000 [30:26<00:51,  6.29it/s]


 epoch: 14672 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.3%

 epoch: 14673 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 96.9%

 epoch: 14674 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 98%|█████████▊| 14677/15000 [30:27<00:41,  7.73it/s]


 epoch: 14675 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14676 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14677 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%


 98%|█████████▊| 14679/15000 [30:27<00:37,  8.48it/s]


 epoch: 14678 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14679 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

input:       time periods the geographic area that would later become the united states has been the source of more varieties

target:      time periods the geographic area that would later become the united states has been the source of more varieties of

prediction:  time periods the geographic area that would later become the united states has been the source of more varieties the


 98%|█████████▊| 14682/15000 [30:27<00:36,  8.80it/s]


 epoch: 14680 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%

 epoch: 14681 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14682 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 98%|█████████▊| 14685/15000 [30:27<00:33,  9.38it/s]


 epoch: 14683 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14684 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 14685 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 98%|█████████▊| 14686/15000 [30:28<00:33,  9.46it/s]


 epoch: 14686 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.0%


 98%|█████████▊| 14689/15000 [30:28<00:50,  6.17it/s]


 epoch: 14687 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14688 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14689 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 98%|█████████▊| 14690/15000 [30:28<00:49,  6.26it/s]


input:       coral sea and the kokoda track campaign before they were finally defeated in some of the most prominent oceanic

target:      coral sea and the kokoda track campaign before they were finally defeated in some of the most prominent oceanic battlegrounds

prediction:  coral sea and the kokoda track campaign before they were finally defeated in some of the most prominent oceanic the

 epoch: 14690 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 14691 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 98%|█████████▊| 14694/15000 [30:29<00:36,  8.44it/s]


 epoch: 14692 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 14693 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14694 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 98%|█████████▊| 14697/15000 [30:29<00:32,  9.20it/s]


 epoch: 14695 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.3%

 epoch: 14696 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14697 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 98%|█████████▊| 14699/15000 [30:29<00:31,  9.45it/s]


 epoch: 14698 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14699 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%

input:       big bang clarification needed this primordial singularity is itself sometimes called the big bang but the term can also

target:      big bang clarification needed this primordial singularity is itself sometimes called the big bang but the term can also refer

prediction:  big bang clarification needed this primordial singularity is itself sometimes called the big bang but the term can also the


 98%|█████████▊| 14700/15000 [30:29<00:34,  8.72it/s]


 epoch: 14700 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 98%|█████████▊| 14703/15000 [30:30<00:47,  6.24it/s]


 epoch: 14701 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14702 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14703 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 98%|█████████▊| 14706/15000 [30:30<00:37,  7.91it/s]


 epoch: 14704 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.26, test_acc: 97.1%

 epoch: 14705 | train_loss: 0.24, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14706 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 98%|█████████▊| 14708/15000 [30:31<00:33,  8.72it/s]


 epoch: 14707 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14708 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14709 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%


 98%|█████████▊| 14711/15000 [30:31<00:32,  8.80it/s]


input:       who specializes in the field of physics is called physicist physics is one of the oldest academic disciplines and

target:      who specializes in the field of physics is called physicist physics is one of the oldest academic disciplines and through

prediction:  who specializes in the field of physics is called physicist physics is one of the oldest academic disciplines and the

 epoch: 14710 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14711 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 98%|█████████▊| 14713/15000 [30:31<00:31,  9.01it/s]


 epoch: 14712 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%

 epoch: 14713 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.0%

 epoch: 14714 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 98%|█████████▊| 14717/15000 [30:32<00:36,  7.85it/s]


 epoch: 14715 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14716 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 14717 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 96.9%


 98%|█████████▊| 14719/15000 [30:32<00:32,  8.66it/s]


 epoch: 14718 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14719 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

input:       australia new zealand hawaii and many other territories french in new caledonia french polynesia wallis and futuna japanese in

target:      australia new zealand hawaii and many other territories french in new caledonia french polynesia wallis and futuna japanese in the

prediction:  australia new zealand hawaii and many other territories french in new caledonia french polynesia wallis and futuna japanese in the


 98%|█████████▊| 14722/15000 [30:32<00:30,  9.07it/s]


 epoch: 14720 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14721 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14722 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 98%|█████████▊| 14725/15000 [30:33<00:28,  9.54it/s]


 epoch: 14723 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14724 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14725 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.2%


 98%|█████████▊| 14727/15000 [30:33<00:29,  9.34it/s]


 epoch: 14726 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 14727 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 98%|█████████▊| 14728/15000 [30:33<00:29,  9.07it/s]


 epoch: 14728 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 98%|█████████▊| 14730/15000 [30:34<00:59,  4.56it/s]


 epoch: 14729 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.3%

input:       both folding and metamorphism of the rocks this metamorphism causes changes in the mineral composition of the rocks creates

target:      both folding and metamorphism of the rocks this metamorphism causes changes in the mineral composition of the rocks creates foliation

prediction:  both folding and metamorphism of the rocks this metamorphism causes changes in the mineral composition of the rocks creates the

 epoch: 14730 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 98%|█████████▊| 14732/15000 [30:34<00:45,  5.92it/s]


 epoch: 14731 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14732 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


 98%|█████████▊| 14734/15000 [30:34<00:38,  6.90it/s]


 epoch: 14733 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14734 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 98%|█████████▊| 14736/15000 [30:34<00:35,  7.46it/s]


 epoch: 14735 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%

 epoch: 14736 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%


 98%|█████████▊| 14738/15000 [30:35<00:33,  7.74it/s]


 epoch: 14737 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14738 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%


 98%|█████████▊| 14740/15000 [30:35<00:38,  6.84it/s]


 epoch: 14739 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

input:       of the chemical elements interacting with the light these redshifts are uniformly isotropic distributed evenly among the observed objects

target:      of the chemical elements interacting with the light these redshifts are uniformly isotropic distributed evenly among the observed objects in

prediction:  of the chemical elements interacting with the light these redshifts are uniformly isotropic distributed evenly among the observed objects the

 epoch: 14740 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%


 98%|█████████▊| 14742/15000 [30:35<00:33,  7.62it/s]


 epoch: 14741 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14742 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%


 98%|█████████▊| 14744/15000 [30:36<00:48,  5.32it/s]


 epoch: 14743 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14744 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 98%|█████████▊| 14746/15000 [30:36<00:40,  6.21it/s]


 epoch: 14745 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%

 epoch: 14746 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 98%|█████████▊| 14748/15000 [30:36<00:37,  6.77it/s]


 epoch: 14747 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14748 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 98%|█████████▊| 14749/15000 [30:36<00:35,  7.10it/s]


 epoch: 14749 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

input:       food process ores and convert materials into other materials players may also exchange goods with villager npc through trading

target:      food process ores and convert materials into other materials players may also exchange goods with villager npc through trading system

prediction:  food process ores and convert materials into other materials players may also exchange goods with villager npc through trading the


 98%|█████████▊| 14751/15000 [30:37<00:35,  6.95it/s]


 epoch: 14750 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14751 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 98%|█████████▊| 14754/15000 [30:37<00:28,  8.67it/s]


 epoch: 14752 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14753 | train_loss: 0.19, train_acc: 97.4% | test_loss: 0.25, test_acc: 97.1%

 epoch: 14754 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 98%|█████████▊| 14755/15000 [30:37<00:27,  8.91it/s]


 epoch: 14755 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 14756 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 98%|█████████▊| 14758/15000 [30:38<00:36,  6.55it/s]


 epoch: 14757 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%

 epoch: 14758 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%


 98%|█████████▊| 14760/15000 [30:38<00:35,  6.77it/s]


 epoch: 14759 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

input:       aurelius this had expanded to circus games were preceded by an elaborate parade pompa circensis that ended at the

target:      aurelius this had expanded to circus games were preceded by an elaborate parade pompa circensis that ended at the venue

prediction:  aurelius this had expanded to circus games were preceded by an elaborate parade pompa circensis that ended at the the

 epoch: 14760 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 98%|█████████▊| 14762/15000 [30:38<00:29,  7.96it/s]


 epoch: 14761 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 14762 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14763 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%


 98%|█████████▊| 14766/15000 [30:39<00:25,  9.30it/s]


 epoch: 14764 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14765 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14766 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


 98%|█████████▊| 14768/15000 [30:39<00:24,  9.39it/s]


 epoch: 14767 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14768 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14769 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 98%|█████████▊| 14771/15000 [30:39<00:25,  8.89it/s]


input:       and special relativity general relativity allowed for dynamical curved spacetime with which highly massive systems and the large scale

target:      and special relativity general relativity allowed for dynamical curved spacetime with which highly massive systems and the large scale structure

prediction:  and special relativity general relativity allowed for dynamical curved spacetime with which highly massive systems and the large scale the

 epoch: 14770 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14771 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%


 98%|█████████▊| 14773/15000 [30:40<00:40,  5.61it/s]


 epoch: 14772 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14773 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.4%

 epoch: 14774 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.26, test_acc: 97.0%


 99%|█████████▊| 14777/15000 [30:40<00:28,  7.90it/s]


 epoch: 14775 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14776 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.3%

 epoch: 14777 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.3%


 99%|█████████▊| 14778/15000 [30:40<00:26,  8.24it/s]


 epoch: 14778 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14779 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

input:       as higher self esteem lower levels of depression anxiety and perceived stress and more positive self concept as well

target:      as higher self esteem lower levels of depression anxiety and perceived stress and more positive self concept as well as

prediction:  as higher self esteem lower levels of depression anxiety and perceived stress and more positive self concept as well the


 99%|█████████▊| 14782/15000 [30:41<00:24,  8.88it/s]


 epoch: 14780 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14781 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14782 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 99%|█████████▊| 14784/15000 [30:41<00:23,  9.09it/s]


 epoch: 14783 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14784 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14785 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%


 99%|█████████▊| 14788/15000 [30:41<00:25,  8.46it/s]


 epoch: 14786 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.8%

 epoch: 14787 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14788 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 99%|█████████▊| 14790/15000 [30:42<00:25,  8.12it/s]


 epoch: 14789 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

input:       states in the four corners region in present day southwestern united states the culture of ancestral puebloans developed over

target:      states in the four corners region in present day southwestern united states the culture of ancestral puebloans developed over centuries

prediction:  states in the four corners region in present day southwestern united states the culture of ancestral puebloans developed over the

 epoch: 14790 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 99%|█████████▊| 14793/15000 [30:42<00:22,  9.15it/s]


 epoch: 14791 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14792 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14793 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.0%


 99%|█████████▊| 14795/15000 [30:42<00:21,  9.47it/s]


 epoch: 14794 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14795 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 99%|█████████▊| 14797/15000 [30:42<00:21,  9.39it/s]


 epoch: 14796 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14797 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 99%|█████████▊| 14798/15000 [30:42<00:21,  9.37it/s]


 epoch: 14798 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%

 epoch: 14799 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

input:       becoming dominant in athenian affairs in bc the armies of philip ii defeated athens at the battle of chaeronea

target:      becoming dominant in athenian affairs in bc the armies of philip ii defeated athens at the battle of chaeronea effectively

prediction:  becoming dominant in athenian affairs in bc the armies of philip ii defeated athens at the battle of chaeronea the


 99%|█████████▊| 14801/15000 [30:43<00:36,  5.48it/s]


 epoch: 14800 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14801 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14802 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%


 99%|█████████▊| 14805/15000 [30:44<00:24,  7.90it/s]


 epoch: 14803 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14804 | train_loss: 0.20, train_acc: 97.4% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14805 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%


 99%|█████████▊| 14808/15000 [30:44<00:21,  8.83it/s]


 epoch: 14806 | train_loss: 0.27, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14807 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 14808 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.1%


 99%|█████████▊| 14810/15000 [30:44<00:22,  8.50it/s]


 epoch: 14809 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       oaxaca valley at the site of monte alban monte alban grew to around residents in the period around ad

target:      oaxaca valley at the site of monte alban monte alban grew to around residents in the period around ad with

prediction:  oaxaca valley at the site of monte alban monte alban grew to around residents in the period around ad the

 epoch: 14810 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 99%|█████████▊| 14812/15000 [30:44<00:21,  8.89it/s]


 epoch: 14811 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14812 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14813 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 99%|█████████▉| 14816/15000 [30:45<00:28,  6.43it/s]


 epoch: 14814 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14815 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14816 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 99%|█████████▉| 14819/15000 [30:45<00:22,  8.07it/s]


 epoch: 14817 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14818 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14819 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.4%


 99%|█████████▉| 14820/15000 [30:46<00:23,  7.57it/s]


input:       in the world the four major broadcasters in the are the national broadcasting company nbc columbia broadcasting system

target:      in the world the four major broadcasters in the are the national broadcasting company nbc columbia broadcasting system cbs

prediction:  in the world the four major broadcasters in the are the national broadcasting company nbc columbia broadcasting system the

 epoch: 14820 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14821 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


 99%|█████████▉| 14824/15000 [30:46<00:19,  9.18it/s]


 epoch: 14822 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14823 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 96.9%

 epoch: 14824 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.0%


 99%|█████████▉| 14826/15000 [30:46<00:18,  9.20it/s]


 epoch: 14825 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14826 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.1%


 99%|█████████▉| 14829/15000 [30:46<00:17,  9.64it/s]


 epoch: 14827 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14828 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14829 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 99%|█████████▉| 14831/15000 [30:47<00:19,  8.70it/s]


input:       new zealand parliament holds legislative power and consists of the king and the house of representatives parliamentary general election

target:      new zealand parliament holds legislative power and consists of the king and the house of representatives parliamentary general election must

prediction:  new zealand parliament holds legislative power and consists of the king and the house of representatives parliamentary general election the

 epoch: 14830 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14831 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


 99%|█████████▉| 14833/15000 [30:47<00:18,  9.06it/s]


 epoch: 14832 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14833 | train_loss: 0.25, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 99%|█████████▉| 14835/15000 [30:47<00:18,  8.83it/s]


 epoch: 14834 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14835 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.3%


 99%|█████████▉| 14837/15000 [30:47<00:17,  9.17it/s]


 epoch: 14836 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14837 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


 99%|█████████▉| 14839/15000 [30:48<00:18,  8.63it/s]


 epoch: 14838 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14839 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.1%


 99%|█████████▉| 14840/15000 [30:48<00:22,  7.26it/s]


input:       is younger than the one beneath it and older than the one above it logically younger layer cannot slip

target:      is younger than the one beneath it and older than the one above it logically younger layer cannot slip beneath

prediction:  is younger than the one beneath it and older than the one above it logically younger layer cannot slip the

 epoch: 14840 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.2%


 99%|█████████▉| 14842/15000 [30:48<00:20,  7.59it/s]


 epoch: 14841 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%

 epoch: 14842 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%


 99%|█████████▉| 14844/15000 [30:49<00:33,  4.59it/s]


 epoch: 14843 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14844 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 99%|█████████▉| 14846/15000 [30:49<00:25,  6.00it/s]


 epoch: 14845 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14846 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%


 99%|█████████▉| 14848/15000 [30:49<00:21,  7.10it/s]


 epoch: 14847 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14848 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 99%|█████████▉| 14850/15000 [30:50<00:22,  6.66it/s]


 epoch: 14849 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

input:       of its equation of state and relationship with the standard model of particle physics continue to be investigated both

target:      of its equation of state and relationship with the standard model of particle physics continue to be investigated both through

prediction:  of its equation of state and relationship with the standard model of particle physics continue to be investigated both the

 epoch: 14850 | train_loss: 0.18, train_acc: 97.4% | test_loss: 0.21, test_acc: 97.3%


 99%|█████████▉| 14852/15000 [30:50<00:19,  7.57it/s]


 epoch: 14851 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.25, test_acc: 97.0%

 epoch: 14852 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


 99%|█████████▉| 14854/15000 [30:50<00:18,  7.82it/s]


 epoch: 14853 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14854 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.20, test_acc: 97.2%


 99%|█████████▉| 14856/15000 [30:50<00:17,  8.17it/s]


 epoch: 14855 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14856 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 99%|█████████▉| 14859/15000 [30:51<00:25,  5.52it/s]


 epoch: 14857 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14858 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.0%

 epoch: 14859 | train_loss: 0.25, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%


 99%|█████████▉| 14861/15000 [30:51<00:21,  6.41it/s]


input:       the bering strait during the early middle holocene prior to the arrival of european explorers and colonists in north

target:      the bering strait during the early middle holocene prior to the arrival of european explorers and colonists in north america

prediction:  the bering strait during the early middle holocene prior to the arrival of european explorers and colonists in north the

 epoch: 14860 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.3%

 epoch: 14861 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%


 99%|█████████▉| 14863/15000 [30:51<00:17,  7.65it/s]


 epoch: 14862 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14863 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14864 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.1%


 99%|█████████▉| 14867/15000 [30:52<00:14,  9.35it/s]


 epoch: 14865 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14866 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14867 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 99%|█████████▉| 14869/15000 [30:52<00:13,  9.49it/s]


 epoch: 14868 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14869 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       praised github by stating the hosting of github is excellent they ve done good job on that think github

target:      praised github by stating the hosting of github is excellent they ve done good job on that think github should


 99%|█████████▉| 14871/15000 [30:52<00:14,  8.72it/s]


prediction:  praised github by stating the hosting of github is excellent they ve done good job on that think github the

 epoch: 14870 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14871 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 99%|█████████▉| 14874/15000 [30:53<00:19,  6.45it/s]


 epoch: 14872 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14873 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 96.9%

 epoch: 14874 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%


 99%|█████████▉| 14876/15000 [30:53<00:16,  7.42it/s]


 epoch: 14875 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.26, test_acc: 97.0%

 epoch: 14876 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%


 99%|█████████▉| 14879/15000 [30:53<00:14,  8.56it/s]


 epoch: 14877 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14878 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14879 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%


 99%|█████████▉| 14880/15000 [30:54<00:14,  8.09it/s]


input:       cloud computing services alongside the development of and contributions to open source software harvard business review argued that microsoft

target:      cloud computing services alongside the development of and contributions to open source software harvard business review argued that microsoft was

prediction:  cloud computing services alongside the development of and contributions to open source software harvard business review argued that microsoft the

 epoch: 14880 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14881 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 99%|█████████▉| 14883/15000 [30:54<00:12,  9.01it/s]


 epoch: 14882 | train_loss: 0.22, train_acc: 96.8% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14883 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.0%


 99%|█████████▉| 14884/15000 [30:54<00:12,  9.12it/s]


 epoch: 14884 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 99%|█████████▉| 14886/15000 [30:55<00:20,  5.53it/s]


 epoch: 14885 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14886 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14887 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.0%


 99%|█████████▉| 14888/15000 [30:55<00:15,  7.03it/s]


 epoch: 14888 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14889 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

input:       survey derived the following factor analytical scales the non violent and law abiding society democracy movement climate of personal

target:      survey derived the following factor analytical scales the non violent and law abiding society democracy movement climate of personal non

prediction:  survey derived the following factor analytical scales the non violent and law abiding society democracy movement climate of personal the


 99%|█████████▉| 14892/15000 [30:55<00:12,  8.31it/s]


 epoch: 14890 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14891 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14892 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 99%|█████████▉| 14895/15000 [30:56<00:11,  9.10it/s]


 epoch: 14893 | train_loss: 0.24, train_acc: 96.7% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14894 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.25, test_acc: 96.9%

 epoch: 14895 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


 99%|█████████▉| 14897/15000 [30:56<00:11,  9.22it/s]


 epoch: 14896 | train_loss: 0.23, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14897 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%


 99%|█████████▉| 14898/15000 [30:56<00:11,  9.11it/s]


 epoch: 14898 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


 99%|█████████▉| 14900/15000 [30:57<00:20,  4.80it/s]


 epoch: 14899 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

input:       by ulisse aldrovandi in then by jean andr deluc in and introduced as fixed term by horace dict

target:      by ulisse aldrovandi in then by jean andr deluc in and introduced as fixed term by horace dict de

prediction:  by ulisse aldrovandi in then by jean andr deluc in and introduced as fixed term by horace dict the

 epoch: 14900 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%


 99%|█████████▉| 14902/15000 [30:57<00:15,  6.29it/s]


 epoch: 14901 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.4%

 epoch: 14902 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14903 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


 99%|█████████▉| 14906/15000 [30:57<00:11,  8.52it/s]


 epoch: 14904 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14905 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.1%

 epoch: 14906 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 99%|█████████▉| 14909/15000 [30:57<00:09,  9.27it/s]


 epoch: 14907 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.20, test_acc: 97.2%

 epoch: 14908 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 14909 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


 99%|█████████▉| 14911/15000 [30:58<00:10,  8.74it/s]


input:       west coast faces the southern ocean the coral reefs of the south pacific are low lying structures that have

target:      west coast faces the southern ocean the coral reefs of the south pacific are low lying structures that have built

prediction:  west coast faces the southern ocean the coral reefs of the south pacific are low lying structures that have the

 epoch: 14910 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%

 epoch: 14911 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%


 99%|█████████▉| 14912/15000 [30:58<00:09,  8.93it/s]


 epoch: 14912 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


 99%|█████████▉| 14914/15000 [30:58<00:16,  5.21it/s]


 epoch: 14913 | train_loss: 0.21, train_acc: 97.4% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14914 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%


 99%|█████████▉| 14918/15000 [30:59<00:10,  7.95it/s]


 epoch: 14915 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14916 | train_loss: 0.26, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14917 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14918 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%


 99%|█████████▉| 14920/15000 [30:59<00:09,  8.30it/s]


 epoch: 14919 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

input:       had an advantage but not those of medium height finally in stabilising selection there is selection against extreme trait

target:      had an advantage but not those of medium height finally in stabilising selection there is selection against extreme trait values

prediction:  had an advantage but not those of medium height finally in stabilising selection there is selection against extreme trait the

 epoch: 14920 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


 99%|█████████▉| 14923/15000 [30:59<00:08,  9.10it/s]


 epoch: 14921 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14922 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14923 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.0%


100%|█████████▉| 14925/15000 [31:00<00:08,  9.35it/s]


 epoch: 14924 | train_loss: 0.25, train_acc: 96.8% | test_loss: 0.23, test_acc: 96.9%

 epoch: 14925 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%


100%|█████████▉| 14926/15000 [31:00<00:07,  9.39it/s]


 epoch: 14926 | train_loss: 0.23, train_acc: 96.9% | test_loss: 0.25, test_acc: 97.0%


100%|█████████▉| 14929/15000 [31:00<00:12,  5.75it/s]


 epoch: 14927 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14928 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.3%

 epoch: 14929 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.22, test_acc: 97.0%


100%|█████████▉| 14931/15000 [31:01<00:10,  6.51it/s]


input:       to inform leonardo brothers of his death described leonardo feelings for his pupils as both loving and passionate it

target:      to inform leonardo brothers of his death described leonardo feelings for his pupils as both loving and passionate it has

prediction:  to inform leonardo brothers of his death described leonardo feelings for his pupils as both loving and passionate it the

 epoch: 14930 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14931 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.1%


100%|█████████▉| 14933/15000 [31:01<00:09,  7.21it/s]


 epoch: 14932 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.8%

 epoch: 14933 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.1%


100%|█████████▉| 14935/15000 [31:01<00:08,  7.60it/s]


 epoch: 14934 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 14935 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


100%|█████████▉| 14937/15000 [31:01<00:08,  7.45it/s]


 epoch: 14936 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14937 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.25, test_acc: 97.1%


100%|█████████▉| 14939/15000 [31:02<00:07,  7.96it/s]


 epoch: 14938 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.1%

 epoch: 14939 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%

input:       german colony australian forces attacked german new guinea in september company of australians and british warship besieged the germans


100%|█████████▉| 14941/15000 [31:02<00:08,  7.34it/s]


target:      german colony australian forces attacked german new guinea in september company of australians and british warship besieged the germans and

prediction:  german colony australian forces attacked german new guinea in september company of australians and british warship besieged the germans the

 epoch: 14940 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14941 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.27, test_acc: 97.1%


100%|█████████▉| 14943/15000 [31:02<00:08,  6.97it/s]


 epoch: 14942 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 14943 | train_loss: 0.20, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


100%|█████████▉| 14945/15000 [31:03<00:07,  7.64it/s]


 epoch: 14944 | train_loss: 0.24, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14945 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.22, test_acc: 97.1%


100%|█████████▉| 14947/15000 [31:03<00:06,  8.22it/s]


 epoch: 14946 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14947 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.21, test_acc: 97.3%

 epoch: 14948 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%


100%|█████████▉| 14950/15000 [31:03<00:06,  8.04it/s]


 epoch: 14949 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

input:       is chained together when language can run its commands through an interpreter such as unix shell or other command

target:      is chained together when language can run its commands through an interpreter such as unix shell or other command line

prediction:  is chained together when language can run its commands through an interpreter such as unix shell or other command the

 epoch: 14950 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.3%


100%|█████████▉| 14952/15000 [31:03<00:05,  8.49it/s]


 epoch: 14951 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14952 | train_loss: 0.25, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


100%|█████████▉| 14954/15000 [31:04<00:05,  8.12it/s]


 epoch: 14953 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14954 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.21, test_acc: 97.2%


100%|█████████▉| 14955/15000 [31:04<00:05,  8.14it/s]


 epoch: 14955 | train_loss: 0.23, train_acc: 96.8% | test_loss: 0.24, test_acc: 97.0%


100%|█████████▉| 14957/15000 [31:04<00:08,  4.87it/s]


 epoch: 14956 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%

 epoch: 14957 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%


100%|█████████▉| 14959/15000 [31:05<00:06,  6.25it/s]


 epoch: 14958 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.24, test_acc: 97.0%

 epoch: 14959 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.2%

input:       common verbs are provided as an enum if cmdlet receives either pipeline input or command line parameter input there

target:      common verbs are provided as an enum if cmdlet receives either pipeline input or command line parameter input there must


100%|█████████▉| 14960/15000 [31:05<00:06,  6.36it/s]


prediction:  common verbs are provided as an enum if cmdlet receives either pipeline input or command line parameter input there the

 epoch: 14960 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.2%

 epoch: 14961 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.25, test_acc: 96.9%


100%|█████████▉| 14964/15000 [31:05<00:04,  8.71it/s]


 epoch: 14962 | train_loss: 0.19, train_acc: 97.6% | test_loss: 0.21, test_acc: 97.3%

 epoch: 14963 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14964 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


100%|█████████▉| 14966/15000 [31:05<00:03,  9.10it/s]


 epoch: 14965 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.23, test_acc: 96.9%

 epoch: 14966 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%


100%|█████████▉| 14968/15000 [31:06<00:03,  9.27it/s]


 epoch: 14967 | train_loss: 0.23, train_acc: 97.0% | test_loss: 0.25, test_acc: 97.1%

 epoch: 14968 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.24, test_acc: 97.2%


100%|█████████▉| 14969/15000 [31:06<00:03,  9.22it/s]


 epoch: 14969 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.23, test_acc: 97.2%

input:       where it was elaborated upon by arabic writers and islamic philosophers such as abu bishr and his pupils al

target:      where it was elaborated upon by arabic writers and islamic philosophers such as abu bishr and his pupils al farabi

prediction:  where it was elaborated upon by arabic writers and islamic philosophers such as abu bishr and his pupils al the


100%|█████████▉| 14972/15000 [31:06<00:05,  5.54it/s]


 epoch: 14970 | train_loss: 0.22, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14971 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

 epoch: 14972 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.1%


100%|█████████▉| 14974/15000 [31:07<00:03,  6.65it/s]


 epoch: 14973 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.25, test_acc: 96.9%

 epoch: 14974 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.23, test_acc: 97.2%


100%|█████████▉| 14976/15000 [31:07<00:03,  7.44it/s]


 epoch: 14975 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.3%

 epoch: 14976 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.3%


100%|█████████▉| 14978/15000 [31:07<00:02,  8.33it/s]


 epoch: 14977 | train_loss: 0.22, train_acc: 97.3% | test_loss: 0.21, test_acc: 97.2%

 epoch: 14978 | train_loss: 0.24, train_acc: 97.2% | test_loss: 0.23, test_acc: 96.9%


100%|█████████▉| 14980/15000 [31:07<00:02,  7.84it/s]


 epoch: 14979 | train_loss: 0.24, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%

input:       other for control of the empire and neglected their duties in preventing invasions provincials became victims of frequent raids

target:      other for control of the empire and neglected their duties in preventing invasions provincials became victims of frequent raids by

prediction:  other for control of the empire and neglected their duties in preventing invasions provincials became victims of frequent raids the

 epoch: 14980 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


100%|█████████▉| 14982/15000 [31:08<00:02,  8.89it/s]


 epoch: 14981 | train_loss: 0.21, train_acc: 97.3% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14982 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14983 | train_loss: 0.21, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.1%


100%|█████████▉| 14984/15000 [31:08<00:01,  9.01it/s]


 epoch: 14984 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.21, test_acc: 97.0%


100%|█████████▉| 14986/15000 [31:08<00:02,  5.66it/s]


 epoch: 14985 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14986 | train_loss: 0.21, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.1%

 epoch: 14987 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.1%

100%|█████████▉| 14988/15000 [31:09<00:01,  7.07it/s]



 epoch: 14988 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%


100%|█████████▉| 14990/15000 [31:09<00:01,  7.17it/s]


 epoch: 14989 | train_loss: 0.21, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.1%

input:       world many small languages are becoming endangered as their speakers shift to other languages that afford the possibility to

target:      world many small languages are becoming endangered as their speakers shift to other languages that afford the possibility to participate

prediction:  world many small languages are becoming endangered as their speakers shift to other languages that afford the possibility to the

 epoch: 14990 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.1%


100%|█████████▉| 14992/15000 [31:09<00:00,  8.34it/s]


 epoch: 14991 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.3%

 epoch: 14992 | train_loss: 0.24, train_acc: 96.9% | test_loss: 0.24, test_acc: 97.1%

 epoch: 14993 | train_loss: 0.24, train_acc: 97.1% | test_loss: 0.25, test_acc: 97.0%


100%|█████████▉| 14996/15000 [31:09<00:00,  9.47it/s]


 epoch: 14994 | train_loss: 0.22, train_acc: 97.2% | test_loss: 0.22, test_acc: 97.2%

 epoch: 14995 | train_loss: 0.23, train_acc: 96.7% | test_loss: 0.23, test_acc: 97.1%

 epoch: 14996 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.22, test_acc: 97.2%


100%|█████████▉| 14998/15000 [31:10<00:00,  9.23it/s]


 epoch: 14997 | train_loss: 0.22, train_acc: 97.0% | test_loss: 0.24, test_acc: 97.2%

 epoch: 14998 | train_loss: 0.22, train_acc: 97.1% | test_loss: 0.24, test_acc: 97.0%


100%|██████████| 15000/15000 [31:10<00:00,  8.02it/s]


 epoch: 14999 | train_loss: 0.23, train_acc: 97.1% | test_loss: 0.23, test_acc: 97.0%

input:       formal education this became necessary since the amount of knowledge grew as civilizations evolved and informal education proved insufficient

target:      formal education this became necessary since the amount of knowledge grew as civilizations evolved and informal education proved insufficient to

prediction:  formal education this became necessary since the amount of knowledge grew as civilizations evolved and informal education proved insufficient the

 epoch: 15000 | train_loss: 0.23, train_acc: 97.2% | test_loss: 0.23, test_acc: 97.0%





In [None]:
# Generation demo: start from a one-word seed and repeatedly feed the
# model's own output back in as the next prompt.
# NOTE(review): make_prediction is defined elsewhere in this notebook; it
# presumably returns the prompt extended by the predicted word -- confirm.
line="spez"
for _ in range(10):
  # Emit the current word count (split on single spaces), then the text
  # itself, each on its own line.
  print(len(line.split(" ")), line, sep="\n")
  # Run the prediction inside torch.inference_mode(), as this is
  # evaluation only.
  with torch.inference_mode():
    line=make_prediction(line)