In [32]:
import torch
import pandas as pd
from tqdm import tqdm 
import torch
import time
# We used pip install transformers, pip install sentencepiece
from transformers import BertTokenizer, BertForSequenceClassification, AlbertTokenizer, AlbertForSequenceClassification
from transformers import AdamW
from transformers import T5Tokenizer, T5ForConditionalGeneration

from torch.utils.tensorboard import SummaryWriter
from sklearn.metrics import accuracy_score
import plotly.express as px

In [2]:
data_path = 'projectData/'
tb_dir = 'tbs_HW2/'

In [3]:
#Define our device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 
if torch.cuda.is_available():
    print(f'using: {torch.cuda.get_device_name(0)}')
else:
    print('using cpu')

using: NVIDIA GeForce GTX 970


### Load Data

In [4]:
raw_data = pd.read_csv(data_path+'dataset_raw.csv')

In [5]:
#Shuffle the data
raw_data = raw_data.sample(frac = 1)

In [6]:
#Create dataser class for T5
class SimpleDataset(torch.utils.data.Dataset):
    def __init__(self, data ,label):
        self.data = data
        self.label = label
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        X = self.data[index]
        y = self.label[index]
        return X, y

In [7]:
def get_data(input_df, train_size,test_size,batch_size):
    train_end_idx = train_size
    test_end_idx = train_size + test_size
    #Make one list for all the reviews
    reviews = input_df['review'].tolist()

    #mini sample of reviews for train and test
    train_data = reviews[:train_end_idx]
    test_data = reviews[train_end_idx:test_end_idx]
    #Take mini sample of the labels and preprocess them such that we can use them in the model training loop
    labels = input_df['label'].tolist()
    train_labels = labels[:train_end_idx]
    test_labels = labels[train_end_idx:test_end_idx]

    #Use data set class in order to build train and test datasets
    train_dataset = SimpleDataset(train_data, train_labels)
    test_dataset = SimpleDataset(test_data, test_labels)

    #Build train and test dataloaders
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)
    return train_dataloader , test_dataloader

## Define Tokenizer and 'Bert' Model

In [8]:
#Define Bert tokenizer and model
# tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
# model = BertForSequenceClassification.from_pretrained("bert-base-uncased")

#Define AlBert tokenizer and model
bert_tokenizer = AlbertTokenizer.from_pretrained("albert-base-v2")
bert_model = AlbertForSequenceClassification.from_pretrained("albert-base-v2")

Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertForSequenceClassification: ['predictions.dense.bias', 'predictions.dense.weight', 'predictions.decoder.weight', 'predictions.decoder.bias', 'predictions.bias', 'predictions.LayerNorm.bias', 'predictions.LayerNorm.weight']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You sho

In [9]:
#Bert base uncased architecture
bert_model

AlbertForSequenceClassification(
  (albert): AlbertModel(
    (embeddings): AlbertEmbeddings(
      (word_embeddings): Embedding(30000, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0, inplace=False)
    )
    (encoder): AlbertTransformer(
      (embedding_hidden_mapping_in): Linear(in_features=128, out_features=768, bias=True)
      (albert_layer_groups): ModuleList(
        (0): AlbertLayerGroup(
          (albert_layers): ModuleList(
            (0): AlbertLayer(
              (full_layer_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (attention): AlbertAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768,

In [10]:
n_parameters = sum(p.numel() for p in bert_model.parameters())
print(f"Number of parameters in model: {n_parameters}")

Number of parameters in model: 11685122


In [11]:
def tokenizing_batch_Bert(X, y, tokenizer) :
    """
    Does ... TODO
    X: Batch of reviews 
    y: labels of reviews in batch
    """
    inputs =  tokenizer(X, max_length = 128, padding = 'max_length', truncation = True, return_tensors="pt")

    input_ids = inputs['input_ids'].to(device)
    token_type_ids = inputs['token_type_ids'].to(device)
    attention_mask = inputs['attention_mask'].to(device)
    labels = torch.LongTensor(y).T.to(device)
    
    return input_ids, token_type_ids, attention_mask, labels

## Training Loop and Hyperparameter Selection

In [12]:
def calc_accuracy(target,pred):
    target, pred = target.to(device),pred.to(device)
    correct = torch.sum(target==pred)
    accuracy = correct/len(target)
    return accuracy

In [95]:
def fineTuning(model, optim, epochs, train_dataloader, test_dataloader, tb_dirname, tokenizer,model_type):
    train_loss_list = []
    test_loss_list = []
    train_acc_list = []
    test_acc_list = []

    writer = SummaryWriter(log_dir=f'{tb_dir}/{tb_dirname}_{time.time()}')

    for epoch in tqdm(range(epochs), desc = "epochs", position=0):
        running_loss = 0
        model.train()
        # setup loop with TQDM and dataloader
        train_loop = tqdm(train_dataloader, desc='train',position =1, leave=False)
        for X, y in train_loop:
            # initialize calculated gradients (from prev step)
            optim.zero_grad()
            # pull all tensor batches required for training
            if model_type == 'bert':
                input_ids, token_type_ids, attention_mask, labels = tokenizing_batch_Bert(X, y, tokenizer)
                outputs = model(input_ids, attention_mask=attention_mask,
                token_type_ids=token_type_ids,
                labels=labels)
                

            elif model_type == 't5':
                ids, mask, labels = tokenizing_batch_T5(X, y, tokenizer)
                outputs = model(
                input_ids=ids,
                attention_mask=mask,
                labels=labels
                )

            # process
            loss = outputs.loss
            # calculate loss for every parameter that needs grad update
            loss.backward()
            # update parameters
            optim.step()
            running_loss+=loss.item()
            # print relevant info to progress bar
            # train_loop.set_description(f'Epoch {epoch}')
            train_loop.set_postfix(loss=loss.item())
        
        epoch_train_loss = running_loss/len(train_dataloader.dataset)
        train_loss_list.append(epoch_train_loss)


        test_loss = 0
        test_acc = 0
        model.eval()
        # we dont need to update weights, so we define no_grad() to save memory
        with torch.no_grad():
            eval_loop = tqdm(test_dataloader)
            for X_test, y_test in eval_loop:
                
                if model_type == 'bert':
                    input_ids, token_type_ids, attention_mask, labels = tokenizing_batch_Bert(X_test, y_test, tokenizer)
                    outputs = model(input_ids, attention_mask=attention_mask,
                                token_type_ids=token_type_ids,
                                labels=labels)
                    # get classification
                    logits = outputs.logits
                    predicted_token_class_ids = logits.argmax(-1)
                    # calculate accuracy score
                    acc_score = calc_accuracy(y_test, predicted_token_class_ids)
                    loss = outputs.loss
                
                elif model_type == 't5':
                    ids, mask, labels = tokenizing_batch_T5(X_test, y_test, tokenizer)
                    
                    generated_ids = model.generate(
                    input_ids=ids,
                    attention_mask=mask,
                    max_length=2
                    )

                    # For printing of real words 
                    preds = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=True) for g in generated_ids]
                    target = [tokenizer.decode(t, skip_special_tokens=True, clean_up_tokenization_spaces=True) for t in labels]

                    loss = model(input_ids=ids, labels=labels).loss
                    acc_score = calc_accuracy(generated_ids[:,1], labels[:,0])
                    
                test_acc += acc_score.item()
                batch_test_loss = outputs.loss.item()
                test_loss+= batch_test_loss
                eval_loop.set_description(f'Epoch {epoch}')
                eval_loop.set_postfix(loss=batch_test_loss)
                
        epoch_acc_score = test_acc/len(test_dataloader)
        test_acc_list.append(epoch_acc_score)
        epoch_test_loss = test_loss/len(test_dataloader.dataset)
        test_loss_list.append(epoch_test_loss)

        writer.add_scalar(tag='loss/train', scalar_value=epoch_train_loss, global_step=epoch)
        writer.add_scalar(tag='loss/test', scalar_value=epoch_test_loss, global_step=epoch)
       
    
    return {
            'train_loss':train_loss_list,
            'test_loss':test_loss_list,
            'train_acc':train_acc_list,
            'test_acc':test_acc_list
            }

In [43]:
#Enter model to device
bert_model = AlbertForSequenceClassification.from_pretrained("albert-base-v2")
bert_model.to(device)
#Define optimizer
optim = torch.optim.AdamW(bert_model.parameters(), lr = 5e-5 )
#Define epochs number
epochs = 10
train_dataloader, test_dataloader = get_data(raw_data,
                                            train_size = 500,
                                            test_size = 100,
                                            batch_size = 16)

Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertForSequenceClassification: ['predictions.dense.bias', 'predictions.dense.weight', 'predictions.decoder.weight', 'predictions.decoder.bias', 'predictions.bias', 'predictions.LayerNorm.bias', 'predictions.LayerNorm.weight']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You sho

In [44]:
#Fine Tuning Bert
loss_dic = fineTuning(  model=bert_model,
                        optim=optim,
                        epochs= epochs,
                        train_dataloader = train_dataloader,
                        test_dataloader=test_dataloader,
                        tb_dirname ='Bert',
                        tokenizer=bert_tokenizer,
                        model_type='bert')

Epoch 0: 100%|██████████| 32/32 [00:23<00:00,  1.34it/s, loss=0.579]
Epoch 0: 100%|██████████| 7/7 [00:01<00:00,  4.61it/s, loss=0.708]
Epoch 1: 100%|██████████| 32/32 [00:23<00:00,  1.36it/s, loss=0.575]
Epoch 1: 100%|██████████| 7/7 [00:01<00:00,  4.62it/s, loss=0.609]
Epoch 2: 100%|██████████| 32/32 [00:23<00:00,  1.36it/s, loss=0.809]
Epoch 2: 100%|██████████| 7/7 [00:01<00:00,  4.57it/s, loss=0.49] 
Epoch 3: 100%|██████████| 32/32 [00:23<00:00,  1.35it/s, loss=0.317]
Epoch 3: 100%|██████████| 7/7 [00:01<00:00,  4.49it/s, loss=0.511]
Epoch 4: 100%|██████████| 32/32 [00:24<00:00,  1.33it/s, loss=0.178]
Epoch 4: 100%|██████████| 7/7 [00:01<00:00,  4.60it/s, loss=0.153]
Epoch 5: 100%|██████████| 32/32 [00:23<00:00,  1.36it/s, loss=0.345]
Epoch 5: 100%|██████████| 7/7 [00:01<00:00,  4.60it/s, loss=0.115]
Epoch 6: 100%|██████████| 32/32 [00:23<00:00,  1.37it/s, loss=0.607] 
Epoch 6: 100%|██████████| 7/7 [00:01<00:00,  4.53it/s, loss=0.518]
Epoch 7: 100%|██████████| 32/32 [00:23<00:00,  

In [47]:
a = loss_dic['test_loss']
# px.line(a)
a

[0.04674081981182098,
 0.042834992408752444,
 0.04520301282405853,
 0.04039554446935654,
 0.04883110746741295,
 0.03474756591022014,
 0.049614324271678924,
 0.041130890548229215,
 0.04317125007510185,
 0.044952907860279084]

In [102]:
torch.cuda.empty_cache()

# T5

## Preprocess


In [48]:
#Read the raw data
raw_data = pd.read_csv(data_path+'dataset_raw.csv')
#Shuffle the data
raw_data = raw_data.sample(frac = 1)
#Make copy
data_T5 = raw_data.copy()
#Mapping 0 and 1 to "positive" and "negative"
data_T5['label']=data_T5['label'].map({1: 'positive', 0: 'negative'})

In [49]:
#Adding the "sst2 sentence:" prefix for the reviews
def add_T5_preffix_sentimant_classiication(row):
    row['review'] = "sst2 sentence: " + row['review'] 
    row['label'] = row['label']
    return row

data_T5 = data_T5.apply(lambda row: add_T5_preffix_sentimant_classiication(row), axis =1)


In [50]:
#Make lists for reviews and lables
reviews = data_T5['review'].tolist()
labels  = data_T5['label'].tolist()

In [62]:
#Define T5 Tokenizer and model
t5_tokenizer = T5Tokenizer.from_pretrained("t5-small")
t5_model = T5ForConditionalGeneration.from_pretrained("t5-small")

In [55]:
#T5 small architecture
t5_model

T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dro

In [56]:
n_parameters = sum(p.numel() for p in t5_model.parameters())
print(f"Number of parameters in model: {n_parameters}")

Number of parameters in model: 60506624


In [70]:
def tokenizing_batch_T5(X, y, tokenizer) :
    train_encoder_inputs = tokenizer(X, padding="longest", max_length=128, truncation=True, return_tensors="pt")
    train_decoder_inputs = tokenizer(y, padding="longest", max_length=2, truncation=True, return_tensors="pt")

    labels = train_decoder_inputs["input_ids"]
    labels[labels == tokenizer.pad_token_id] = -100
    labels = labels.to(device)
    ids = train_encoder_inputs["input_ids"].to(device)
    mask = train_encoder_inputs["input_ids"].to(device)
    
    return ids, mask, labels

In [101]:
#Enter model to device
t5_model.to(device)
#Define optimizer
optim = torch.optim.AdamW(t5_model.parameters(), lr = 5e-5 )
#Define epochs number
epochs = 5
train_dataloader, test_dataloader = get_data(data_T5,
                                            train_size = 750,
                                            test_size = 250,
                                            batch_size = 16)

In [98]:
loss_dic_t5 = fineTuning(model=t5_model,
                        optim=optim,
                        epochs= epochs,
                        train_dataloader = train_dataloader,
                        test_dataloader=test_dataloader,
                        tb_dirname ='T5',
                        tokenizer=t5_tokenizer,
                        model_type='t5')

Epoch 0: 100%|██████████| 7/7 [00:01<00:00,  5.48it/s, loss=6]
Epoch 1: 100%|██████████| 7/7 [00:01<00:00,  5.92it/s, loss=2.52]
Epoch 2: 100%|██████████| 7/7 [00:01<00:00,  6.11it/s, loss=2.69]
epochs: 100%|██████████| 3/3 [00:34<00:00, 11.36s/it]


In [100]:
loss_dic_t5['test_acc']

[0.0, 0.017857142857142856, 0.42857142857142855]

In [27]:

def FineTining_T5(model, optim, epochs, tokenizer , train_dataloader, test_dataloader, tb_dirname):

    train_loss_list = []
    test_loss_list = []
    test_accuracy = []

    writer = SummaryWriter(log_dir=f'{tb_dir}/{tb_dirname}_{time.time()}')


    for epoch in range(epochs):
        running_loss = 0
        model.train()
        # setup loop with TQDM and dataloader
        train_loop = tqdm(train_dataloader)
        for X, y in train_loop:
            # initialize calculated gradients (from prev step)
            optim.zero_grad()
            # pull all tensor batches required for training
            ids, mask, labels = tokenizing_batch_T5(X, y, tokenizer)
            # process
            outputs = model(
                input_ids=ids,
                attention_mask=mask,
                labels=labels
            )
            # logits =outputs[1]
            # tokens = torch.argmax(logits, dim=2)
            # sentiments = tokenizer.batch_decode(tokens)
            # labels = tokenizer.batch_decode(y_ids)

            # extract loss
            loss = outputs[0]
            # calculate loss for every parameter that needs grad update
            loss.backward()
            # update parameters
            optim.step()
            running_loss+=loss.item()
            # print relevant info to progress bar
            train_loop.set_description(f'Epoch {epoch}')
            train_loop.set_postfix(loss=loss.item())
        
        epoch_train_loss = running_loss/len(train_dataloader.dataset)
        train_loss_list.append(epoch_train_loss)


        #test_loss = 0
        total_test_acc = 0
        model.eval()
        with torch.no_grad():
            eval_loop = tqdm(test_dataloader)
            for X_test, y_test in eval_loop:
                ids, mask, labels = tokenizing_batch_T5(X_test, y_test, tokenizer)

                generated_ids = model.generate(
                    input_ids=ids,
                    attention_mask=mask,
                    max_length=2
                    )

                preds = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=True) for g in generated_ids]
                target = [tokenizer.decode(t, skip_special_tokens=True, clean_up_tokenization_spaces=True) for t in labels]
                print(preds)
                print(target)
                total_test_acc += accuracy_score(target, preds)
                loss = model(input_ids=ids, labels=labels).loss
            
            #epoch_test_loss = test_loss/len(test_dataloader.dataset)
            epoch_test_acc = total_test_acc / len(test_dataloader.dataset)
            #test_loss_list.append(epoch_test_loss)
            test_accuracy.append(epoch_test_acc) 

        writer.add_scalar(tag='loss/train', scalar_value=epoch_train_loss, global_step=epoch)
        #writer.add_scalar(tag='loss/test', scalar_value=epoch_test_loss, global_step=epoch)
        writer.add_scalar(tag='acc/test', scalar_value=epoch_test_acc, global_step=epoch)
    
    return {
            'train_loss':train_loss_list,
            'test_loss':test_loss_list,
            'test_accuracy': test_accuracy
            }

In [28]:
#Enter model to device
model.to(device)
#Define optimizer
optim = AdamW(model.parameters(), lr = 1e-4)
#Define number of epochs
epochs = 5



In [29]:
training_info=FineTining_T5(model, optim, epochs, tokenizer , train_dataloader, test_dataloader, 'T5')

Epoch 0: 100%|██████████| 250/250 [28:08<00:00,  6.75s/it, loss=0.482]
  1%|          | 1/125 [00:02<04:50,  2.35s/it]

['positive', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'positive']


  2%|▏         | 2/125 [00:04<04:12,  2.05s/it]

['positive', 'positive', 'negative', 'negative']
['negative', 'positive', 'negative', 'negative']


  2%|▏         | 3/125 [00:05<03:54,  1.92s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'negative', 'positive', 'positive']


  3%|▎         | 4/125 [00:07<03:43,  1.85s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'negative']


  4%|▍         | 5/125 [00:09<03:31,  1.77s/it]

['positive', 'negative', 'positive', 'negative']
['positive', 'negative', 'negative', 'negative']


  5%|▍         | 6/125 [00:10<03:24,  1.72s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'negative', 'negative', 'negative']


  6%|▌         | 7/125 [00:12<03:18,  1.68s/it]

['negative', 'negative', 'positive', 'positive']
['positive', 'negative', 'positive', 'positive']


  6%|▋         | 8/125 [00:14<03:11,  1.64s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'negative', 'negative', 'negative']


  7%|▋         | 9/125 [00:15<03:08,  1.63s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'positive', 'positive', 'negative']


  8%|▊         | 10/125 [00:17<03:07,  1.63s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'positive', 'negative', 'positive']


  9%|▉         | 11/125 [00:19<03:08,  1.66s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'negative']


 10%|▉         | 12/125 [00:20<03:08,  1.67s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'positive']


 10%|█         | 13/125 [00:22<03:06,  1.67s/it]

['negative', 'negative', 'negative', 'positive']
['negative', 'positive', 'negative', 'positive']


 11%|█         | 14/125 [00:24<03:02,  1.64s/it]

['negative', 'negative', 'positive', 'positive']
['negative', 'positive', 'negative', 'positive']


 12%|█▏        | 15/125 [00:25<03:03,  1.67s/it]

['negative', 'negative', 'positive', 'positive']
['positive', 'positive', 'negative', 'positive']


 13%|█▎        | 16/125 [00:27<03:06,  1.71s/it]

['positive', 'negative', 'positive', 'positive']
['positive', 'positive', 'positive', 'negative']


 14%|█▎        | 17/125 [00:29<03:10,  1.77s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'positive', 'negative']


 14%|█▍        | 18/125 [00:31<03:18,  1.85s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'negative', 'negative', 'negative']


 15%|█▌        | 19/125 [00:33<03:20,  1.89s/it]

['negative', 'negative', 'positive', 'negative']
['negative', 'positive', 'negative', 'negative']


 16%|█▌        | 20/125 [00:35<03:20,  1.91s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'negative', 'negative', 'positive']


 17%|█▋        | 21/125 [00:37<03:16,  1.89s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 18%|█▊        | 22/125 [00:38<03:07,  1.82s/it]

['negative', 'positive', 'positive', 'negative']
['negative', 'positive', 'negative', 'negative']


 18%|█▊        | 23/125 [00:40<03:05,  1.82s/it]

['positive', 'positive', 'negative', 'positive']
['negative', 'positive', 'positive', 'positive']


 19%|█▉        | 24/125 [00:42<03:00,  1.79s/it]

['negative', 'negative', 'negative', 'positive']
['negative', 'positive', 'positive', 'positive']


 20%|██        | 25/125 [00:44<02:58,  1.78s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'negative', 'positive', 'positive']


 21%|██        | 26/125 [00:45<02:56,  1.78s/it]

['negative', 'negative', 'negative', 'positive']
['positive', 'negative', 'negative', 'negative']


 22%|██▏       | 27/125 [00:47<02:51,  1.75s/it]

['negative', 'negative', 'negative', 'positive']
['positive', 'positive', 'negative', 'negative']


 22%|██▏       | 28/125 [00:49<02:47,  1.73s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'positive', 'positive']


 23%|██▎       | 29/125 [00:51<02:43,  1.71s/it]

['positive', 'negative', 'positive', 'negative']
['positive', 'positive', 'positive', 'negative']


 24%|██▍       | 30/125 [00:52<02:42,  1.71s/it]

['positive', 'negative', 'negative', 'negative']
['positive', 'positive', 'positive', 'negative']


 25%|██▍       | 31/125 [00:54<02:40,  1.70s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'negative', 'negative']


 26%|██▌       | 32/125 [00:55<02:33,  1.65s/it]

['negative', 'positive', 'negative', 'negative']
['positive', 'negative', 'negative', 'positive']


 26%|██▋       | 33/125 [00:57<02:31,  1.65s/it]

['negative', 'negative', 'positive', 'positive']
['negative', 'negative', 'positive', 'positive']


 27%|██▋       | 34/125 [00:59<02:30,  1.65s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'negative', 'positive', 'negative']


 28%|██▊       | 35/125 [01:00<02:26,  1.63s/it]

['positive', 'negative', 'positive', 'positive']
['positive', 'positive', 'positive', 'positive']


 29%|██▉       | 36/125 [01:02<02:23,  1.62s/it]

['negative', 'negative', 'positive', 'positive']
['positive', 'negative', 'negative', 'positive']


 30%|██▉       | 37/125 [01:03<02:21,  1.60s/it]

['negative', 'positive', 'positive', 'negative']
['negative', 'positive', 'positive', 'negative']


 30%|███       | 38/125 [01:05<02:17,  1.58s/it]

['positive', 'positive', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 31%|███       | 39/125 [01:07<02:15,  1.58s/it]

['negative', 'positive', 'positive', 'negative']
['positive', 'negative', 'negative', 'positive']


 32%|███▏      | 40/125 [01:08<02:13,  1.57s/it]

['negative', 'positive', 'negative', 'positive']
['negative', 'positive', 'negative', 'negative']


 33%|███▎      | 41/125 [01:10<02:12,  1.58s/it]

['positive', 'positive', 'positive', 'negative']
['positive', 'positive', 'positive', 'negative']


 34%|███▎      | 42/125 [01:11<02:12,  1.59s/it]

['negative', 'negative', 'positive', 'negative']
['negative', 'positive', 'negative', 'positive']


 34%|███▍      | 43/125 [01:13<02:11,  1.61s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'positive', 'positive']


 35%|███▌      | 44/125 [01:15<02:11,  1.62s/it]

['positive', 'positive', 'positive', 'negative']
['positive', 'positive', 'negative', 'negative']


 36%|███▌      | 45/125 [01:16<02:11,  1.64s/it]

['positive', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'positive']


 37%|███▋      | 46/125 [01:18<02:15,  1.71s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'negative', 'positive']


 38%|███▊      | 47/125 [01:20<02:16,  1.76s/it]

['negative', 'positive', 'negative', 'positive']
['negative', 'positive', 'positive', 'negative']


 38%|███▊      | 48/125 [01:22<02:16,  1.78s/it]

['negative', 'positive', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 39%|███▉      | 49/125 [01:24<02:16,  1.80s/it]

['positive', 'positive', 'negative', 'positive']
['negative', 'negative', 'negative', 'positive']


 40%|████      | 50/125 [01:26<02:16,  1.82s/it]

['negative', 'positive', 'negative', 'positive']
['positive', 'positive', 'negative', 'negative']


 41%|████      | 51/125 [01:27<02:13,  1.80s/it]

['positive', 'positive', 'negative', 'negative']
['positive', 'positive', 'positive', 'positive']


 42%|████▏     | 52/125 [01:29<02:06,  1.73s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'negative', 'positive', 'negative']


 42%|████▏     | 53/125 [01:31<02:00,  1.68s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'positive', 'positive', 'negative']


 43%|████▎     | 54/125 [01:32<01:55,  1.62s/it]

['negative', 'negative', 'positive', 'negative']
['negative', 'negative', 'positive', 'positive']


 44%|████▍     | 55/125 [01:34<01:55,  1.66s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'positive', 'positive']


 45%|████▍     | 56/125 [01:35<01:53,  1.65s/it]

['negative', 'negative', 'negative', 'positive']
['negative', 'positive', 'negative', 'positive']


 46%|████▌     | 57/125 [01:37<01:49,  1.62s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'positive']


 46%|████▋     | 58/125 [01:38<01:47,  1.60s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'positive', 'negative', 'negative']


 47%|████▋     | 59/125 [01:40<01:44,  1.59s/it]

['positive', 'negative', 'positive', 'negative']
['positive', 'negative', 'negative', 'positive']


 48%|████▊     | 60/125 [01:42<01:47,  1.65s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'positive', 'negative', 'negative']


 49%|████▉     | 61/125 [01:43<01:45,  1.65s/it]

['positive', 'positive', 'negative', 'negative']
['negative', 'positive', 'positive', 'negative']


 50%|████▉     | 62/125 [01:45<01:43,  1.64s/it]

['positive', 'negative', 'negative', 'positive']
['positive', 'positive', 'negative', 'positive']


 50%|█████     | 63/125 [01:47<01:41,  1.63s/it]

['negative', 'positive', 'negative', 'negative']
['positive', 'negative', 'negative', 'negative']


 51%|█████     | 64/125 [01:48<01:37,  1.60s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'positive', 'positive']


 52%|█████▏    | 65/125 [01:50<01:34,  1.58s/it]

['positive', 'negative', 'negative', 'positive']
['positive', 'positive', 'negative', 'negative']


 53%|█████▎    | 66/125 [01:51<01:33,  1.59s/it]

['negative', 'negative', 'positive', 'negative']
['negative', 'negative', 'positive', 'positive']


 54%|█████▎    | 67/125 [01:53<01:31,  1.58s/it]

['negative', 'positive', 'positive', 'negative']
['positive', 'positive', 'negative', 'positive']


 54%|█████▍    | 68/125 [01:54<01:29,  1.58s/it]

['positive', 'negative', 'positive', 'negative']
['positive', 'negative', 'negative', 'negative']


 55%|█████▌    | 69/125 [01:56<01:27,  1.57s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'positive']


 56%|█████▌    | 70/125 [01:58<01:26,  1.57s/it]

['negative', 'negative', 'negative', 'positive']
['positive', 'negative', 'positive', 'negative']


 57%|█████▋    | 71/125 [01:59<01:24,  1.56s/it]

['positive', 'negative', 'negative', 'positive']
['positive', 'positive', 'positive', 'positive']


 58%|█████▊    | 72/125 [02:01<01:25,  1.60s/it]

['negative', 'negative', 'negative', 'positive']
['positive', 'positive', 'positive', 'negative']


 58%|█████▊    | 73/125 [02:03<01:27,  1.68s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'negative', 'negative', 'positive']


 59%|█████▉    | 74/125 [02:04<01:24,  1.66s/it]

['negative', 'positive', 'positive', 'negative']
['negative', 'positive', 'negative', 'positive']


 60%|██████    | 75/125 [02:06<01:21,  1.64s/it]

['negative', 'positive', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 61%|██████    | 76/125 [02:08<01:19,  1.63s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'positive']


 62%|██████▏   | 77/125 [02:09<01:16,  1.60s/it]

['positive', 'negative', 'positive', 'positive']
['negative', 'negative', 'positive', 'negative']


 62%|██████▏   | 78/125 [02:11<01:14,  1.59s/it]

['positive', 'negative', 'negative', 'positive']
['negative', 'positive', 'negative', 'negative']


 63%|██████▎   | 79/125 [02:12<01:12,  1.58s/it]

['negative', 'positive', 'negative', 'positive']
['positive', 'negative', 'negative', 'positive']


 64%|██████▍   | 80/125 [02:14<01:10,  1.57s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'positive']


 65%|██████▍   | 81/125 [02:15<01:09,  1.57s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'positive', 'negative']


 66%|██████▌   | 82/125 [02:17<01:08,  1.59s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'positive', 'positive', 'positive']


 66%|██████▋   | 83/125 [02:18<01:06,  1.59s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'positive', 'positive', 'negative']


 67%|██████▋   | 84/125 [02:20<01:05,  1.59s/it]

['positive', 'negative', 'positive', 'negative']
['negative', 'positive', 'positive', 'negative']


 68%|██████▊   | 85/125 [02:22<01:04,  1.61s/it]

['negative', 'positive', 'positive', 'negative']
['negative', 'positive', 'positive', 'positive']


 69%|██████▉   | 86/125 [02:24<01:05,  1.69s/it]

['negative', 'positive', 'negative', 'negative']
['positive', 'positive', 'positive', 'negative']


 70%|██████▉   | 87/125 [02:26<01:08,  1.80s/it]

['positive', 'negative', 'negative', 'positive']
['negative', 'negative', 'positive', 'negative']


 70%|███████   | 88/125 [02:28<01:08,  1.86s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'negative', 'positive', 'positive']


 71%|███████   | 89/125 [02:30<01:10,  1.95s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'positive']


 72%|███████▏  | 90/125 [02:32<01:08,  1.96s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'positive', 'negative']


 73%|███████▎  | 91/125 [02:34<01:05,  1.93s/it]

['positive', 'positive', 'positive', 'negative']
['negative', 'negative', 'negative', 'negative']


 74%|███████▎  | 92/125 [02:35<01:00,  1.84s/it]

['positive', 'negative', 'positive', 'positive']
['positive', 'positive', 'negative', 'positive']


 74%|███████▍  | 93/125 [02:37<00:57,  1.80s/it]

['positive', 'positive', 'negative', 'positive']
['positive', 'positive', 'positive', 'negative']


 75%|███████▌  | 94/125 [02:39<00:55,  1.79s/it]

['negative', 'negative', 'negative', 'positive']
['negative', 'positive', 'positive', 'positive']


 76%|███████▌  | 95/125 [02:40<00:52,  1.76s/it]

['negative', 'positive', 'positive', 'negative']
['positive', 'positive', 'negative', 'positive']


 77%|███████▋  | 96/125 [02:42<00:50,  1.75s/it]

['negative', 'positive', 'positive', 'positive']
['positive', 'negative', 'positive', 'negative']


 78%|███████▊  | 97/125 [02:44<00:48,  1.73s/it]

['negative', 'positive', 'positive', 'positive']
['positive', 'negative', 'positive', 'negative']


 78%|███████▊  | 98/125 [02:46<00:46,  1.71s/it]

['positive', 'positive', 'positive', 'negative']
['positive', 'positive', 'negative', 'positive']


 79%|███████▉  | 99/125 [02:47<00:44,  1.72s/it]

['positive', 'negative', 'negative', 'positive']
['negative', 'negative', 'negative', 'negative']


 80%|████████  | 100/125 [02:49<00:42,  1.72s/it]

['negative', 'positive', 'negative', 'negative']
['positive', 'positive', 'positive', 'negative']


 81%|████████  | 101/125 [02:51<00:40,  1.69s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'positive', 'positive', 'negative']


 82%|████████▏ | 102/125 [02:52<00:38,  1.67s/it]

['negative', 'negative', 'negative', 'positive']
['negative', 'negative', 'positive', 'positive']


 82%|████████▏ | 103/125 [02:54<00:36,  1.65s/it]

['negative', 'negative', 'positive', 'positive']
['negative', 'positive', 'positive', 'positive']


 83%|████████▎ | 104/125 [02:56<00:35,  1.68s/it]

['negative', 'positive', 'negative', 'negative']
['positive', 'positive', 'negative', 'positive']


 84%|████████▍ | 105/125 [02:57<00:33,  1.66s/it]

['negative', 'negative', 'negative', 'positive']
['negative', 'negative', 'negative', 'positive']


 85%|████████▍ | 106/125 [02:59<00:31,  1.63s/it]

['positive', 'positive', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 86%|████████▌ | 107/125 [03:00<00:29,  1.62s/it]

['negative', 'positive', 'positive', 'negative']
['positive', 'negative', 'positive', 'positive']


 86%|████████▋ | 108/125 [03:02<00:27,  1.62s/it]

['positive', 'negative', 'negative', 'negative']
['positive', 'positive', 'positive', 'positive']


 87%|████████▋ | 109/125 [03:04<00:26,  1.63s/it]

['negative', 'negative', 'negative', 'positive']
['positive', 'negative', 'negative', 'negative']


 88%|████████▊ | 110/125 [03:05<00:24,  1.64s/it]

['negative', 'positive', 'positive', 'positive']
['positive', 'positive', 'positive', 'negative']


 89%|████████▉ | 111/125 [03:07<00:22,  1.63s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'positive']


 90%|████████▉ | 112/125 [03:09<00:21,  1.64s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'negative', 'positive', 'negative']


 90%|█████████ | 113/125 [03:10<00:19,  1.65s/it]

['negative', 'positive', 'negative', 'positive']
['negative', 'positive', 'negative', 'positive']


 91%|█████████ | 114/125 [03:12<00:18,  1.64s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'negative', 'negative']


 92%|█████████▏| 115/125 [03:14<00:17,  1.72s/it]

['positive', 'positive', 'positive', 'positive']
['positive', 'positive', 'positive', 'negative']


 93%|█████████▎| 116/125 [03:16<00:15,  1.73s/it]

['positive', 'positive', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 94%|█████████▎| 117/125 [03:17<00:13,  1.70s/it]

['positive', 'positive', 'negative', 'positive']
['positive', 'positive', 'positive', 'positive']


 94%|█████████▍| 118/125 [03:19<00:11,  1.68s/it]

['negative', 'positive', 'positive', 'positive']
['positive', 'negative', 'negative', 'positive']


 95%|█████████▌| 119/125 [03:20<00:09,  1.66s/it]

['positive', 'negative', 'negative', 'negative']
['negative', 'negative', 'negative', 'positive']


 96%|█████████▌| 120/125 [03:22<00:08,  1.65s/it]

['negative', 'positive', 'positive', 'negative']
['negative', 'positive', 'positive', 'positive']


 97%|█████████▋| 121/125 [03:24<00:06,  1.63s/it]

['positive', 'negative', 'positive', 'negative']
['positive', 'positive', 'positive', 'positive']


 98%|█████████▊| 122/125 [03:25<00:04,  1.63s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'positive', 'negative']


 98%|█████████▊| 123/125 [03:27<00:03,  1.62s/it]

['negative', 'negative', 'positive', 'negative']
['negative', 'negative', 'positive', 'negative']


 99%|█████████▉| 124/125 [03:28<00:01,  1.62s/it]

['negative', 'negative', 'negative', 'positive']
['positive', 'positive', 'positive', 'positive']


100%|██████████| 125/125 [03:30<00:00,  1.68s/it]


['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


Epoch 1: 100%|██████████| 250/250 [26:33<00:00,  6.37s/it, loss=0.359]
  1%|          | 1/125 [00:01<03:16,  1.59s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'positive']


  2%|▏         | 2/125 [00:03<03:25,  1.67s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'negative', 'negative']


  2%|▏         | 3/125 [00:05<03:26,  1.69s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'positive']


  3%|▎         | 4/125 [00:06<03:28,  1.72s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'negative']


  4%|▍         | 5/125 [00:08<03:30,  1.76s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'negative']


  5%|▍         | 6/125 [00:10<03:27,  1.74s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'negative']


  6%|▌         | 7/125 [00:12<03:28,  1.76s/it]

['negative', 'negative', 'negative', 'positive']
['positive', 'negative', 'positive', 'positive']


  6%|▋         | 8/125 [00:13<03:27,  1.77s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'negative', 'negative', 'negative']


  7%|▋         | 9/125 [00:15<03:25,  1.77s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'positive', 'positive', 'negative']


  8%|▊         | 10/125 [00:17<03:22,  1.76s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'positive', 'negative', 'positive']


  9%|▉         | 11/125 [00:19<03:20,  1.76s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'negative']


 10%|▉         | 12/125 [00:20<03:18,  1.75s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'positive']


 10%|█         | 13/125 [00:22<03:15,  1.74s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'negative', 'positive']


 11%|█         | 14/125 [00:24<03:14,  1.75s/it]

['negative', 'negative', 'negative', 'positive']
['negative', 'positive', 'negative', 'positive']


 12%|█▏        | 15/125 [00:26<03:12,  1.75s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'positive']


 13%|█▎        | 16/125 [00:28<03:13,  1.77s/it]

['negative', 'negative', 'negative', 'positive']
['positive', 'positive', 'positive', 'negative']


 14%|█▎        | 17/125 [00:29<03:14,  1.80s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'positive', 'negative']


 14%|█▍        | 18/125 [00:31<03:15,  1.83s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'negative']


 15%|█▌        | 19/125 [00:33<03:11,  1.80s/it]

['negative', 'negative', 'positive', 'negative']
['negative', 'positive', 'negative', 'negative']


 16%|█▌        | 20/125 [00:35<03:06,  1.78s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'positive']


 17%|█▋        | 21/125 [00:36<03:04,  1.77s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 18%|█▊        | 22/125 [00:38<03:01,  1.76s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'positive', 'negative', 'negative']


 18%|█▊        | 23/125 [00:40<03:00,  1.77s/it]

['negative', 'positive', 'negative', 'positive']
['negative', 'positive', 'positive', 'positive']


 19%|█▉        | 24/125 [00:42<02:56,  1.75s/it]

['negative', 'negative', 'negative', 'positive']
['negative', 'positive', 'positive', 'positive']


 20%|██        | 25/125 [00:43<02:55,  1.75s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'positive']


 21%|██        | 26/125 [00:45<02:53,  1.75s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'negative']


 22%|██▏       | 27/125 [00:47<02:50,  1.74s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 22%|██▏       | 28/125 [00:49<02:50,  1.76s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'positive', 'positive']


 23%|██▎       | 29/125 [00:50<02:48,  1.76s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'positive', 'positive', 'negative']


 24%|██▍       | 30/125 [00:52<02:47,  1.76s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'positive', 'negative']


 25%|██▍       | 31/125 [00:54<02:43,  1.74s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'negative', 'negative']


 26%|██▌       | 32/125 [00:56<02:43,  1.75s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'positive']


 26%|██▋       | 33/125 [00:57<02:40,  1.74s/it]

['negative', 'negative', 'positive', 'negative']
['negative', 'negative', 'positive', 'positive']


 27%|██▋       | 34/125 [00:59<02:38,  1.74s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'negative']


 28%|██▊       | 35/125 [01:01<02:36,  1.74s/it]

['positive', 'negative', 'negative', 'positive']
['positive', 'positive', 'positive', 'positive']


 29%|██▉       | 36/125 [01:03<02:35,  1.74s/it]

['negative', 'negative', 'negative', 'positive']
['positive', 'negative', 'negative', 'positive']


 30%|██▉       | 37/125 [01:04<02:33,  1.75s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'positive', 'negative']


 30%|███       | 38/125 [01:06<02:34,  1.78s/it]

['positive', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 31%|███       | 39/125 [01:08<02:32,  1.77s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'positive']


 32%|███▏      | 40/125 [01:10<02:36,  1.84s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'negative', 'negative']


 33%|███▎      | 41/125 [01:12<02:44,  1.96s/it]

['positive', 'negative', 'negative', 'negative']
['positive', 'positive', 'positive', 'negative']


 34%|███▎      | 42/125 [01:14<02:43,  1.97s/it]

['negative', 'negative', 'positive', 'negative']
['negative', 'positive', 'negative', 'positive']


 34%|███▍      | 43/125 [01:16<02:40,  1.96s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'positive', 'positive']


 35%|███▌      | 44/125 [01:18<02:36,  1.93s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'positive', 'negative', 'negative']


 36%|███▌      | 45/125 [01:20<02:30,  1.89s/it]

['positive', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'positive']


 37%|███▋      | 46/125 [01:22<02:27,  1.87s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'negative', 'positive']


 38%|███▊      | 47/125 [01:24<02:26,  1.88s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'positive', 'positive', 'negative']


 38%|███▊      | 48/125 [01:25<02:21,  1.84s/it]

['negative', 'positive', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 39%|███▉      | 49/125 [01:27<02:18,  1.82s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'negative', 'positive']


 40%|████      | 50/125 [01:29<02:17,  1.84s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 41%|████      | 51/125 [01:31<02:14,  1.82s/it]

['positive', 'positive', 'negative', 'negative']
['positive', 'positive', 'positive', 'positive']


 42%|████▏     | 52/125 [01:33<02:11,  1.80s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'negative']


 42%|████▏     | 53/125 [01:34<02:07,  1.78s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'positive', 'positive', 'negative']


 43%|████▎     | 54/125 [01:36<02:06,  1.78s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'positive', 'positive']


 44%|████▍     | 55/125 [01:38<02:04,  1.78s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'positive', 'positive']


 45%|████▍     | 56/125 [01:40<02:03,  1.78s/it]

['negative', 'negative', 'negative', 'positive']
['negative', 'positive', 'negative', 'positive']


 46%|████▌     | 57/125 [01:41<02:00,  1.77s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'positive']


 46%|████▋     | 58/125 [01:43<01:58,  1.77s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'negative', 'negative']


 47%|████▋     | 59/125 [01:45<01:57,  1.78s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'positive']


 48%|████▊     | 60/125 [01:47<01:56,  1.79s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'positive', 'negative', 'negative']


 49%|████▉     | 61/125 [01:48<01:53,  1.77s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'positive', 'positive', 'negative']


 50%|████▉     | 62/125 [01:50<01:51,  1.77s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'positive']


 50%|█████     | 63/125 [01:52<01:49,  1.77s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'negative']


 51%|█████     | 64/125 [01:54<01:47,  1.76s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'positive', 'positive']


 52%|█████▏    | 65/125 [01:55<01:45,  1.75s/it]

['positive', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 53%|█████▎    | 66/125 [01:57<01:43,  1.75s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'positive', 'positive']


 54%|█████▎    | 67/125 [01:59<01:41,  1.76s/it]

['negative', 'positive', 'negative', 'negative']
['positive', 'positive', 'negative', 'positive']


 54%|█████▍    | 68/125 [02:01<01:39,  1.75s/it]

['positive', 'negative', 'positive', 'negative']
['positive', 'negative', 'negative', 'negative']


 55%|█████▌    | 69/125 [02:03<01:39,  1.77s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'positive']


 56%|█████▌    | 70/125 [02:04<01:37,  1.78s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'negative']


 57%|█████▋    | 71/125 [02:06<01:35,  1.77s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'positive', 'positive']


 58%|█████▊    | 72/125 [02:08<01:33,  1.77s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'positive', 'negative']


 58%|█████▊    | 73/125 [02:10<01:31,  1.77s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'negative', 'positive']


 59%|█████▉    | 74/125 [02:11<01:30,  1.78s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'negative', 'positive']


 60%|██████    | 75/125 [02:13<01:29,  1.78s/it]

['negative', 'positive', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 61%|██████    | 76/125 [02:15<01:27,  1.79s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'positive']


 62%|██████▏   | 77/125 [02:17<01:25,  1.77s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'positive', 'negative']


 62%|██████▏   | 78/125 [02:19<01:23,  1.78s/it]

['negative', 'negative', 'negative', 'positive']
['negative', 'positive', 'negative', 'negative']


 63%|██████▎   | 79/125 [02:20<01:22,  1.79s/it]

['negative', 'negative', 'negative', 'positive']
['positive', 'negative', 'negative', 'positive']


 64%|██████▍   | 80/125 [02:22<01:21,  1.80s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'positive']


 65%|██████▍   | 81/125 [02:24<01:19,  1.80s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'positive', 'negative']


 66%|██████▌   | 82/125 [02:26<01:16,  1.78s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'positive', 'positive']


 66%|██████▋   | 83/125 [02:28<01:15,  1.80s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'positive', 'positive', 'negative']


 67%|██████▋   | 84/125 [02:29<01:13,  1.79s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'positive', 'negative']


 68%|██████▊   | 85/125 [02:31<01:11,  1.78s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'positive', 'positive']


 69%|██████▉   | 86/125 [02:33<01:09,  1.77s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'positive', 'negative']


 70%|██████▉   | 87/125 [02:35<01:07,  1.77s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'positive', 'negative']


 70%|███████   | 88/125 [02:36<01:05,  1.77s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'positive']


 71%|███████   | 89/125 [02:38<01:03,  1.76s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'positive']


 72%|███████▏  | 90/125 [02:40<01:01,  1.76s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'positive', 'negative']


 73%|███████▎  | 91/125 [02:42<00:59,  1.75s/it]

['positive', 'negative', 'negative', 'negative']
['negative', 'negative', 'negative', 'negative']


 74%|███████▎  | 92/125 [02:43<00:57,  1.76s/it]

['positive', 'negative', 'positive', 'positive']
['positive', 'positive', 'negative', 'positive']


 74%|███████▍  | 93/125 [02:45<00:55,  1.74s/it]

['negative', 'positive', 'negative', 'negative']
['positive', 'positive', 'positive', 'negative']


 75%|███████▌  | 94/125 [02:47<00:55,  1.78s/it]

['negative', 'negative', 'negative', 'positive']
['negative', 'positive', 'positive', 'positive']


 76%|███████▌  | 95/125 [02:49<00:52,  1.76s/it]

['negative', 'positive', 'negative', 'negative']
['positive', 'positive', 'negative', 'positive']


 77%|███████▋  | 96/125 [02:50<00:51,  1.76s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'negative']


 78%|███████▊  | 97/125 [02:52<00:49,  1.76s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'negative']


 78%|███████▊  | 98/125 [02:54<00:47,  1.75s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'positive']


 79%|███████▉  | 99/125 [02:56<00:45,  1.77s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'negative', 'negative']


 80%|████████  | 100/125 [02:57<00:43,  1.74s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'positive', 'negative']


 81%|████████  | 101/125 [02:59<00:42,  1.76s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'positive', 'negative']


 82%|████████▏ | 102/125 [03:01<00:40,  1.75s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'positive', 'positive']


 82%|████████▏ | 103/125 [03:03<00:38,  1.75s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'positive', 'positive']


 83%|████████▎ | 104/125 [03:04<00:36,  1.75s/it]

['negative', 'positive', 'negative', 'negative']
['positive', 'positive', 'negative', 'positive']


 84%|████████▍ | 105/125 [03:06<00:35,  1.75s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'negative', 'positive']


 85%|████████▍ | 106/125 [03:08<00:33,  1.76s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 86%|████████▌ | 107/125 [03:10<00:31,  1.75s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'negative', 'positive', 'positive']


 86%|████████▋ | 108/125 [03:11<00:29,  1.75s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'positive', 'positive']


 87%|████████▋ | 109/125 [03:13<00:27,  1.75s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'negative']


 88%|████████▊ | 110/125 [03:15<00:26,  1.75s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'positive', 'positive', 'negative']


 89%|████████▉ | 111/125 [03:17<00:24,  1.74s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'positive']


 90%|████████▉ | 112/125 [03:19<00:23,  1.79s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'negative', 'positive', 'negative']


 90%|█████████ | 113/125 [03:20<00:21,  1.78s/it]

['negative', 'positive', 'negative', 'positive']
['negative', 'positive', 'negative', 'positive']


 91%|█████████ | 114/125 [03:22<00:19,  1.78s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'negative', 'negative']


 92%|█████████▏| 115/125 [03:24<00:17,  1.80s/it]

['negative', 'positive', 'negative', 'negative']
['positive', 'positive', 'positive', 'negative']


 93%|█████████▎| 116/125 [03:26<00:16,  1.79s/it]

['positive', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 94%|█████████▎| 117/125 [03:27<00:14,  1.78s/it]

['positive', 'negative', 'negative', 'positive']
['positive', 'positive', 'positive', 'positive']


 94%|█████████▍| 118/125 [03:29<00:12,  1.76s/it]

['negative', 'negative', 'positive', 'positive']
['positive', 'negative', 'negative', 'positive']


 95%|█████████▌| 119/125 [03:31<00:10,  1.78s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'negative', 'positive']


 96%|█████████▌| 120/125 [03:33<00:08,  1.77s/it]

['negative', 'positive', 'positive', 'negative']
['negative', 'positive', 'positive', 'positive']


 97%|█████████▋| 121/125 [03:35<00:07,  1.77s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'positive', 'positive']


 98%|█████████▊| 122/125 [03:36<00:05,  1.76s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'positive', 'negative']


 98%|█████████▊| 123/125 [03:38<00:03,  1.75s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'positive', 'negative']


 99%|█████████▉| 124/125 [03:40<00:01,  1.75s/it]

['negative', 'negative', 'negative', 'positive']
['positive', 'positive', 'positive', 'positive']


100%|██████████| 125/125 [03:42<00:00,  1.78s/it]


['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


Epoch 2: 100%|██████████| 250/250 [24:46<00:00,  5.95s/it, loss=0.247]
  1%|          | 1/125 [00:01<03:06,  1.51s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'positive']


  2%|▏         | 2/125 [00:02<02:59,  1.46s/it]

['positive', 'negative', 'negative', 'negative']
['negative', 'positive', 'negative', 'negative']


  2%|▏         | 3/125 [00:04<02:59,  1.47s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'negative', 'positive', 'positive']


  3%|▎         | 4/125 [00:05<03:00,  1.49s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'negative']


  4%|▍         | 5/125 [00:07<02:59,  1.50s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'negative', 'negative', 'negative']


  5%|▍         | 6/125 [00:08<02:59,  1.51s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'negative']


  6%|▌         | 7/125 [00:10<02:58,  1.51s/it]

['negative', 'negative', 'positive', 'positive']
['positive', 'negative', 'positive', 'positive']


  6%|▋         | 8/125 [00:11<02:54,  1.49s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'negative', 'negative', 'negative']


  7%|▋         | 9/125 [00:13<02:56,  1.52s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'positive', 'positive', 'negative']


  8%|▊         | 10/125 [00:15<02:53,  1.51s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'positive', 'negative', 'positive']


  9%|▉         | 11/125 [00:16<02:52,  1.51s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'negative']


 10%|▉         | 12/125 [00:17<02:46,  1.47s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'positive']


 10%|█         | 13/125 [00:19<02:45,  1.48s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'negative', 'positive']


 11%|█         | 14/125 [00:20<02:44,  1.48s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'negative', 'positive']


 12%|█▏        | 15/125 [00:22<02:43,  1.49s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'positive']


 13%|█▎        | 16/125 [00:23<02:43,  1.50s/it]

['negative', 'negative', 'negative', 'positive']
['positive', 'positive', 'positive', 'negative']


 14%|█▎        | 17/125 [00:25<02:40,  1.49s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'positive', 'negative']


 14%|█▍        | 18/125 [00:26<02:40,  1.50s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'negative']


 15%|█▌        | 19/125 [00:28<02:37,  1.48s/it]

['negative', 'negative', 'positive', 'negative']
['negative', 'positive', 'negative', 'negative']


 16%|█▌        | 20/125 [00:30<02:41,  1.54s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'positive']


 17%|█▋        | 21/125 [00:31<02:37,  1.52s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 18%|█▊        | 22/125 [00:32<02:34,  1.50s/it]

['negative', 'positive', 'positive', 'negative']
['negative', 'positive', 'negative', 'negative']


 18%|█▊        | 23/125 [00:34<02:33,  1.51s/it]

['negative', 'positive', 'negative', 'positive']
['negative', 'positive', 'positive', 'positive']


 19%|█▉        | 24/125 [00:36<02:33,  1.52s/it]

['negative', 'negative', 'negative', 'positive']
['negative', 'positive', 'positive', 'positive']


 20%|██        | 25/125 [00:37<02:31,  1.52s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'negative', 'positive', 'positive']


 21%|██        | 26/125 [00:38<02:26,  1.48s/it]

['positive', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'negative']


 22%|██▏       | 27/125 [00:40<02:27,  1.50s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 22%|██▏       | 28/125 [00:41<02:25,  1.50s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'negative', 'positive', 'positive']


 23%|██▎       | 29/125 [00:43<02:23,  1.49s/it]

['positive', 'negative', 'positive', 'negative']
['positive', 'positive', 'positive', 'negative']


 24%|██▍       | 30/125 [00:44<02:21,  1.49s/it]

['positive', 'negative', 'negative', 'negative']
['positive', 'positive', 'positive', 'negative']


 25%|██▍       | 31/125 [00:46<02:20,  1.50s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'negative', 'negative']


 26%|██▌       | 32/125 [00:47<02:18,  1.49s/it]

['negative', 'positive', 'negative', 'negative']
['positive', 'negative', 'negative', 'positive']


 26%|██▋       | 33/125 [00:49<02:17,  1.49s/it]

['negative', 'negative', 'positive', 'negative']
['negative', 'negative', 'positive', 'positive']


 27%|██▋       | 34/125 [00:50<02:13,  1.46s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'negative', 'positive', 'negative']


 28%|██▊       | 35/125 [00:52<02:11,  1.46s/it]

['positive', 'negative', 'negative', 'positive']
['positive', 'positive', 'positive', 'positive']


 29%|██▉       | 36/125 [00:53<02:09,  1.45s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'positive']


 30%|██▉       | 37/125 [00:55<02:10,  1.49s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'positive', 'negative']


 30%|███       | 38/125 [00:56<02:10,  1.50s/it]

['positive', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 31%|███       | 39/125 [00:58<02:09,  1.51s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'negative', 'negative', 'positive']


 32%|███▏      | 40/125 [00:59<02:07,  1.50s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'positive', 'negative', 'negative']


 33%|███▎      | 41/125 [01:01<02:06,  1.50s/it]

['positive', 'positive', 'negative', 'negative']
['positive', 'positive', 'positive', 'negative']


 34%|███▎      | 42/125 [01:02<02:05,  1.51s/it]

['negative', 'negative', 'positive', 'negative']
['negative', 'positive', 'negative', 'positive']


 34%|███▍      | 43/125 [01:04<02:02,  1.50s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'positive', 'positive']


 35%|███▌      | 44/125 [01:05<02:06,  1.56s/it]

['negative', 'positive', 'positive', 'negative']
['positive', 'positive', 'negative', 'negative']


 36%|███▌      | 45/125 [01:07<02:06,  1.58s/it]

['positive', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'positive']


 37%|███▋      | 46/125 [01:09<02:06,  1.60s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'negative', 'positive']


 38%|███▊      | 47/125 [01:10<02:01,  1.56s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'positive', 'positive', 'negative']


 38%|███▊      | 48/125 [01:12<01:59,  1.55s/it]

['negative', 'positive', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 39%|███▉      | 49/125 [01:13<01:59,  1.58s/it]

['positive', 'negative', 'negative', 'negative']
['negative', 'negative', 'negative', 'positive']


 40%|████      | 50/125 [01:15<01:57,  1.57s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 41%|████      | 51/125 [01:16<01:53,  1.53s/it]

['positive', 'positive', 'negative', 'negative']
['positive', 'positive', 'positive', 'positive']


 42%|████▏     | 52/125 [01:18<01:50,  1.52s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'negative', 'positive', 'negative']


 42%|████▏     | 53/125 [01:19<01:48,  1.51s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'positive', 'positive', 'negative']


 43%|████▎     | 54/125 [01:21<01:46,  1.51s/it]

['negative', 'negative', 'positive', 'negative']
['negative', 'negative', 'positive', 'positive']


 44%|████▍     | 55/125 [01:22<01:45,  1.50s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'positive', 'positive']


 45%|████▍     | 56/125 [01:24<01:43,  1.49s/it]

['negative', 'negative', 'negative', 'positive']
['negative', 'positive', 'negative', 'positive']


 46%|████▌     | 57/125 [01:25<01:41,  1.50s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'positive']


 46%|████▋     | 58/125 [01:27<01:39,  1.49s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'positive', 'negative', 'negative']


 47%|████▋     | 59/125 [01:28<01:38,  1.50s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'positive']


 48%|████▊     | 60/125 [01:30<01:37,  1.50s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'positive', 'negative', 'negative']


 49%|████▉     | 61/125 [01:31<01:37,  1.52s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'positive', 'positive', 'negative']


 50%|████▉     | 62/125 [01:33<01:34,  1.50s/it]

['positive', 'negative', 'negative', 'positive']
['positive', 'positive', 'negative', 'positive']


 50%|█████     | 63/125 [01:34<01:32,  1.49s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'negative']


 51%|█████     | 64/125 [01:36<01:30,  1.48s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'positive', 'positive']


 52%|█████▏    | 65/125 [01:37<01:29,  1.50s/it]

['positive', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 53%|█████▎    | 66/125 [01:39<01:29,  1.51s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'positive', 'positive']


 54%|█████▎    | 67/125 [01:40<01:26,  1.49s/it]

['negative', 'positive', 'positive', 'negative']
['positive', 'positive', 'negative', 'positive']


 54%|█████▍    | 68/125 [01:42<01:25,  1.49s/it]

['positive', 'negative', 'positive', 'negative']
['positive', 'negative', 'negative', 'negative']


 55%|█████▌    | 69/125 [01:43<01:23,  1.50s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'positive']


 56%|█████▌    | 70/125 [01:45<01:22,  1.49s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'negative']


 57%|█████▋    | 71/125 [01:46<01:21,  1.50s/it]

['negative', 'negative', 'negative', 'positive']
['positive', 'positive', 'positive', 'positive']


 58%|█████▊    | 72/125 [01:48<01:19,  1.50s/it]

['negative', 'negative', 'negative', 'positive']
['positive', 'positive', 'positive', 'negative']


 58%|█████▊    | 73/125 [01:49<01:17,  1.50s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'negative', 'positive']


 59%|█████▉    | 74/125 [01:51<01:16,  1.49s/it]

['negative', 'positive', 'positive', 'negative']
['negative', 'positive', 'negative', 'positive']


 60%|██████    | 75/125 [01:52<01:14,  1.50s/it]

['negative', 'positive', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 61%|██████    | 76/125 [01:54<01:14,  1.52s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'positive']


 62%|██████▏   | 77/125 [01:55<01:12,  1.52s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'positive', 'negative']


 62%|██████▏   | 78/125 [01:57<01:10,  1.49s/it]

['negative', 'negative', 'negative', 'positive']
['negative', 'positive', 'negative', 'negative']


 63%|██████▎   | 79/125 [01:58<01:08,  1.50s/it]

['negative', 'negative', 'negative', 'positive']
['positive', 'negative', 'negative', 'positive']


 64%|██████▍   | 80/125 [02:00<01:07,  1.50s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'positive']


 65%|██████▍   | 81/125 [02:01<01:05,  1.49s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'positive', 'negative']


 66%|██████▌   | 82/125 [02:03<01:04,  1.50s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'positive', 'positive']


 66%|██████▋   | 83/125 [02:04<01:03,  1.52s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'positive', 'positive', 'negative']


 67%|██████▋   | 84/125 [02:06<01:01,  1.50s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'positive', 'negative']


 68%|██████▊   | 85/125 [02:07<01:00,  1.50s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'positive', 'positive']


 69%|██████▉   | 86/125 [02:09<00:58,  1.49s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'positive', 'negative']


 70%|██████▉   | 87/125 [02:10<00:56,  1.50s/it]

['negative', 'negative', 'negative', 'positive']
['negative', 'negative', 'positive', 'negative']


 70%|███████   | 88/125 [02:12<00:55,  1.49s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'positive']


 71%|███████   | 89/125 [02:13<00:53,  1.49s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'positive']


 72%|███████▏  | 90/125 [02:15<00:52,  1.50s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'positive', 'negative']


 73%|███████▎  | 91/125 [02:16<00:50,  1.49s/it]

['positive', 'negative', 'positive', 'negative']
['negative', 'negative', 'negative', 'negative']


 74%|███████▎  | 92/125 [02:18<00:49,  1.51s/it]

['positive', 'negative', 'positive', 'positive']
['positive', 'positive', 'negative', 'positive']


 74%|███████▍  | 93/125 [02:19<00:47,  1.50s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'positive', 'negative']


 75%|███████▌  | 94/125 [02:21<00:46,  1.50s/it]

['negative', 'negative', 'negative', 'positive']
['negative', 'positive', 'positive', 'positive']


 76%|███████▌  | 95/125 [02:22<00:44,  1.49s/it]

['negative', 'positive', 'negative', 'negative']
['positive', 'positive', 'negative', 'positive']


 77%|███████▋  | 96/125 [02:24<00:42,  1.48s/it]

['negative', 'negative', 'positive', 'positive']
['positive', 'negative', 'positive', 'negative']


 78%|███████▊  | 97/125 [02:25<00:41,  1.48s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'positive', 'negative']


 78%|███████▊  | 98/125 [02:27<00:41,  1.53s/it]

['negative', 'positive', 'negative', 'negative']
['positive', 'positive', 'negative', 'positive']


 79%|███████▉  | 99/125 [02:28<00:39,  1.51s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'negative', 'negative']


 80%|████████  | 100/125 [02:30<00:37,  1.49s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'positive', 'negative']


 81%|████████  | 101/125 [02:31<00:35,  1.49s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'positive', 'negative']


 82%|████████▏ | 102/125 [02:33<00:34,  1.48s/it]

['negative', 'negative', 'negative', 'positive']
['negative', 'negative', 'positive', 'positive']


 82%|████████▏ | 103/125 [02:34<00:32,  1.50s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'positive', 'positive', 'positive']


 83%|████████▎ | 104/125 [02:36<00:31,  1.52s/it]

['negative', 'positive', 'negative', 'negative']
['positive', 'positive', 'negative', 'positive']


 84%|████████▍ | 105/125 [02:37<00:29,  1.48s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'negative', 'positive']


 85%|████████▍ | 106/125 [02:39<00:28,  1.47s/it]

['negative', 'positive', 'negative', 'positive']
['positive', 'positive', 'negative', 'negative']


 86%|████████▌ | 107/125 [02:40<00:27,  1.50s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'negative', 'positive', 'positive']


 86%|████████▋ | 108/125 [02:42<00:25,  1.52s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'positive', 'positive']


 87%|████████▋ | 109/125 [02:43<00:24,  1.53s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'negative', 'negative', 'negative']


 88%|████████▊ | 110/125 [02:45<00:22,  1.53s/it]

['negative', 'positive', 'positive', 'negative']
['positive', 'positive', 'positive', 'negative']


 89%|████████▉ | 111/125 [02:46<00:21,  1.52s/it]

['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'positive']


 90%|████████▉ | 112/125 [02:48<00:19,  1.49s/it]

['negative', 'positive', 'negative', 'negative']
['negative', 'negative', 'positive', 'negative']


 90%|█████████ | 113/125 [02:49<00:17,  1.49s/it]

['negative', 'positive', 'negative', 'positive']
['negative', 'positive', 'negative', 'positive']


 91%|█████████ | 114/125 [02:51<00:16,  1.50s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'negative', 'negative']


 92%|█████████▏| 115/125 [02:52<00:15,  1.51s/it]

['positive', 'positive', 'positive', 'negative']
['positive', 'positive', 'positive', 'negative']


 93%|█████████▎| 116/125 [02:54<00:13,  1.51s/it]

['positive', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


 94%|█████████▎| 117/125 [02:55<00:12,  1.50s/it]

['positive', 'negative', 'negative', 'positive']
['positive', 'positive', 'positive', 'positive']


 94%|█████████▍| 118/125 [02:57<00:10,  1.48s/it]

['negative', 'negative', 'positive', 'positive']
['positive', 'negative', 'negative', 'positive']


 95%|█████████▌| 119/125 [02:58<00:08,  1.50s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'negative', 'positive']


 96%|█████████▌| 120/125 [03:00<00:07,  1.53s/it]

['negative', 'positive', 'positive', 'negative']
['negative', 'positive', 'positive', 'positive']


 97%|█████████▋| 121/125 [03:02<00:06,  1.55s/it]

['negative', 'negative', 'positive', 'negative']
['positive', 'positive', 'positive', 'positive']


 98%|█████████▊| 122/125 [03:03<00:04,  1.54s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'positive', 'negative']


 98%|█████████▊| 123/125 [03:04<00:03,  1.51s/it]

['negative', 'negative', 'negative', 'negative']
['negative', 'negative', 'positive', 'negative']


 99%|█████████▉| 124/125 [03:06<00:01,  1.52s/it]

['negative', 'negative', 'negative', 'positive']
['positive', 'positive', 'positive', 'positive']


100%|██████████| 125/125 [03:08<00:00,  1.50s/it]


['negative', 'negative', 'negative', 'negative']
['positive', 'positive', 'negative', 'negative']


Epoch 3:  91%|█████████ | 228/250 [24:00<02:18,  6.32s/it, loss=0.312]


KeyboardInterrupt: 

In [None]:
training_info

{'train_loss': [0.1478367365965205, 0.0010359602420539887],
 'test_loss': [0.0, 0.0],
 'test_accuracy': [0.0, 0.0]}