<a href="https://colab.research.google.com/github/Jeevesh8/arg_mining/blob/main/experiments/long_context_am.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Install Dependencies

In [None]:
%%capture
# If running on Colab, uncomment and run the 4 commands below
#!git clone https://github.com/Jeevesh8/arg_mining
#!pip install transformers
#!pip install seqeval datasets allennlp
#!pip install flax

# If connected to a local runtime, uncomment and run the next command too
#pip install bs4 tensorflow torch 



*   Update ``arg_mining/datasets/cmv_modes/configs.py`` as per your requirements. All experiments run so far set ``batch_size`` to 2 and leave every other variable at its default value.



In [1]:
#Run to ignore warnings
import warnings
warnings.filterwarnings('ignore')

### Load Metric

In [2]:
# Shared seqeval metric object; evaluate() feeds it predicted/reference label
# sequences via add_batch() and prints the result of compute().
# NOTE(review): newer releases of `datasets` reportedly deprecate/remove
# load_metric in favour of the separate `evaluate` package - confirm against
# the installed version before re-running.
from datasets import load_metric
metric = load_metric('seqeval')

### Define & Load Tokenizer, Model, Dataset

In [3]:
import torch
# Use the first CUDA GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [4]:
device

device(type='cuda', index=0)

In [5]:
# Name of the pretrained checkpoint; passed to from_pretrained() for both the
# tokenizer and the encoder in the next cell.
model_version = 'allenai/longformer-base-4096'

In [6]:
%%capture
from transformers import LongformerTokenizer, AutoModel
# Tokenizer and encoder are both loaded from the same checkpoint name.
tokenizer = LongformerTokenizer.from_pretrained(model_version)

# Load the encoder first, then move its weights onto the selected device.
transformer_model = AutoModel.from_pretrained(model_version)
transformer_model = transformer_model.to(device)

In [7]:
%%capture
from arg_mining.datasets.cmv_modes import load_dataset, data_config

# Register the dataset's marker tokens with the tokenizer...
tokenizer.add_tokens(data_config["special_tokens"])

# ...and resize the encoder's embedding matrix to the enlarged vocabulary.
transformer_model.resize_token_embeddings(new_num_tokens=len(tokenizer))

def get_datasets(train_sz=80, test_sz=20):
    """Load the train/validation/test splits using the notebook's tokenizer.

    Args:
        train_sz: Forwarded to ``load_dataset`` as ``train_sz``. Defaults to
                  80, the value every experiment in this notebook used.
                  (presumably a percentage of the data - TODO confirm
                  against ``load_dataset``)
        test_sz:  Forwarded to ``load_dataset`` as ``test_sz``. Defaults to 20.

    Returns:
        The tuple ``(train_dataset, valid_dataset, test_dataset)`` exactly as
        produced by ``load_dataset``.
    """
    train_dataset, valid_dataset, test_dataset = load_dataset(tokenizer=tokenizer,
                                                              train_sz=train_sz,
                                                              test_sz=test_sz)
    return train_dataset, valid_dataset, test_dataset

### Define layers for a Linear-Chain-CRF

In [8]:
import torch.nn as nn

from allennlp.modules.conditional_random_field import ConditionalRandomField as crf

# Maps each component tag name ("B-C", "I-C", "B-P", "I-P", "O") to its integer id.
ac_dict = data_config["arg_components"]

# For every tag, the tags that are allowed to follow it directly.
# A B-x tag may only be continued by the matching I-x tag; I-x and O may be
# followed by a new component, by O, or (for I-x) by themselves.
_next_tags = {
    "B-C": ["I-C"],
    "B-P": ["I-P"],
    "I-C": ["I-C", "B-C", "B-P", "O"],
    "I-P": ["I-P", "B-C", "B-P", "O"],
    "O": ["O", "B-C", "B-P"],
}

# Flatten into (from_id, to_id) pairs, which is the format the CRF takes as constraints.
allowed_transitions = [(ac_dict[src], ac_dict[dst])
                       for src, followers in _next_tags.items()
                       for dst in followers]

# Projects every encoder hidden state onto one score per tag.
hidden_size = transformer_model.config.hidden_size
linear_layer = nn.Linear(hidden_size, len(ac_dict)).to(device)

# Linear-chain CRF restricted to the transitions listed above.
crf_layer = crf(num_tags=len(ac_dict),
                constraints=allowed_transitions,
                include_start_end_transitions=False).to(device)

# Per-token weighted cross entropy (no reduction); class weights are the log
# of the constants below.
# NOTE(review): the constants are presumably inverse class frequencies ordered
# by tag id - confirm against the dataset statistics.
_class_weights = torch.log(torch.tensor([3.8087, 54.6686, 3.4161, 45.3662, 2.4729],
                                        device=device))
cross_entropy_layer = nn.CrossEntropyLoss(weight=_class_weights, reduction='none')

### Global Attention Mask Utility for Longformer

In [9]:
import numpy as np
from threading import Lock
MUTEX = Lock()

def get_global_attention_mask(tokenized_threads: np.ndarray) -> np.ndarray:
    """Returns an attention mask, with True where there are user marker tokens
    and False elsewhere.

    The marker tokens are "UNU" and "[USER{i}]" for every i in
    range(data_config["max_users"]).

    Args:
        tokenized_threads: Array of token ids, of any shape.

    Returns:
        A boolean array with the same shape as ``tokenized_threads``.
    """
    user_tokens = ["UNU"] + [f"[USER{i}]" for i in range(data_config["max_users"])]

    # Every tokenizer call is made while holding MUTEX; the context manager
    # releases the lock even if encode() raises.
    with MUTEX:
        # [1:-1] drops the first and last ids that encode() returns
        # (presumably the start/end markers it adds - TODO confirm).
        encoded = [tokenizer.encode(user_token)[1:-1] for user_token in user_tokens]

    # Only a marker that encodes to exactly one id can be matched position by
    # position. Comparing the id array against a longer list is a broadcast
    # failure: silently "no match" on older NumPy, an error on recent releases.
    # Such markers are skipped so the result is well defined everywhere.
    # NOTE(review): "UNU" lacks the brackets used by the "[USER{i}]" markers;
    # confirm it matches an entry of data_config["special_tokens"].
    marker_ids = [ids[0] for ids in encoded if len(ids) == 1]

    return np.isin(tokenized_threads, marker_ids)

### Loss and Prediction Function

In [10]:
from typing import Tuple

In [11]:
def compute(batch: Tuple[torch.Tensor, torch.Tensor, torch.Tensor],
            preds: bool=False, cross_entropy: bool=True):
    """
    Args:
        batch:  A tuple having tokenized thread of shape [batch_size, seq_len],
                component type labels of shape [batch_size, seq_len], and a global
                attention mask for Longformer, of the same shape.
        
        preds:  If True, returns a List(of batch_size size) of Tuples of form 
                (tag_sequence, viterbi_score) where the tag_sequence is the 
                viterbi-decoded sequence, for the corresponding sample in the batch.
        
        cross_entropy:  This argument will only be used if preds=False, i.e., if 
                        loss is being calculated. If True, then cross entropy loss
                        will also be added to the output loss.
    
    Returns:
        Either the predicted sequences with their scores for each element in the batch
        (if preds is True), or the loss value summed over all elements of the batch
        (if preds is False).
    """
    tokenized_threads, comp_type_labels, global_attention_mask = batch
    
    pad_mask = torch.where(tokenized_threads!=tokenizer.pad_token_id, 1, 0)
    
    logits = linear_layer(transformer_model(input_ids=tokenized_threads,
                               attention_mask=pad_mask,
                               global_attention_mask=global_attention_mask).last_hidden_state)
    
    if preds:
        return crf_layer.viterbi_tags(logits, pad_mask)
    
    log_likelihood = crf_layer(logits, comp_type_labels, pad_mask)
    
    if cross_entropy:
        logits = logits.reshape(-1, logits.shape[-1])
        
        pad_mask, comp_type_labels = pad_mask.reshape(-1), comp_type_labels.reshape(-1)
        
        ce_loss = torch.sum(pad_mask*cross_entropy_layer(logits, comp_type_labels))
        
        return ce_loss - log_likelihood

    return -log_likelihood

### Define optimizer

In [12]:
from itertools import chain

import torch.optim as optim

# A single AdamW instance jointly updates the encoder, the projection layer
# and the CRF, all with the same learning rate and weight decay.
_trainable_params = [
    *transformer_model.parameters(),
    *linear_layer.parameters(),
    *crf_layer.parameters(),
]

optimizer = optim.AdamW(params=_trainable_params, lr=2e-5, weight_decay=1e-7)

### Training And Evaluation Loops

In [13]:
def train(dataset):
    """Run one pass over ``dataset`` with gradient accumulation.

    Each element of ``dataset`` is a 4-tuple
    (tokenized_threads, masked_threads, comp_type_labels, _). Only the token
    ids and the component type labels are used; both carry a leading device
    axis of size 1 that is removed before they are handed to ``compute``.

    Gradients are accumulated over 4 batches per optimizer step. A final step
    is taken only if some batches are still pending when the data runs out.
    Stepping on freshly zeroed gradients would still move the weights through
    AdamW's momentum and weight decay.

    NOTE(review): this function does not switch the modules to training mode.
    Hugging Face models are returned in eval mode by from_pretrained(), so
    dropout is presumably inactive here - confirm that this is intended.
    """
    accumulate_over = 4
    pending_batches = 0

    optimizer.zero_grad()

    for tokenized_threads, _masked_threads, comp_type_labels, _ in dataset:
        # Remove the device axis first, so that the global attention mask is
        # built with the same [batch_size, seq_len] shape as the token ids
        # (the shape that compute() documents for it).
        tokenized_threads = np.squeeze(tokenized_threads, axis=0)
        comp_type_labels = np.squeeze(comp_type_labels, axis=0)

        global_attention_mask = torch.tensor(get_global_attention_mask(tokenized_threads),
                                             device=device, dtype=torch.int32)

        # Cast to PyTorch tensors on the training device
        tokenized_threads = torch.tensor(tokenized_threads, device=device)
        comp_type_labels = torch.tensor(comp_type_labels,
                                        device=device, dtype=torch.long)

        loss = compute((tokenized_threads,
                        comp_type_labels,
                        global_attention_mask))/data_config["batch_size"]

        print("Loss: ", loss)
        loss.backward()

        pending_batches += 1
        if pending_batches == accumulate_over:
            optimizer.step()
            optimizer.zero_grad()
            pending_batches = 0

    # Flush the gradients of an incomplete accumulation window, if any.
    if pending_batches:
        optimizer.step()
        optimizer.zero_grad()

In [14]:
def evaluate(dataset, metric):
    """Decode every batch of ``dataset`` and print the metric over all of them.

    Each element of ``dataset`` is a 4-tuple
    (tokenized_threads, masked_threads, comp_type_labels, _) whose arrays carry
    a leading device axis of size 1. For every sample, the Viterbi-decoded tags
    and the reference tags are truncated to the number of non-pad tokens,
    mapped back to their string labels and added to ``metric``. The result of
    ``metric.compute()`` is printed once at the end.

    Decoding runs under ``torch.no_grad()`` with all three modules in eval
    mode. The previous training/eval mode of each module is restored
    afterwards, so calling this between training epochs does not change how
    training behaves.
    """
    int_to_labels = {v:k for k, v in ac_dict.items()}

    modules = (transformer_model, linear_layer, crf_layer)
    previous_modes = [module.training for module in modules]
    for module in modules:
        module.eval()

    try:
        # No gradients are needed for decoding; skipping graph construction
        # saves memory on long inputs.
        with torch.no_grad():
            for tokenized_threads, _masked_threads, comp_type_labels, _ in dataset:
                # Remove the device axis first, so that the global attention
                # mask has the same [batch_size, seq_len] shape as the token ids.
                tokenized_threads = np.squeeze(tokenized_threads, axis=0)
                comp_type_labels = np.squeeze(comp_type_labels, axis=0)

                global_attention_mask = torch.tensor(get_global_attention_mask(tokenized_threads),
                                                     device=device, dtype=torch.int32)

                # Cast to PyTorch tensors on the evaluation device
                tokenized_threads = torch.tensor(tokenized_threads, device=device)
                comp_type_labels = torch.tensor(comp_type_labels, device=device)

                decoded = compute((tokenized_threads,
                                   comp_type_labels,
                                   global_attention_mask),
                                  preds=True)

                # Number of non-pad tokens per sample, as plain Python ints.
                lengths = torch.sum(torch.where(tokenized_threads!=tokenizer.pad_token_id, 1, 0),
                                    axis=-1).tolist()

                # Each decoded entry is (tag_sequence, viterbi_score); only the tags are used.
                preds = [ [int_to_labels[tag] for tag in sample[0][:length]]
                          for sample, length in zip(decoded, lengths)
                        ]

                refs = [ [int_to_labels[ref] for ref in labels[:length]]
                         for labels, length in zip(comp_type_labels.cpu().tolist(), lengths)
                       ]

                metric.add_batch(predictions=preds,
                                 references=refs)
    finally:
        for module, was_training in zip(modules, previous_modes):
            module.train(was_training)

    print(metric.compute())

### Final Training

In [15]:
# Number of train-then-evaluate rounds executed by the loop in the next cell.
n_epochs = 25

In [16]:
# Each epoch: rebuild the dataset splits, run one training pass over the train
# split, then decode and score the test split. The validation split is discarded.
# NOTE(review): get_datasets() is re-invoked every epoch, and the captured output
# repeats "fatal: destination path 'change-my-view-modes' already exists" between
# epochs, which suggests the loader attempts a fresh git clone each time. Confirm
# whether reloading per epoch is required (e.g. because the splits are
# single-pass iterators) before hoisting it out of the loop.
for epoch in range(n_epochs):
    print(f"------------EPOCH {epoch+1}---------------")
    train_dataset, _, test_dataset = get_datasets()
    train(train_dataset)
    evaluate(test_dataset, metric)

------------EPOCH 1---------------


fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.
2021-08-25 05:51:50.835721: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcusolver.so.11'; dlerror: libcusolver.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.0/lib64:/usr/local/cuda-11.0/extras/CUPTI/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
2021-08-25 05:51:50.836507: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1835] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Loss:  tensor(3231.1399, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2643.7749, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3479.6191, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3222.1128, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2114.4832, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2084.6899, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2665.1609, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2929.5229, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1902.2341, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1914.3705, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2024.3479, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3285.8921, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2906.3728, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(4192.4355, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3961.7925, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  ten

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(2539.8989, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2013.8823, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2727.1030, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2327.2808, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1638.6561, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1626.0693, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2028.9069, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2194.4111, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1555.0105, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1661.2297, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1714.0554, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2617.7202, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2382.6133, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3531.3945, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3391.4980, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  ten

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(2313.9463, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1876.0498, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2583.8828, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2193.5020, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1587.1233, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1570.0298, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1912.1627, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1996.6531, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1430.1011, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1602.4395, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1682.0670, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2508.3589, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2085.0862, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3222.8579, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3210.1162, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  ten

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(2019.0935, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1738.1915, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2210.3552, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1940.8076, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1362.2400, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1320.7841, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1638.9731, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1816.3320, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1191.9192, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1382.3154, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1423.5159, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2248.5669, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1738.6730, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2872.5747, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2966.2756, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  ten

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(1806.7792, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1489.5754, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1913.1084, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1530.9395, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(906.3848, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(886.9040, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1278.3945, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1613.7511, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1008.1056, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1039.0198, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1085.0454, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1829.5046, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1416.1072, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2522.0520, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2687.1997, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tenso

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(1682.7859, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1465.9821, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1626.2590, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1478.3635, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(614.3351, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(602.5514, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(970.9431, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1259.3059, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(776.3800, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(840.4431, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(847.2701, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1560.0315, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1202.2020, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2293.1067, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2451.1992, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(11

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(1733.9611, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1293.0796, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1909.7206, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1158.9556, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(476.6976, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(385.7607, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(892.2975, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1069.0592, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(603.3539, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(735.4478, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(793.9445, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1567.7889, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1079.3076, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2134.4661, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2445.5234, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(11

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(1298.2214, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1037.2869, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1411.0254, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(837.5746, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(346.3130, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(284.9121, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(736.0726, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(909.0273, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(500.3460, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(526.1887, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(629.0630, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1628.0581, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1161.5444, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2143.5059, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2186.7446, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(917.

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(1372.9120, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1259.2676, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1473.0111, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(820.1859, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(467.6367, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(374.6788, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(671.2092, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(832.7212, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(391.1519, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(513.4558, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(536.2056, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(945.4622, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(901.4368, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1848.2660, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2063.6819, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(635.01

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(1223.1865, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(910.5686, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1445.6187, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(620.0428, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(328.6167, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(268.9060, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(452.5624, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(779.6096, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(344.2813, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(371.4417, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(463.4051, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(654.3734, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(888.3119, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1820.6836, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1926.5317, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(484.003

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(817.3552, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(699.0068, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(782.6588, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(428.7488, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(260.2024, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(180.2295, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(457.0162, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(681.4454, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(369.8967, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(330.7101, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(527.5382, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1015.9952, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(889.0116, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1775.5472, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1853.6847, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(542.8181

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(702.3977, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(534.3939, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(732.2961, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(347.6219, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(598.5953, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(344.7399, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(876.1830, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(947.8806, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(495.9164, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(451.5440, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(465.7213, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(918.0230, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1006.8957, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2086.2354, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2234.7629, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(906.9459

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(563.1514, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(489.6980, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(675.6349, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(452.3225, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(280.5584, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(182.0721, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(415.6061, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(655.3388, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(311.1825, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(239.7212, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(291.7130, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(538.4495, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(874.6517, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1875.3607, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2101.0493, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(755.5399,

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(1641.7219, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1509.0559, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1396.5038, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1692.1191, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(524.6027, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(401.2540, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(806.2423, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1345.8363, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(716.0950, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1097.7949, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(835.3970, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1453.2271, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1108.4686, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2142.9395, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2075.5225, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(616.0391, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(389.7719, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(672.3148, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(348.9644, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(200.2737, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(125.1553, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(285.7153, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(636.7355, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(209.0074, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(183.7372, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(258.8173, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(524.9786, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(739.3600, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1653.1915, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1772.0281, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(381.7366,

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(400.7218, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(279.6665, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(390.8533, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(280.8203, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(169.5065, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(84.4148, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(171.5501, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(520.5601, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(129.6020, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(120.0835, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(144.9533, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(378.0739, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(683.1885, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1622.4089, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1679.7595, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(249.6454, 

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(219.6443, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(144.2410, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(216.8869, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(136.5442, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(131.4236, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(80.5196, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(136.0170, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(462.9922, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(92.9022, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(76.1417, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(79.5809, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(194.5751, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(643.0984, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1580.3085, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1624.5525, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(154.0397, dev

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(156.6713, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(123.3083, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(168.6900, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(108.4920, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(98.5903, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(60.8018, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(110.7041, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(430.1950, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(63.2227, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(69.1770, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(59.5758, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(156.8360, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(643.2096, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1544.7249, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1601.0931, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(142.5560, devi

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(138.1288, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(108.7776, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(150.2469, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(102.2578, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(79.1964, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(52.3483, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(100.0655, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(419.4328, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(55.3125, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(58.4644, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(44.8353, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(130.7500, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(629.6997, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1526.8485, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1576.7375, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(111.2580, devi

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(99.3328, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(84.1130, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(107.3173, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(75.1757, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(60.9385, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(44.7690, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(79.8436, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(390.5320, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(40.2857, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(43.4623, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(40.1117, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(101.6288, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(617.1881, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1519.3081, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1552.6926, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(86.0787, device='c

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(72.9100, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(73.3607, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(85.3707, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(50.0566, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(47.5372, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(36.6202, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(67.4439, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(376.4889, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(31.3162, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(34.2690, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(34.4106, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(81.5296, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(604.1697, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1504.1719, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1531.3901, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(67.8640, device='cud

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(56.8305, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(59.9157, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(70.2421, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(45.1465, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(38.6543, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(29.9708, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(57.0027, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(369.8923, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(25.9212, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(28.0282, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(25.6013, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(53.2246, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(594.5762, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1485.8546, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1514.5293, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(55.1066, device='cud

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(42.5628, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(50.3781, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(63.6691, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(31.5774, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(33.3112, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(27.2876, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(45.0443, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(361.5530, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(20.1241, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(21.8311, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(20.8862, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(40.3065, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(589.8832, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1472.4177, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1505.6975, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(44.8055, device='cud

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(37.4467, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(38.3426, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(46.4677, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(26.5091, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(29.2047, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(23.9625, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(37.5225, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(346.8550, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(18.5343, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(19.4218, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(22.9389, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(35.4801, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(581.9138, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1459.7927, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1484.4738, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(48.8235, device='cud

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(42.4605, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(35.4201, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(32.3064, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(24.6893, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(25.5334, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(22.1771, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(29.0076, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(337.2578, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(15.5480, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(14.4571, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(22.2739, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(32.0493, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(575.6185, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1452.4900, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1469.2528, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(40.6803, device='cud

In [17]:
# Run `n_epochs-10` further rounds of training followed by evaluation.
# `n_epochs`, `train`, `evaluate` and `metric` are defined in earlier cells.
# NOTE(review): the `-10` and `+25` below are hard-coded; presumably 25 epochs
# were already completed in earlier cells -- confirm and consider naming them.
for epoch in range(n_epochs-10):
    # The banner shows the 0-based loop index shifted by 1+25, so the first
    # iteration is labelled "EPOCH 26".
    print(f"------------EPOCH {epoch+1+25}---------------")
    # The splits are rebuilt on every iteration and the validation split is
    # discarded. NOTE(review): presumably the datasets are single-pass
    # iterators that must be re-created each epoch -- confirm. The recorded
    # output shows a "destination path ... already exists" git error once per
    # call, which suggests the loader re-attempts a clone every time.
    train_dataset, _, test_dataset = get_datasets()
    train(train_dataset)
    # Evaluation runs on the test split after every training pass.
    evaluate(test_dataset, metric)

------------EPOCH 26---------------


fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(46.7878, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(32.0810, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(32.0098, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(41.8475, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(29.3289, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(23.0600, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(30.7739, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(332.6900, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(15.5055, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(20.6990, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(28.5847, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(42.5827, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(570.0729, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1446.2185, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1459.5818, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(34.1197, device='cud

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(32.5197, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(18.5947, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(21.8253, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(23.3939, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(23.8054, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(17.9277, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(19.9736, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(323.7236, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(15.1145, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(21.8164, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(18.2888, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(40.4683, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(575.9846, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1467.7380, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1478.1648, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(52.3935, device='cud

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(83.6874, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(52.4555, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(68.4501, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(25.1880, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(21.2389, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(28.2311, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(51.2480, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(371.7253, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(16.6209, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(11.8149, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(15.3843, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(32.9038, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(561.0543, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1435.8906, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1438.8295, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(35.6570, device='cud

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(56.3413, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(33.0295, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(55.6906, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(23.1906, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(22.4116, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(26.6484, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(43.3887, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(341.1588, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(31.2470, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(19.3977, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(28.7316, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(51.5816, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(581.5341, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1431.4913, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1478.4310, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(85.4456, device='cud

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(67.0038, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(23.4929, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(38.2584, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(25.9386, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(21.8415, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(16.1725, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(26.0801, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(309.7857, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(13.6515, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(15.7419, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.6071, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(24.4310, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(564.8584, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1425.5353, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1425.7676, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(50.8991, device='cuda

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(115.4933, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(90.9942, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(141.5643, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(93.7570, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(55.7487, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(38.0771, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(42.8727, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(321.8163, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(22.5948, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(32.9250, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(32.1252, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(56.3413, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(553.1796, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1414.5964, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1403.5117, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(31.4615, device='c

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(47.6602, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(37.7666, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(39.3162, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(21.6104, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(17.2693, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(17.2557, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(33.2741, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(310.8475, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(14.0043, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(13.1775, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(13.3366, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(22.8491, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(546.0702, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1415.6370, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1393.7828, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(29.5528, device='cud

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(16.2093, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(15.6978, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(16.4705, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(22.0351, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(13.0506, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(13.0043, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(15.1271, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(297.2859, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.5930, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.9214, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.8980, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(21.6927, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(550.0953, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1381.0851, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1386.9020, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(21.3860, device='cuda

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(14.1646, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.5298, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(14.7888, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(16.8662, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(11.6161, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(13.1733, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(13.1352, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(288.7253, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.9171, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.3839, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(11.9802, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(18.0995, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(533.4760, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1396.6716, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1369.3289, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(15.3279, device='cuda

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(12.0370, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(11.9484, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(11.4265, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(16.2233, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(11.4968, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(11.2007, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.5924, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(284.9231, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.4624, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.5295, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.9955, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(17.0239, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(530.8842, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1393.2671, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1359.5459, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(15.7021, device='cuda

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(10.4475, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.9811, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.8278, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(15.4579, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.0834, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.4425, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(11.2658, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(281.4789, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.5868, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.5120, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.1006, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(16.0120, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(526.4384, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1382.2507, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1345.8575, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(16.2297, device='cuda:0

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(8.8515, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.1953, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.9913, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(14.5130, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.6231, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.9802, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.7986, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(278.5939, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.4639, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.4434, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.3280, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(15.1401, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(519.8713, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1376.8961, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1338.3448, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.9196, device='cuda:0', gr

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(11.8181, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.5429, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.2648, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(13.9540, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.1525, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.5541, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.4768, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(274.3262, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.6104, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(7.9968, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(6.8722, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(14.1986, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(516.3518, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1374.9719, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1323.2546, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.9742, device='cuda:0', gr

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(7.0057, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(7.9521, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(7.7589, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.9982, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.6007, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.9460, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.0923, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(271.9830, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.2796, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(6.0970, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(5.5677, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(13.3212, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(509.5109, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1373.2252, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1316.2278, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.2161, device='cuda:0', gra

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(5.9545, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(7.3894, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(7.3463, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.3966, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.2059, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.4314, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.7198, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(268.0307, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.0803, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(5.4525, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(5.2447, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.4803, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(506.0266, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1351.5155, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1296.5713, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.6786, device='cuda:0', grad