<a href="https://colab.research.google.com/github/Jeevesh8/arg_mining/blob/main/experiments/long_context_am.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Install Dependencies

In [None]:
%%capture
#If running on Colab, uncomment and run the four commands below
#!git clone https://github.com/Jeevesh8/arg_mining
#!pip install transformers
#!pip install seqeval datasets allennlp
#!pip install flax

#If connected to a local runtime, uncomment and run the next command too
#!pip install bs4 tensorflow torch



*   Update ``arg_mining/datasets/cmv_modes/configs.py`` as per your requirements. All experiments run so far set ``batch_size`` to 2 and leave every other variable at its default value.



In [1]:
# Optional: silence every Python warning for the rest of the session so the
# long training log stays readable. Skip this cell while debugging, because
# it hides deprecation and numerical warnings as well.
import warnings
warnings.filterwarnings('ignore')

### Load Metric

In [2]:
from datasets import load_metric
# A single metric object is created here and handed to evaluate() after every
# epoch; it receives lists of predicted and reference tag strings per thread.
metric = load_metric('seqeval')

### Define & Load Tokenizer, Model, Dataset

In [3]:
import torch
# Use the first CUDA GPU when one is visible, otherwise fall back to the CPU.
# Every model component and every batch tensor below is placed on this device.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [4]:
device

device(type='cuda', index=0)

In [5]:
# Checkpoint name shared by the tokenizer and the encoder loaded in the next cell.
model_version = 'allenai/longformer-base-4096'

In [6]:
%%capture
from transformers import LongformerTokenizer, AutoModel
# Tokenizer and encoder weights are both taken from `model_version`, so their
# vocabularies agree until special tokens are added further down.
tokenizer = LongformerTokenizer.from_pretrained(model_version)
# The bare encoder (no task head) is moved to `device` right away; the
# tagging head is defined separately in the Linear-Chain-CRF section.
transformer_model = AutoModel.from_pretrained(model_version).to(device)

In [13]:
import torch.nn as nn

#Extend Token Type Embeddings
def resize_token_type_embeddings(transformer_model, new_size):
    """Replace the model's token-type embedding table with one of ``new_size`` rows.

    Rows present in both the old and the new table keep their current weights,
    so growing the table preserves everything already learned and shrinking it
    simply drops the trailing rows.

    Args:
        transformer_model: A model exposing ``embeddings.token_type_embeddings``
            (an ``nn.Embedding``) and a ``device`` attribute. If it also has
            ``config.initializer_range``, newly added rows are drawn with that
            standard deviation.
        new_size: Number of token types the new table must hold.

    Returns:
        None. ``transformer_model`` is modified in place.
    """
    old_embeddings = transformer_model.embeddings.token_type_embeddings.weight
    old_size, hidden_dim = old_embeddings.shape

    # Keep the dtype of the existing table so the module stays consistent with
    # the rest of the model (e.g. after a half-precision cast).
    new_embeddings = nn.Embedding(new_size, hidden_dim,
                                  device=transformer_model.device,
                                  dtype=old_embeddings.dtype)

    # nn.Embedding initialises its weights from N(0, 1), which is far larger
    # than the scale pretrained transformers use. When the model advertises its
    # own initializer range, use it for the rows that have no trained values.
    init_std = getattr(getattr(transformer_model, "config", None),
                       "initializer_range", None)

    # Only rows that exist in both tables can be carried over; this also makes
    # shrinking the table well-defined instead of a shape-mismatch error.
    n_shared = min(old_size, new_size)

    with torch.no_grad():
        if init_std is not None:
            new_embeddings.weight.normal_(mean=0.0, std=init_std)
        new_embeddings.weight[:n_shared] = old_embeddings[:n_shared]

    transformer_model.embeddings.token_type_embeddings = new_embeddings

# Two token types (ids 0 and 1) match the 0/1 tensor that train() and
# evaluate() build from the masked positions and pass to compute().
resize_token_type_embeddings(transformer_model, 2)

In [8]:
# One discourse marker per line. Blank lines (e.g. a trailing newline at the
# end of the file) are skipped so that an empty string never ends up in the
# list handed to load_dataset() as `mask_tokens`.
with open('./Discourse_Markers.txt') as f:
    discourse_markers = [dm.strip() for dm in f if dm.strip()]

In [9]:
%%capture
from arg_mining.datasets.cmv_modes import load_dataset, data_config

# Register the dataset's special tokens with the tokenizer so that each one
# can be looked up as a vocabulary entry.
tokenizer.add_tokens(data_config["special_tokens"])

# The vocabulary just grew, so the encoder's input embedding matrix has to be
# resized to the new tokenizer length before any batch is fed through it.
transformer_model.resize_token_embeddings(len(tokenizer))

def get_datasets():
    """Build the train, validation and test datasets for one pass.

    The split sizes (``train_sz=80``, ``test_sz=20``) and the discourse-marker
    list used as ``mask_tokens`` are fixed here; their exact meaning is defined
    by ``arg_mining.datasets.cmv_modes.load_dataset``.

    Returns:
        A ``(train_dataset, valid_dataset, test_dataset)`` tuple.
    """
    splits = load_dataset(
        tokenizer=tokenizer,
        train_sz=80,
        test_sz=20,
        mask_tokens=discourse_markers,
    )
    # Unpack explicitly so a loader returning anything but three splits fails here.
    train_dataset, valid_dataset, test_dataset = splits
    return train_dataset, valid_dataset, test_dataset

### Define layers for a Linear-Chain-CRF

In [10]:
from allennlp.modules.conditional_random_field import ConditionalRandomField as crf

# Mapping from BIO tag name ("B-C", "I-C", "B-P", "I-P", "O") to integer tag id.
ac_dict = data_config["arg_components"]

# Each entry lists a tag and the tags that may directly follow it. A "B-*" tag
# may only be continued by its own "I-*" tag, and "O" can never be followed by
# an "I-*" tag. The resulting (from_id, to_id) pairs constrain the CRF.
allowed_transitions = [
    (ac_dict[prev_tag], ac_dict[next_tag])
    for prev_tag, next_tags in (
        ("B-C", ("I-C",)),
        ("B-P", ("I-P",)),
        ("I-C", ("I-C", "B-C", "B-P", "O")),
        ("I-P", ("I-P", "B-C", "B-P", "O")),
        ("O", ("O", "B-C", "B-P")),
    )
    for next_tag in next_tags
]

# Projects every encoder hidden state to one score per tag.
linear_layer = nn.Linear(
    in_features=transformer_model.config.hidden_size,
    out_features=len(ac_dict),
).to(device)

# Linear-chain CRF over the tag scores, restricted to the transitions above.
crf_layer = crf(
    num_tags=len(ac_dict),
    constraints=allowed_transitions,
    include_start_end_transitions=False,
).to(device)

# Per-token (reduction='none') cross entropy with one weight per tag id.
# NOTE(review): the raw numbers look like inverse class frequencies, damped by
# the log — confirm their origin and that their order matches ac_dict's ids.
cross_entropy_layer = nn.CrossEntropyLoss(
    weight=torch.log(
        torch.tensor([3.3102, 61.4809, 3.6832, 49.6827, 2.5639], device=device)
    ),
    reduction='none',
)

### Global Attention Mask Utility for Longformer

In [11]:
import numpy as np
from threading import Lock

def get_global_attention_mask(tokenized_threads: np.ndarray) -> np.ndarray:
    """Returns an attention mask, with 1 where there are [USER{i}] tokens and
    0 elsewhere.

    The tokens searched for are "UNU" and "[USER0]" ... "[USER{max_users-1}]",
    with ``max_users`` read from ``data_config``.

    Args:
        tokenized_threads: Integer array of token ids, of any shape.

    Returns:
        A boolean array of the same shape as ``tokenized_threads`` that is True
        exactly where one of the user tokens occurs.
    """
    # NOTE(review): "UNU" has no brackets, unlike the "[USER{i}]" tokens —
    # confirm this is the literal the dataset actually emits.
    user_tokens = ["UNU"] + [f"[USER{i}]" for i in range(data_config["max_users"])]

    user_token_ids = []
    for user_token in user_tokens:
        # [1:-1] drops the first and last ids of the encoding, presumably the
        # begin/end-of-sequence ids the tokenizer adds around every string.
        piece_ids = tokenizer.encode(user_token)[1:-1]
        # Only a token that maps to exactly one id can be found by an
        # element-wise match. Comparing the array against a longer id list
        # would either mis-broadcast or fail, so such tokens are skipped.
        if len(piece_ids) == 1:
            user_token_ids.append(piece_ids[0])

    # One vectorised membership test replaces a full array scan per token.
    return np.isin(tokenized_threads, user_token_ids)

### Loss and Prediction Function

In [12]:
from typing import Tuple

In [14]:
def compute(batch: Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor],
            preds: bool=False, cross_entropy: bool=True,
            use_token_type_ids: bool=False):
    """Run encoder -> linear layer -> CRF on one batch and return loss or predictions.

    Args:
        batch:  A 4-tuple ``(tokenized_threads, token_type_ids, comp_type_labels,
                global_attention_mask)``: the tokenized thread of shape
                [batch_size, seq_len], a token-type id tensor (the callers pass
                1 at masked positions and 0 elsewhere), component type labels of
                shape [batch_size, seq_len], and a global attention mask for
                Longformer, of the same shape.

        preds:  If True, returns a List(of batch_size size) of Tuples of form
                (tag_sequence, viterbi_score) where the tag_sequence is the
                viterbi-decoded sequence, for the corresponding sample in the batch.

        cross_entropy:  This argument will only be used if preds=False, i.e., if
                        loss is being calculated. If True, then cross entropy loss
                        will also be added to the output loss.

        use_token_type_ids: If True, ``token_type_ids`` is forwarded to the
                            encoder. The default (False) ignores it, which is
                            how the runs recorded in this notebook were made.
                            NOTE(review): the notebook resizes the token-type
                            embedding table to 2 rows for exactly this input, so
                            leaving it unused may be an oversight — confirm.

    Returns:
        Either the predicted sequences with their scores for each element in the batch
        (if preds is True), or the loss value summed over all elements of the batch
        (if preds is False).
    """
    tokenized_threads, token_type_ids, comp_type_labels, global_attention_mask = batch

    # 1 for real tokens, 0 for padding; used as the encoder attention mask, as
    # the CRF mask, and to zero out the cross-entropy of padded positions.
    pad_mask = torch.where(tokenized_threads!=tokenizer.pad_token_id, 1, 0)

    model_inputs = dict(input_ids=tokenized_threads,
                        attention_mask=pad_mask,
                        global_attention_mask=global_attention_mask)
    if use_token_type_ids:
        model_inputs["token_type_ids"] = token_type_ids

    logits = linear_layer(transformer_model(**model_inputs).last_hidden_state)

    if preds:
        return crf_layer.viterbi_tags(logits, pad_mask)

    log_likelihood = crf_layer(logits, comp_type_labels, pad_mask)

    if cross_entropy:
        # Flatten batch and sequence axes so that every token is one
        # classification example for the per-token cross-entropy layer.
        flat_logits = logits.reshape(-1, logits.shape[-1])
        flat_mask = pad_mask.reshape(-1)
        flat_labels = comp_type_labels.reshape(-1)

        ce_loss = torch.sum(flat_mask*cross_entropy_layer(flat_logits, flat_labels))

        return ce_loss - log_likelihood

    return -log_likelihood

### Define optimizer

In [15]:
from itertools import chain

import torch.optim as optim

# One Adam optimizer updates the encoder, the linear tag projection and the
# CRF layer together, all with the same learning rate.
optimizer = optim.Adam(
    params=chain.from_iterable(
        module.parameters()
        for module in (transformer_model, linear_layer, crf_layer)
    ),
    lr=2e-5,
)

### Training And Evaluation Loops

In [16]:
def train(dataset):
    """Run one training pass over ``dataset`` with gradient accumulation.

    Each element of ``dataset`` is unpacked as
    ``(tokenized_threads, masked_threads, comp_type_labels, _)``; the arrays
    carry a leading axis of size 1 that is squeezed away before use. The loss
    returned by ``compute`` is divided by ``data_config["batch_size"]`` and
    back-propagated; the optimizer steps once every ``accumulate_over``
    batches, plus once more at the end if a partial group is left over.

    Args:
        dataset: Iterable yielding the 4-tuples described above.

    Returns:
        None. Model parameters are updated in place and each batch loss is printed.
    """
    accumulate_over = 4

    optimizer.zero_grad()
    # True while gradients have been accumulated but not yet applied.
    pending_grads = False

    for i, (tokenized_threads, masked_threads, comp_type_labels, _ ) in enumerate(dataset):
        global_attention_mask = torch.tensor(get_global_attention_mask(tokenized_threads),
                                             device=device, dtype=torch.int32)

        #Remove Device Axis and cast to PyTorch tensor
        tokenized_threads = torch.tensor(np.squeeze(tokenized_threads, axis=0),
                                         device=device)
        masked_threads = torch.tensor(np.squeeze(masked_threads, axis=0),
                                      device=device)
        comp_type_labels = torch.tensor(np.squeeze(comp_type_labels, axis=0),
                                        device=device, dtype=torch.long)

        global_attention_mask = torch.squeeze(global_attention_mask, dim=0)

        # Second tuple item: 1 where the thread was masked, 0 elsewhere.
        loss = compute((tokenized_threads,
                        torch.where(masked_threads==tokenizer.mask_token_id, 1, 0),
                        comp_type_labels,
                        global_attention_mask))/data_config["batch_size"]

        print("Loss: ", loss)
        loss.backward()
        pending_grads = True

        if i%accumulate_over==accumulate_over-1:
            optimizer.step()
            optimizer.zero_grad()
            pending_grads = False

    # Flush a trailing partial accumulation group only. Stepping when nothing
    # is pending would apply an update from freshly zeroed gradients, which
    # Adam still turns into a parameter change through its moment estimates.
    if pending_grads:
        optimizer.step()
        optimizer.zero_grad()

In [17]:
# Keep the config in step with the token-type embedding table, which was
# resized to 2 rows by resize_token_type_embeddings().
transformer_model.config.type_vocab_size = 2

In [18]:
def evaluate(dataset, metric):
    """Decode every batch of ``dataset`` and print the metric's scores.

    Predictions come from ``compute(..., preds=True)`` (Viterbi decoding).
    Predicted and reference tag ids are truncated to each sequence's unpadded
    length, mapped back to tag names through ``ac_dict`` and added to ``metric``.

    Inference runs under ``torch.no_grad()`` with all modules in eval mode;
    each module's previous train/eval mode is restored afterwards, so calling
    this between training epochs does not alter training behaviour.

    Args:
        dataset: Iterable yielding
            ``(tokenized_threads, masked_threads, comp_type_labels, _)`` tuples
            whose arrays carry a leading axis of size 1.
        metric: Object with ``add_batch(predictions=..., references=...)`` and
            ``compute()``.

    Returns:
        None. The result of ``metric.compute()`` is printed.
    """
    int_to_labels = {v:k for k, v in ac_dict.items()}

    modules = (transformer_model, linear_layer, crf_layer)
    previous_modes = [module.training for module in modules]
    for module in modules:
        module.eval()

    try:
        # No gradients are needed for decoding; skipping the autograd graph
        # saves memory on long threads.
        with torch.no_grad():
            for tokenized_threads, masked_threads, comp_type_labels, _ in dataset:

                global_attention_mask = torch.tensor(get_global_attention_mask(tokenized_threads),
                                                     device=device)

                #Remove Device Axis and cast to PyTorch tensor
                tokenized_threads = torch.tensor(np.squeeze(tokenized_threads, axis=0),
                                                 device=device)
                masked_threads = torch.tensor(np.squeeze(masked_threads, axis=0),
                                              device=device)
                comp_type_labels = torch.tensor(np.squeeze(comp_type_labels, axis=0),
                                                device=device)
                global_attention_mask = torch.squeeze(global_attention_mask, dim=0)

                decoded_batch = compute((tokenized_threads,
                                         torch.where(masked_threads==tokenizer.mask_token_id, 1, 0),
                                         comp_type_labels,
                                         global_attention_mask),
                                        preds=True)

                # Number of non-pad tokens per sequence, as plain Python ints.
                lengths = torch.sum(torch.where(tokenized_threads!=tokenizer.pad_token_id, 1, 0),
                                    axis=-1).tolist()

                # decoded[0] is the tag-id sequence; decoded[1] (the score) is unused.
                pred_labels = [ [int_to_labels[tag] for tag in decoded[0][:lengths[i]]]
                                for i, decoded in enumerate(decoded_batch)
                              ]

                ref_labels = [ [int_to_labels[tag] for tag in labels[:lengths[i]]]
                               for i, labels in enumerate(comp_type_labels.cpu().tolist())
                             ]

                metric.add_batch(predictions=pred_labels,
                                 references=ref_labels)
    finally:
        for module, was_training in zip(modules, previous_modes):
            module.train(was_training)

    print(metric.compute())

### Final Training

In [19]:
# Number of train-then-evaluate passes made by the final training loop.
n_epochs = 35

In [20]:
for epoch in range(n_epochs):
    print(f"------------EPOCH {epoch+1}---------------")
    # The datasets are rebuilt at the start of every epoch — presumably because
    # the returned iterables can only be consumed once; TODO confirm. Each
    # rebuild re-runs the loader's download step, which is what prints the
    # repeated "destination path ... already exists" message in the output.
    train_dataset, _, test_dataset = get_datasets()
    train(train_dataset)
    # The validation split (`_`) is not used; scores are reported on the test split.
    evaluate(test_dataset, metric)

------------EPOCH 1---------------


fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.
2021-08-26 10:43:21.880074: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcusolver.so.11'; dlerror: libcusolver.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.0/lib64:/usr/local/cuda-11.0/extras/CUPTI/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
2021-08-26 10:43:21.880870: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1835] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Loss:  tensor(3273.7771, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2606.2212, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3360.3765, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3211.7551, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2050.5786, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2062.1758, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2562.0024, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2806.7324, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1786.5348, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1887.1169, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2070.9905, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3208.7817, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2549.1536, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3717.8696, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3535.7019, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  ten

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(2069.5649, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1717.7789, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2256.8770, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2296.1519, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1589.2266, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1561.5105, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1812.2649, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1745.1355, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1230.9990, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1270.0487, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1354.4434, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2230.5303, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1889.0938, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3188.2751, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2685.1611, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  ten

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(1942.8868, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1640.8154, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2087.0554, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2329.7019, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1420.9583, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1385.1160, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1501.9059, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1461.2888, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(929.9670, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1047.4957, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1149.8422, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1944.4049, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1379.8240, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2532.8486, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2303.1133, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tens

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(1670.4084, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1450.8363, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1823.4182, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1973.5684, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1006.9785, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1003.6052, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1149.0712, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1228.9948, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(746.9715, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(884.7604, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(985.1078, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1676.9832, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(992.0599, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2230.5393, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1937.4857, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(1402.5393, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1298.2888, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1571.7515, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1998.6658, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(794.4727, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(840.8977, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1011.5992, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1021.1022, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(638.5054, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(771.2333, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(876.8740, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1436.8210, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(674.5933, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1849.3884, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1559.6481, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(99

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(1035.4177, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(996.6566, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1059.1771, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1109.4752, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(535.1336, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(505.0983, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(638.1128, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(755.1273, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(761.1473, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(918.2726, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1074.2253, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1809.5256, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(801.3918, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2834.0164, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1881.5638, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1128

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(1157.0774, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1171.6304, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1413.7216, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1265.5835, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(788.9540, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(695.5594, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1070.1494, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(855.2111, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(491.7283, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(587.2975, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(787.8647, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1303.2322, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(559.1653, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1448.4573, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1439.6660, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(833

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(956.0858, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1018.4645, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1147.5098, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(984.0584, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(505.4834, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(392.4149, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(740.6373, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(671.3643, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(349.3976, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(522.1277, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(667.3245, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1485.1960, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(613.1354, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1393.0757, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1225.3136, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(797.61

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(644.3807, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(651.3920, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(671.3005, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(741.9265, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(264.6036, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(257.6412, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(297.5426, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(362.8183, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(284.8850, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(357.2744, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(427.3993, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(779.0334, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(240.6914, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(882.0972, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(736.6971, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(611.4481, d

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(439.7696, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(419.3247, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(499.8988, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(521.0507, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(157.1576, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(134.8849, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(192.6955, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(209.9352, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(161.6550, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(205.4387, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(216.3919, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(490.6340, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(155.3008, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(682.6859, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(424.7952, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(406.0932, d

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(304.9557, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(257.2719, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(354.8711, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(356.1654, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(135.9362, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(112.0341, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(160.3356, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(200.6016, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(189.0099, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(214.6717, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(242.4285, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(440.8788, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(151.0463, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(643.8893, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(459.5208, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(451.8330, d

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(645.2863, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(459.2086, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(794.6348, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(295.9885, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(113.5529, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(96.3329, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(100.6043, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(96.1810, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(70.0812, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(54.9834, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(84.2834, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(244.9960, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(148.1830, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(507.9891, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(292.5650, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(355.8680, device

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(345.7983, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(252.5626, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(482.5202, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(269.6729, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(163.0986, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(139.8755, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(150.3355, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(188.1919, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(102.0108, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(139.6606, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(167.2632, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(396.0324, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(202.4315, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(662.0728, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(403.7138, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(390.0485, d

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(1218.1830, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(833.0222, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1781.3108, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(634.4602, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(525.7552, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(621.4733, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1047.2754, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1089.3859, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(608.5353, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1151.9758, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1283.9849, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1838.6112, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(337.2583, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1126.5515, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1561.4084, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(11

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(396.9585, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(402.1981, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(551.2549, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(432.5751, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(133.4307, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(109.0307, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(197.6856, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(290.1785, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(152.5739, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(198.4697, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(247.4213, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(388.5251, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(136.8907, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(467.0963, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(412.7089, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(295.5459, d

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(218.9928, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(214.3186, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(304.2145, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(242.3178, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(98.0751, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(79.1752, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(118.9405, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(132.4555, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(68.7399, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(96.1244, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(108.7983, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(238.2975, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(70.3801, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(327.1703, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(221.9438, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(174.7821, device

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(127.1317, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(171.2763, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(191.1286, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(153.4785, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(75.7292, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(54.5340, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(71.5341, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(68.9015, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(30.7409, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(38.8456, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(61.9548, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(153.8065, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(39.4769, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(238.6476, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(112.5289, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(119.9878, device='c

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(104.4492, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(104.1563, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(135.7469, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(75.3788, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(58.0563, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(39.2300, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(50.1889, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(45.1528, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(18.2491, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(26.2341, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(36.5224, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(103.2724, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(31.0832, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(182.0343, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(66.4764, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(69.7647, device='cuda

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(71.1709, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(71.0865, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(90.1068, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(56.2890, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(46.6034, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(33.5303, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(35.0521, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(27.4228, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(13.4984, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(17.7985, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(30.5304, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(81.3534, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(25.5786, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(132.7538, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(46.7124, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(39.3566, device='cuda:0',

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(49.1695, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(37.8018, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(62.6804, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(44.4046, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(39.9329, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(29.6206, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(28.7169, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(18.7015, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.8138, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(14.5011, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(24.6423, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(58.1570, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(22.0477, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(104.7219, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(40.7644, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(25.8350, device='cuda:0',

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(34.2957, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(29.4340, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(42.5149, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(36.6051, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(35.2145, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(27.0719, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(25.6762, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(15.2035, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.1544, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.0664, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(20.4990, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(40.5344, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(19.3844, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(82.0366, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(34.8940, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(19.9137, device='cuda:0', g

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(25.6983, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(24.8540, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(30.5634, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(30.9358, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(31.5491, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(25.2205, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(23.6220, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.7299, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(7.9405, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.2639, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(18.3489, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(34.5496, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(17.6014, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(68.5113, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(32.1748, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(16.0393, device='cuda:0', g

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(21.8905, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(22.1168, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(24.7544, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(27.6834, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(28.1395, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(23.4413, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(21.8358, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.9321, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(7.0961, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.9905, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(16.7299, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(30.4938, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(14.5346, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(60.5755, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(30.2994, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(14.0259, device='cuda:0', gr

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(18.8663, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(20.3545, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(21.6195, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(25.0337, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(24.9887, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(21.5527, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(19.9864, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.7180, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(6.4572, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.0324, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(15.6402, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(27.7589, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(14.8070, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(56.5557, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(28.8871, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.6266, device='cuda:0', gra

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(16.8344, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(19.0008, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(19.5890, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(22.9200, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(22.0727, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(19.5402, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(18.5449, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.6482, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(6.0042, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(7.2056, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(14.7149, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(29.1066, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(13.3495, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(60.7944, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(27.8314, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(11.5807, device='cuda:0', gra

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(15.4026, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(18.0840, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(18.0945, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(21.0235, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(19.6401, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(17.7013, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(17.6799, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(7.8523, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(5.6846, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(6.7531, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(14.6710, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(21.5068, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.1580, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(88.7306, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(29.1561, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.1243, device='cuda:0', gra

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(14.1892, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(17.2165, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(16.8193, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(20.2841, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(17.4245, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(16.2478, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(16.6292, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(7.2661, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(5.3752, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(6.2648, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(14.1844, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(20.1052, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(11.2750, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(73.0164, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(28.5034, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(11.4006, device='cuda:0', gra

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(13.2305, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(16.5337, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(15.8447, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(19.0407, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(15.5161, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(14.9268, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(15.7385, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(6.7700, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(5.1487, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(5.8475, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(13.6322, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(19.0859, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.3373, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(44.0047, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(27.8949, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.8752, device='cuda:0', gra

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(12.3902, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(16.0418, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(15.0428, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(17.7515, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(13.9444, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(13.7261, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(15.0429, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(6.3247, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(4.8995, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(5.5465, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(13.0822, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(18.1119, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.6683, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(47.1047, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(27.2771, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.4403, device='cuda:0', grad

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(11.7731, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(15.6416, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(14.5472, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(16.7568, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.7304, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.8507, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(14.4848, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(5.9028, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(4.6765, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(5.3214, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.5834, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(17.3309, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.0416, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(47.5851, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(26.7878, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.0705, device='cuda:0', grad

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(11.2156, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(14.3189, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(14.2316, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(15.5138, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(11.5937, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(11.6044, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(14.1247, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(5.6208, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(4.4684, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(5.0768, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(11.6469, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(15.3239, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.2964, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(35.0347, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(24.8875, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.5907, device='cuda:0', grad_

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(10.4431, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(13.2840, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(13.9877, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(14.0440, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.7276, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.4836, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(13.7355, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(5.3527, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(4.3590, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.3704, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.0033, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(15.9150, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(7.6343, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(29.1370, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(23.8279, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.3079, device='cuda:0', grad_fn

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(10.1082, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.9524, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.7584, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.9597, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.4076, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.0801, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.8489, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(5.1730, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(4.3108, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.2192, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.2632, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(14.2010, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(7.0073, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(25.9204, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(23.5888, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.0804, device='cuda:0', grad_fn

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(9.8287, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.7227, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.0868, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.0237, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.5525, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.7636, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.5108, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(4.8563, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(4.0557, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.9324, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.0698, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(13.4509, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(6.5055, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(23.0086, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(23.1315, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.4623, device='cuda:0', grad_fn=<

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.


Loss:  tensor(9.6000, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.9377, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(9.6775, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.5700, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(7.9922, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(10.0189, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.0766, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(5.2239, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.3332, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.7011, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(8.7920, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(12.9144, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(6.4337, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(20.3117, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(22.9691, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(7.2325, device='cuda:0', grad_fn=<D

### Rough Work -- Sanity-Checking the Dataset

In [11]:
def get_datasets():
    """Load the three dataset splits used by this notebook.

    Forwards the notebook-level ``tokenizer`` and the ``discourse_markers``
    list (as ``mask_tokens``) to ``load_dataset``, together with
    ``train_sz=80`` and ``test_sz=20``.

    Returns:
        The ``(train_dataset, valid_dataset, test_dataset)`` triple, in the
        order ``load_dataset`` yields them.
    """
    loader_kwargs = dict(
        tokenizer=tokenizer,
        train_sz=80,
        test_sz=20,
        mask_tokens=discourse_markers,
    )
    train_split, valid_split, test_split = load_dataset(**loader_kwargs)
    return train_split, valid_split, test_split

In [12]:
# Fetch the splits for inspection; the second (validation) split returned by
# get_datasets() is not kept.
train_dataset, _, test_dataset = get_datasets()

fatal: destination path 'change-my-view-modes' already exists and is not an empty directory.
2021-08-26 07:47:02.997781: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcusolver.so.11'; dlerror: libcusolver.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.0/lib64:/usr/local/cuda-11.0/extras/CUPTI/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
2021-08-26 07:47:02.998787: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1835] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [13]:
# Sanity check: take the first thread of the first test batch, print the first
# 100 labels / token ids / decoded tokens, then walk its BIO labels and print
# every contiguous span (claim, premise or "other") next to its masked version.
def _print_span(tokens, masked_tokens, start, end, span_type):
    """Print the half-open span ``[start, end)`` of a thread and of its masked copy.

    Args:
        tokens: token ids of the original thread.
        masked_tokens: token ids of the same thread after masking.
        start: index of the first token of the span.
        end: index one past the last token of the span.
        span_type: "Claim", "Premise" or "other".

    Empty spans (``end <= start``) are skipped, so a thread that opens with a
    B- tag does not produce a bogus empty "other" component.
    """
    if end <= start:
        return
    print("Component: ", tokenizer.decode(tokens[start:end]), " of type: ", span_type, tokens[start:end])
    print("Masked Component: ", tokenizer.decode(masked_tokens[start:end]), " of type: ", span_type, masked_tokens[start:end])


for tokenized_threads, masked_threads, comp_type_labels, _ in test_dataset:
    # Index 0 selects the first entry along the leading axis of each batch
    # element (presumably a device/shard axis -- TODO confirm against load_dataset).
    tokenized_threads, masked_threads, comp_type_labels = tokenized_threads[0], masked_threads[0], comp_type_labels[0]
    for tokenized_thread, masked_thread, comp_type_label in zip(tokenized_threads, masked_threads, comp_type_labels):
        print(comp_type_label[:100])
        print(tokenized_thread[:100])
        print(tokenizer.decode(tokenized_thread[:100]))

        # The current span is tracked as the half-open interval [start, end).
        # The previous inclusive bookkeeping started at end=0 and incremented on
        # the first "O" token, so the leading "other" span swallowed the first
        # token of the following component.
        start, end = 0, 0
        prev_type = "other"
        begin_labels = [ac_dict["B-C"], ac_dict["B-P"]]
        inside_labels = [ac_dict["I-C"], ac_dict["I-P"]]

        for i in range(tokenized_thread.shape[0]):
            label = comp_type_label[i]
            if label == ac_dict["O"]:
                if prev_type == "other":
                    # Still inside a run of "other" tokens: extend it.
                    end += 1
                else:
                    # First "O" after a claim/premise: emit that component and
                    # open a new "other" span at this token.
                    _print_span(tokenized_thread, masked_thread, start, end, prev_type)
                    start, end = i, i + 1
                    prev_type = "other"
            elif label in begin_labels:
                # A B- tag always closes whatever span was open before it.
                _print_span(tokenized_thread, masked_thread, start, end, prev_type)
                start, end = i, i + 1
                prev_type = "Claim" if label == ac_dict["B-C"] else "Premise"
            elif label in inside_labels:
                end += 1

        # Flush the span that is still open when the thread ends; previously the
        # last component was silently dropped.
        _print_span(tokenized_thread, masked_thread, start, end, prev_type)
        break  # only inspect the first thread ...
    break  # ... of the first batch

[0 1 2 2 2 2 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 2 2 2 2 2 0 0 0 0 0 0 1 2 2 2 2 2 2 2 2 2]
[    0 18814   846    35  7978     9  1901    16   145   551   350   444
 50269  1437    96     5    94   367   688    52   348    56    80  1307
  1061  1369    11     5   232     6  1437   258     9    61    58  1726
    30  3510  8941     7  1437    22  3519     9  1901     4  1437    22
  1437    20    78   145     5 11597     9  6366    81    20 21902     6
  1437     8   452     5  1094    23     5  4088     9    10 33937  4320
    11  2201     4  1437  1437  1437   100  1819   923    84   481  1901
  1437    53     7   162  1437  1437  8585    16    10   699   516   227
 20203   110    78  8322]
<s>CMV: Freedom of speech is being taken too far [USER0]   In the last few weeks we've had two huge events happen in the world,  both of which were caused by matters relating to  " freed