In [4]:
import numpy as np
import torch
import torch.nn.functional as F
from typing import List, Optional, Tuple, Union
from transformers import PreTrainedModel, AutoModelForSequenceClassification, AutoConfig
from transformers import AutoTokenizer
from sklearn.metrics import confusion_matrix

import math
import os
import pandas as pd
import json
import datasets

from typing import List, Optional, Tuple, Union
from transformers import BertForSequenceClassification
import transformers
from transformers.modeling_outputs import SequenceClassifierOutput

In [7]:
from transformers import AutoModelForSequenceClassification
from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler
import sys
sys.path.append('..')
from modeling_rmt import RMTEncoderForSequenceClassification
from modeling_rmt_enc_dec import RMTEncoderDecoderForConditionalGeneration

## RMT

In [8]:
# !ls /home/bulatov/bulatov/rmt_internship/finetune/contract_nli/bert-base-cased/lr1e-05_constant_with_warmup_adamw_wd1e-03_1452_mem25_sum_loss/run_3/model_best.pth
# !ls /home/bulatov/bulatov/rmt_internship/finetune/contract_nli/bert-base-cased/lr1e-05_linear_adamw_wd1e-03_968_mem25_sum_loss/run_3/

In [9]:
# cpt_path = "/cephfs/home/bulatov/bulatov/rmt_internship/finetune/contract_nli/t5-base/lr1e-05_linear_adamw_wd1e-03_972_mem25_sum_loss/run_1/model_best.pth"

model_name = 'bert-base-cased'

experiment_path = "/home/bulatov/bulatov/rmt_internship/finetune/contract_nli/bert-base-cased/lr1e-05_linear_adamw_wd1e-03_968_mem25_sum_loss/run_3/"
cpt_path = os.path.join(experiment_path, "model_best.pth")
config_path = os.path.join(experiment_path, "config.json")
cpt = torch.load(cpt_path, map_location='cpu')

with open(config_path, 'r') as f:
    d = json.load(f)

rmt = RMTEncoderForSequenceClassification.from_pretrained(model_name, num_labels=3)
tokenizer = AutoTokenizer.from_pretrained(model_name)

set_params_kwargs = {k:v for k,v in d.items() if k in rmt.set_params.__code__.co_varnames}
set_params_kwargs['segment_ordering'] = 'regular'
set_params_kwargs['inter_layer_memory'] = False
set_params_kwargs['tokenizer'] = tokenizer        

rmt.set_params(**set_params_kwargs)
rmt.load_state_dict(cpt['model_state_dict'])

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

<All keys matched successfully>

### Baseline

In [10]:
model_name = 'bert-base-cased'

experiment_path = "/home/bulatov/bulatov/runs/finetune/debug/contract_nli/bert-base-cased/lr1e-05_linear_adamw_wd1e-03_512_mem/run_1/"
cpt_path = os.path.join(experiment_path, "model_best.pth")
config_path = os.path.join(experiment_path, "config.json")
cpt = torch.load(cpt_path, map_location='cpu')

with open(config_path, 'r') as f:
    d = json.load(f)

baseline = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
tokenizer = AutoTokenizer.from_pretrained(model_name)

baseline.load_state_dict(cpt['model_state_dict'])

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

<All keys matched successfully>

### encoder. cnli

In [74]:
input_seq_len = d['input_seq_len'] * 10
target_seq_len = 2
batch_size = 2

device = torch.device(0)

encode_plus_kwargs = {'max_length': input_seq_len,
                              'truncation': True,
                              'padding': 'longest',
                              'pad_to_multiple_of': 1}
generate_kwargs = {}
labels_map = {'Contradiction': 0, 'Entailment': 1, 'Not mentioned': 2}
num_labels = len(labels_map)

def collate_fn(batch):
    # cut too long strings because they may slow down tokenization
    inputs = [b['input'][:input_seq_len * 10] for b in batch]
    labels = [b['output'][:target_seq_len * 10] for b in batch]
    features = tokenizer.batch_encode_plus(list(inputs), return_tensors='pt', **encode_plus_kwargs)
    labels = np.array([labels_map[t] for t in labels])
    features['labels'] = torch.from_numpy(labels)
    features['id'] = [b['id'] for b in batch]
    features['pid'] = [b['pid'] for b in batch]
    
    return features

In [75]:
dataset = datasets.load_dataset('tau/scrolls', 'contract_nli')
train_dataset = dataset['train']

train_sampler = RandomSampler(train_dataset,)
kwargs = {'pin_memory': True, 'num_workers': 0}
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler,
                                collate_fn=collate_fn, **kwargs)

valid_dataset = dataset['validation']
valid_sampler = RandomSampler(valid_dataset)
valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, sampler=train_sampler,
                                collate_fn=collate_fn, **kwargs)

Reusing dataset scrolls (/home/bulatov/.cache/huggingface/datasets/tau___scrolls/contract_nli/1.0.0/672021d5d8e1edff998a6ea7a5bff35fdfd0ae243e7cf6a8c88a57a04afb46ac)


  0%|          | 0/3 [00:00<?, ?it/s]

In [76]:
len(train_dataset['pid']), len(valid_dataset['pid'])

(7191, 1037)

In [77]:
gen = iter(train_dataloader)

In [78]:
sample = next(gen)
sample['input_ids'].shape

torch.Size([2, 2496])

In [82]:
# list(sample['attention_mask'][0].numpy())

### predictions from all segments

In [103]:
def __call__(self, input_ids, return_all_segments=False, **kwargs):
    memory = self.set_memory()
    segmented = self.pad_and_segment(input_ids)
    segmented = list(zip(*segmented))

    if self.segment_ordering in {'regular', 'last_memory_only'}:
        pass
    elif self.segment_ordering == 'reversed':
        segmented = segmented[::-1]
    elif self.segment_ordering == 'bidirectional':
        segmented = segmented + segmented[::-1][1:]
    elif self.segment_ordering == 'repeat_first':
        segmented = segmented + segmented[:1]
    else:
        raise ValueError(f'Unknown segment ordering: {self.segment_ordering}')

    self.memory_storage = {'num_mem_tokens': self.num_mem_tokens}
    outputs = []
    for seg_num, segment_data in enumerate(segmented):
        input_ids, attention_mask, token_type_ids = segment_data
        if memory.ndim == 2:
            memory = memory.repeat(input_ids.shape[0], 1, 1)
        if (self.bptt_depth > -1) and (len(segmented) - seg_num > self.bptt_depth): 
            memory = memory.detach()

        seg_kwargs = dict(**kwargs)
        if self.drop_empty_segments:
            non_empty_mask = [not torch.equal(input_ids[i], self.empty) for i in range(len(input_ids))]
            if sum(non_empty_mask) == 0:
                continue
            input_ids = input_ids[non_empty_mask]
            attention_mask = attention_mask[non_empty_mask]
            token_type_ids = token_type_ids[non_empty_mask]
            seg_kwargs['labels'] = seg_kwargs['labels'][non_empty_mask]

            inputs_embeds = self.base_model.embeddings.word_embeddings(input_ids)
            inputs_embeds[:, 1:1+self.num_mem_tokens] = memory[non_empty_mask]
        else:
            inputs_embeds = self.base_model.embeddings.word_embeddings(input_ids)
            inputs_embeds[:, 1:1+self.num_mem_tokens] = memory

        seg_kwargs['inputs_embeds'] = inputs_embeds
        seg_kwargs['attention_mask'] = attention_mask
        seg_kwargs['token_type_ids'] = token_type_ids

        out = self.model.forward(**seg_kwargs, output_hidden_states=True)
        outputs.append(out)

        if self.drop_empty_segments:
            memory[non_empty_mask] = out.hidden_states[-1][:, :self.num_mem_tokens]
            out['non_empty_mask'] = non_empty_mask
        else:
            memory = out.hidden_states[-1][:, :self.num_mem_tokens]

    for i, o in enumerate(outputs):
        out[f'loss_{i}'] = o['loss'].mean()

    if self.sum_loss:
        out['loss'] = torch.stack([o['loss'] for o in outputs]).sum(dim=-1)

    if return_all_segments:
        return out, outputs
    
    return out

### get predictions

In [104]:
import pandas as pd
def evaluate(output, sample):
    labels = sample['labels']
    logits = o['logits']
    preds = torch.argmax(logits, dim=1)
    correct_mask = preds == labels[output['non_empty_mask']]
    return correct_mask

In [105]:
train_dataset

Dataset({
    features: ['id', 'pid', 'input', 'output'],
    num_rows: 7191
})

In [106]:
id2label_train = dict(zip(train_dataset['id'], train_dataset['output']))
id2label_valid = dict(zip(valid_dataset['id'], valid_dataset['output']))

id2text_train = dict(zip(train_dataset['id'], train_dataset['input']))
id2text_valid = dict(zip(valid_dataset['id'], valid_dataset['input']))

In [107]:
def evaluate_model(model, dataloader, max_it=10000,):
    it = 0
    
    res_df = pd.DataFrame()
    gen = iter(dataloader)
    for sample in gen:
        ids, pids = sample.pop('id'), sample.pop('pid')
        for key in sample:
            sample[key] = sample[key].to(device)
        out, outputs = __call__(model, **sample, return_all_segments=True)    

        preds, labels = [], []
        for i, o in enumerate(outputs):
            logits = o['logits']

            seg_labels = [s.item() for s in sample['labels']]
            seg_preds = [p.item() for p in torch.argmax(logits, dim=1)]
            labels.append(seg_labels)
            preds.append(seg_preds)

        res_dict = {'ids': ids}

        res_dict.update({f'pred_seg_{i}':v for i,v in enumerate(preds)})
        res_dict.update({f'labels_seg_{i}':v for i,v in enumerate(labels)})

        res_df = res_df.append(pd.DataFrame(res_dict), ignore_index=True)


        it += 1
        if it > max_it:
            break
            
    return res_df

In [59]:
%%time
it = 0
max_it = 10000

rmt.to(device=device)
rmt.drop_empty_segments = False
# sampler = RandomSampler(train_dataset)
# dataloader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler,
#                                 collate_fn=collate_fn, **kwargs)
sampler = RandomSampler(valid_dataset)
dataloader = DataLoader(valid_dataset, batch_size=batch_size, sampler=sampler,
                                collate_fn=collate_fn, **kwargs)

rmt_res_df = evaluate_model(rmt, dataloader, max_it=max_it)

CPU times: user 53.3 s, sys: 420 ms, total: 53.7 s
Wall time: 49.9 s


In [None]:
# gen = iter(dataloader)
# sample = next(gen)

In [None]:
# sample['input_ids'].shape

torch.Size([2, 968])

In [72]:
# rmt_res_df

In [61]:
# rmt_res_df.to_csv('tables/cnli-train-rm-bert-968-25.csv', index=False)
rmt_res_df.to_csv('tables/cnli-valid-rm-bert-968-25.csv', index=False)

In [67]:
model = baseline
model.to(device=device)
model.drop_empty_segments = False

# sampler = RandomSampler(train_dataset)
# dataloader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler,
#                                 collate_fn=collate_fn, **kwargs)
sampler = RandomSampler(valid_dataset)
dataloader = DataLoader(valid_dataset, batch_size=batch_size, sampler=sampler,
                                collate_fn=collate_fn, **kwargs)

# baseline_df = evaluate_model(model, dataloader, max_it=100, forward_func=model.__call__)

it = 0
max_it=10000
    
res_df = pd.DataFrame()
gen = iter(dataloader)
for sample in gen:
    ids, pids = sample.pop('id'), sample.pop('pid')
    for key in sample:
        sample[key] = sample[key].to(device)
    o = model.forward(**sample)    

    preds, labels = [], []

    logits = o['logits']

    seg_labels = [s.item() for s in sample['labels']]
    seg_preds = [p.item() for p in torch.argmax(logits, dim=1)]
    labels.append(seg_labels)
    preds.append(seg_preds)

    res_dict = {'ids': ids}

    res_dict.update({'pred_seg':v for i,v in enumerate(preds)})
    res_dict.update({'labels_seg':v for i,v in enumerate(labels)})

    res_df = res_df.append(pd.DataFrame(res_dict), ignore_index=True)


    it += 1
    if it > max_it:
        break

In [73]:
# res_df

In [71]:
# res_df.to_csv('tables/cnli-train-bert.csv', index=False)
# res_df.to_csv('tables/cnli-valid-bert.csv', index=False)

## Interpret

#### Train

In [58]:
rmt_df = pd.read_csv('tables/cnli-train-rm-bert-968-25.csv')
baseline_df = pd.read_csv('tables/cnli-train-bert.csv')

In [59]:
# rmt_df.pred_seg_0.value_counts(), rmt_df.pred_seg_1.value_counts()

In [60]:
rmt_df['correct_seg_0'] = rmt_df.pred_seg_0 == rmt_df.labels_seg_0
rmt_df['correct_seg_1'] = rmt_df.pred_seg_1 == rmt_df.labels_seg_1

baseline_df['correct_seg'] = baseline_df.pred_seg == baseline_df.labels_seg

rmt_correct_ids_seg_0 = set(rmt_df[rmt_df.correct_seg_0].ids)
rmt_correct_ids_seg_1 = set(rmt_df[rmt_df.correct_seg_1].ids)
bl_correct_ids = set(baseline_df[baseline_df.correct_seg].ids)

In [61]:
rmt_lose_seg_0 = bl_correct_ids.difference(rmt_correct_ids_seg_0)
rmt_lose_seg_1 = bl_correct_ids.difference(rmt_correct_ids_seg_1)
baseline_lose = rmt_correct_ids_seg_1.difference(bl_correct_ids)

In [62]:
print(f'rmt loses: {len(rmt_lose_seg_1)}, baseline loses: {len(baseline_lose)}')

rmt loses: 558, baseline loses: 380


In [64]:
# baseline 

accuracy = (baseline_df.pred_seg == baseline_df.labels_seg).mean()
conf_mat = confusion_matrix(y_pred=baseline_df.pred_seg, y_true=baseline_df.labels_seg, )
conf_mat_normalized = (conf_mat / conf_mat.sum(axis=1).reshape(-1, 1)).round(2)
print(f'Accuracy: {accuracy}\n\nConfusion matrix: \n{conf_mat}\nNormalized: \n{conf_mat_normalized}')

Accuracy: 0.8050340703657349

Confusion matrix: 
[[ 645   75  121]
 [ 125 3074  331]
 [ 193  557 2070]]
Normalized: 
[[0.77 0.09 0.14]
 [0.04 0.87 0.09]
 [0.07 0.2  0.73]]


In [65]:
# rmt

df = rmt_df 
for l in [0, 1]:
    accuracy = (df[f'pred_seg_{l}'] == df[f'labels_seg_{l}']).mean()
    conf_mat = confusion_matrix(y_pred=df[f'pred_seg_{l}'], y_true=df[f'labels_seg_{l}'])
    conf_mat_normalized = (conf_mat / conf_mat.sum(axis=1).reshape(-1, 1)).round(2)
    print(f'\nSegment {l}\nAccuracy: {accuracy}\n\nConfusion matrix: \n{conf_mat}\nNormalized: \n{conf_mat_normalized}')


Segment 0
Accuracy: 0.7916840495063273

Confusion matrix: 
[[ 648   58  135]
 [ 124 2792  614]
 [ 203  364 2253]]
Normalized: 
[[0.77 0.07 0.16]
 [0.04 0.79 0.17]
 [0.07 0.13 0.8 ]]

Segment 1
Accuracy: 0.7802809066889167

Confusion matrix: 
[[ 647   54  140]
 [ 132 2734  664]
 [ 180  410 2230]]
Normalized: 
[[0.77 0.06 0.17]
 [0.04 0.77 0.19]
 [0.06 0.15 0.79]]


#### Valid

In [77]:
rmt_df = pd.read_csv('tables/cnli-valid-rm-bert-968-25.csv')
baseline_df = pd.read_csv('tables/cnli-valid-bert.csv')

In [78]:
rmt_df['correct_seg_0'] = rmt_df.pred_seg_0 == rmt_df.labels_seg_0
rmt_df['correct_seg_1'] = rmt_df.pred_seg_1 == rmt_df.labels_seg_1

baseline_df['correct_seg'] = baseline_df.pred_seg == baseline_df.labels_seg

rmt_correct_ids_seg_0 = set(rmt_df[rmt_df.correct_seg_0].ids)
rmt_correct_ids_seg_1 = set(rmt_df[rmt_df.correct_seg_1].ids)
bl_correct_ids = set(baseline_df[baseline_df.correct_seg].ids)

rmt_lose_seg_0 = bl_correct_ids.difference(rmt_correct_ids_seg_0)
rmt_lose_seg_1 = bl_correct_ids.difference(rmt_correct_ids_seg_1)
baseline_lose = rmt_correct_ids_seg_1.difference(bl_correct_ids)
baseline_lose_seg_0 = rmt_correct_ids_seg_0.difference(bl_correct_ids)

print(f'rmt loses: seg_0 - {len(rmt_lose_seg_0)}, seg_1 - {len(rmt_lose_seg_1)}, baseline loses: {len(baseline_lose)} (to seg 0: {len(baseline_lose_seg_0)})')

rmt loses: seg_0 - 75, seg_1 - 67, baseline loses: 76 (to seg 0: 58)


In [79]:
# baseline 

accuracy = (baseline_df.pred_seg == baseline_df.labels_seg).mean()
conf_mat = confusion_matrix(y_pred=baseline_df.pred_seg, y_true=baseline_df.labels_seg, )
conf_mat_normalized = (conf_mat / conf_mat.sum(axis=1).reshape(-1, 1)).round(2)
print(f'Accuracy: {accuracy}\n\nConfusion matrix: \n{conf_mat}\nNormalized: \n{conf_mat_normalized}')

Accuracy: 0.7097396335583414

Confusion matrix: 
[[ 66  13  16]
 [ 30 416  73]
 [ 48 121 254]]
Normalized: 
[[0.69 0.14 0.17]
 [0.06 0.8  0.14]
 [0.11 0.29 0.6 ]]


In [80]:
# rmt

df = rmt_df 
for l in [0, 1]:
    accuracy = (df[f'pred_seg_{l}'] == df[f'labels_seg_{l}']).mean()
    conf_mat = confusion_matrix(y_pred=df[f'pred_seg_{l}'], y_true=df[f'labels_seg_{l}'])
    conf_mat_normalized = (conf_mat / conf_mat.sum(axis=1).reshape(-1, 1)).round(2)
    print(f'\nSegment {l}\nAccuracy: {accuracy}\n\nConfusion matrix: \n{conf_mat}\nNormalized: \n{conf_mat_normalized}')


Segment 0
Accuracy: 0.6933461909353905

Confusion matrix: 
[[ 65  11  19]
 [ 28 372 119]
 [ 56  85 282]]
Normalized: 
[[0.68 0.12 0.2 ]
 [0.05 0.72 0.23]
 [0.13 0.2  0.67]]

Segment 1
Accuracy: 0.7184185149469624

Confusion matrix: 
[[ 64  13  18]
 [ 22 389 108]
 [ 44  87 292]]
Normalized: 
[[0.67 0.14 0.19]
 [0.04 0.75 0.21]
 [0.1  0.21 0.69]]


In [81]:
pd.DataFrame(conf_mat, index=labels_map.keys(), columns=labels_map.keys())

Unnamed: 0,Contradiction,Entailment,Not mentioned
Contradiction,64,13,18
Entailment,22,389,108
Not mentioned,44,87,292


In [82]:
# rmt

df = rmt_df[rmt_df.ids.isin(rmt_lose_seg_1)]
for l in [0, ]:
    accuracy = (df[f'pred_seg_{l}'] == df[f'labels_seg_{l}']).mean()
    conf_mat = confusion_matrix(y_pred=df[f'pred_seg_{l}'], y_true=df[f'labels_seg_{l}'])
    conf_mat_normalized = (conf_mat / conf_mat.sum(axis=1).reshape(-1, 1)).round(2)
    print(f'\nSegment {l}\nAccuracy: {accuracy}\n\nConfusion matrix: \n{conf_mat}\nNormalized: \n{conf_mat_normalized}')


Segment 0
Accuracy: 0.3582089552238806

Confusion matrix: 
[[ 3  2  2]
 [ 2 13 31]
 [ 3  3  8]]
Normalized: 
[[0.43 0.29 0.29]
 [0.04 0.28 0.67]
 [0.21 0.21 0.57]]


In [83]:
pd.DataFrame(conf_mat, index=labels_map.keys(), columns=labels_map.keys())

Unnamed: 0,Contradiction,Entailment,Not mentioned
Contradiction,3,2,2
Entailment,2,13,31
Not mentioned,3,3,8


In [84]:
# rmt

df = rmt_df[rmt_df.ids.isin(rmt_lose_seg_0)]
for l in [0, 1]:
    accuracy = (df[f'pred_seg_{l}'] == df[f'labels_seg_{l}']).mean().round(2)
    conf_mat = confusion_matrix(y_pred=df[f'pred_seg_{l}'], y_true=df[f'labels_seg_{l}'])
    conf_mat_normalized = (conf_mat / conf_mat.sum(axis=1).reshape(-1, 1)).round(2)
    print(f'\nSegment {l}\nAccuracy: {accuracy}\n\nConfusion matrix: \n{conf_mat}\nNormalized: \n{conf_mat_normalized}')


Segment 0
Accuracy: 0.0

Confusion matrix: 
[[ 0  2  2]
 [ 6  0 50]
 [ 9  6  0]]
Normalized: 
[[0.   0.5  0.5 ]
 [0.11 0.   0.89]
 [0.6  0.4  0.  ]]

Segment 1
Accuracy: 0.43

Confusion matrix: 
[[ 0  3  1]
 [ 2 23 31]
 [ 3  3  9]]
Normalized: 
[[0.   0.75 0.25]
 [0.04 0.41 0.55]
 [0.2  0.2  0.6 ]]


In [90]:
# baseline 
df = baseline_df[baseline_df.ids.isin(baseline_lose)]
accuracy = (df.pred_seg == df.labels_seg).mean()
conf_mat = confusion_matrix(y_pred=df.pred_seg, y_true=df.labels_seg, )
conf_mat_normalized = (conf_mat / conf_mat.sum(axis=1).reshape(-1, 1)).round(2)
print(f'Accuracy: {accuracy}\n\nConfusion matrix: \n{conf_mat}\nNormalized: \n{conf_mat_normalized}')

Accuracy: 0.0

Confusion matrix: 
[[ 0  2  3]
 [ 8  0 11]
 [10 42  0]]
Normalized: 
[[0.   0.4  0.6 ]
 [0.42 0.   0.58]
 [0.19 0.81 0.  ]]


In [91]:
pd.DataFrame(conf_mat, index=labels_map.keys(), columns=labels_map.keys())

Unnamed: 0,Contradiction,Entailment,Not mentioned
Contradiction,0,2,3
Entailment,8,0,11
Not mentioned,10,42,0


In [86]:
# print('What label model distinguishes worse than its rival?\n RMT-seg-1, RMT-seg-0, baseline')
# rmt_df[rmt_df.ids.isin(rmt_lose_seg_1)].labels_seg_1.value_counts()/rmt_df.labels_seg_1.value_counts(), rmt_df[rmt_df.ids.isin(rmt_lose_seg_0)].labels_seg_0.value_counts()/rmt_df.labels_seg_0.value_counts(), baseline_df[baseline_df.ids.isin(baseline_lose)].labels_seg.value_counts()/baseline_df.labels_seg.value_counts()

In [87]:
# tokenizer.encode(text, **encode_plus_kwargs)
n_segments = 2
def split(text, n_segments=n_segments):
    premise = text.split('.')[0]
    encoded = tokenizer.encode(text, **encode_plus_kwargs, add_special_tokens=False)
    segments = np.split(np.array(encoded), n_segments)    
    texts = [tokenizer.decode(s) for s in segments]
    
    
    return [premise] + texts

In [88]:
i = 10

idx = list(rmt_lose_seg_1)[i]
label = id2label_valid[idx] 
rmt_preds = ', '.join(rmt_df[rmt_df.ids == idx][['pred_seg_0', 'pred_seg_1']].astype(str).values[0])
baseline_pred = baseline_df[baseline_df.ids == idx].pred_seg.values[0]
print(f'idx: {idx}\nlabel: {label, labels_map[label]}\nrmt predictions: {rmt_preds}\nbaseline_prediction: {baseline_pred}\n\n')

text = id2text_valid[idx]
texts = split(text)

print('\n\n\n'.join(texts))

NameError: name 'id2label_valid' is not defined

In [170]:
i = 4

idx = list(baseline_lose)[i]
label = id2label_valid[idx] 
rmt_preds = ', '.join(rmt_df[rmt_df.ids == idx][['pred_seg_0', 'pred_seg_1']].astype(str).values[0])
baseline_pred = baseline_df[baseline_df.ids == idx].pred_seg.values[0]
print(f'idx: {idx}\nlabel: {label, labels_map[label]}\nrmt predictions: {rmt_preds}\nbaseline_prediction: {baseline_pred}\n\n')

text = id2text_valid[idx]
texts = split(text)

print('\n\n\n'.join(texts))

idx: 590_nda-16
label: ('Not mentioned', 2)
rmt predictions: 1, 2
baseline_prediction: 1


Receiving Party shall destroy or return some Confidential Information upon the termination of Agreement


Receiving Party shall destroy or return some Confidential Information upon the termination of Agreement. Exhibit ( d ) ( 3 ) NON - DISCLOSURE AGREEMENT In connection with a potential transaction ( “ Proposed Transaction ” ) between 3M Company ( “ Interested Party ” or “ Receiving Party ” ), and Cogent, Inc., a Delaware corporation ( “ Company ” or “ Disclosing Party ” ), the parties wish to protect and preserve the confidential and / or proprietary nature of certain information and materials of the Company that may be disclosed or made available to the Interested Party or its Representatives ( as defined below ) in connection with certain discussions, negotiations or dealings between the parties relating to the Proposed Transaction. In consideration of the foregoing and the rights and obligat

### selective

In [113]:


i = 2

ids = rmt_df[(rmt_df.pred_seg_0 == rmt_df.labels_seg_0) & ( rmt_df.pred_seg_1 != rmt_df.labels_seg_1)].ids.values
idx = ids[i]
label = id2label_valid[idx] 
rmt_preds = ', '.join(rmt_df[rmt_df.ids == idx][['pred_seg_0', 'pred_seg_1']].astype(str).values[0])
baseline_pred = baseline_df[baseline_df.ids == idx].pred_seg.values[0]
print(f'idx: {idx}\nlabel: {label, labels_map[label]}\nrmt predictions: {rmt_preds}\nbaseline_prediction: {baseline_pred}\n\n')

text = id2text_valid[idx]
texts = split(text)

print('\n\n\n'.join(texts))

idx: 70_nda-13
label: ('Not mentioned', 2)
rmt predictions: 2, 1
baseline_prediction: 2


Receiving Party may acquire information similar to Confidential Information from a third party


Receiving Party may acquire information similar to Confidential Information from a third party. CONFIDENTIALITY, NON - DISCLOSURE & APPROPRIATE USE AGREEMENT FAU has a legal responsibility to safeguard the confidentiality and security of our patients ’ protected health information ( “ PHI ” ) as well as operational, proprietary, and student and employee information ( collectively “ FAU Confidential Information ” ). This information may include, but is not limited to, patient health records, as well as information regarding human resources, payroll, fiscal matters, research, and strategic planning, and may exist in any form, including electronic, video, spoken, or written. This Agreement applies to all members of the workforce, including but not limited to, employees, volunteers, students, faculty, phys

In [116]:
rmt_df[(rmt_df.pred_seg_0 == rmt_df.labels_seg_0) & ( rmt_df.pred_seg_1 != rmt_df.labels_seg_1)].labels_seg_1.value_counts()

2    21
1    15
0     5
Name: labels_seg_1, dtype: int64

mmaybe finetune only memory weights
use mem outputs to decode previous segment with a separate decoder
orr pool answer to task not from cls but from mem tokens
check if  gradient flows through memory
decode using conccatenated memory, not last segment
train baseline without question on qasper

how do we improve remembering using memory