In [1]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
!export CUDA_LAUNCH_BLOCKING=1
#import sys
#sys.path.append('../')
#from models import model1 as m1
import torch
import pickle
import copy
import transformers
from torchtext import data as datx
from torch import nn
from torch import optim
from tqdm import tqdm
import numpy as np
import random

torch.cuda.is_available()
torch.cuda.get_device_name(0)
device_num = 0
torch.cuda.set_device(device_num)


#seed definition
seed = 23

## Reproducibility

In [4]:
# Seed every random number generator used in the notebook with the same value.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
#torch.cuda.manual_seed_all(seed)  # only needed when running on several GPUs
np.random.seed(seed)  # NumPy global generator
random.seed(seed)  # Python's `random` module
torch.manual_seed(seed)  # NOTE(review): repeats the first call of this cell
# Turn off cuDNN auto-tuning and request deterministic cuDNN kernels.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

def _init_fn(worker_id):
    """Seed NumPy's global generator with the notebook-level ``seed``.

    ``worker_id`` is accepted but not used, so every caller gets the same seed.
    """
    fixed_seed = int(seed)
    np.random.seed(fixed_seed)

## Defining sentences training data and DTK trees for sentence 1 and 2 columns

In [5]:
# Prefix prepended to every data file name ('' means the current directory).
dataPath = ''

# MultiNLI training set and heuristics evaluation set
dataset_train = 'training_full.csv'
dataset_test = 'heuristics_evaluation_set.txt'

# DTK trees generated from the sentence 1 column (training set, then evaluation set)
nameTree1_train = './dtk_trees_multiNLI_train_sencence_1_tot.pkl' # train_s1 (file name really is spelled "sencence")
nameTree1_test = './dtk_trees_hans_sentence1.pkl' # test_s1

# DTK trees generated from the sentence 2 column (training set, then evaluation set)
nameTree2_train = './dtk_trees_multiNLI_train_sentence_2_tot.pkl' # train_s2
nameTree2_test = './dtk_trees_hans_sentence2.pkl' # test_s2


## Selecting GPU device if available

## Define a function to transform text label to numeric value

In [6]:
def from_label_to_int(e):
    """Collapse a three-way NLI label into a binary class id.

    'entailment' maps to 1; 'neutral' and 'contradiction' both map to 0.
    Any other label raises KeyError.
    """
    label_ids = dict.fromkeys(('neutral', 'contradiction'), 0)
    label_ids['entailment'] = 1
    return label_ids[e]

def from_label_to_int_heuristic(e):
    """Map an evaluation-set label to a binary class id.

    'entailment' maps to 1; 'neutral' and 'non-entailment' both map to 0.
    Any other label raises KeyError.
    """
    label_ids = dict.fromkeys(('neutral', 'non-entailment'), 0)
    label_ids['entailment'] = 1
    return label_ids[e]

# Data load step

### First we load the training data into a DataFrame, then we build a new DataFrame with the following structure:
- id -> id of the sentence pair 
- sentence -> single sentence derived from sentence 1 and sentence 2 merge, particularly: **SENTENCE1**[SEP]**SENTENCE2** 
- label -> class label 

In [7]:
import pandas as pd

# Read the raw training CSV and derive a two-column frame:
# binary label + "sentence1 [SEP] sentence2" text.
data = pd.read_csv(dataPath+dataset_train)
gl = data["gold_label"]
s = data["sentence1"] + " [SEP] " + data["sentence2"]
l = data['gold_label'].apply(from_label_to_int)
# NOTE(review): the name `train` is rebound to the training function further
# down in the notebook, so this frame is only reachable until that cell runs.
train = pd.concat([l,s],axis=1)
train.columns = ["label","sentence"]
# The DataFrame index is written out as the first CSV column.
train.to_csv("training_labeled.csv")
train.head()

train["label"].value_counts()

0    260754
1    130411
Name: label, dtype: int64

In [8]:
# Same preparation for the tab-separated evaluation file.
# NOTE(review): `error_bad_lines` was deprecated in pandas 1.3 in favour of
# `on_bad_lines` -- confirm which pandas version this notebook is pinned to.
data = pd.read_csv(dataPath+dataset_test, delimiter="\t", error_bad_lines=False, )
# Rows labelled "neutral" are removed before the binary mapping.
data.drop(data[data["gold_label"]=="neutral"].index, inplace=True)
gl = data["gold_label"]
s = data["sentence1"] + " [SEP] " + data["sentence2"]
l = data['gold_label'].apply(from_label_to_int_heuristic)
test = pd.concat([l,s],axis=1)
test.columns = ["label","sentence"]
# NOTE(review): tree vectors are later paired with these rows by position;
# presumably no row is actually dropped or skipped here -- verify.
test.to_csv("test_labeled.csv")

### This function truncates the sentence representations so that they respect the maximum length of the BERT input
We invoke this function in the preprocessing step

In [9]:
def _truncate_seq_pair(tokens_a, tokens_b, max_length):
    """Truncates a sequence pair in place to the maximum length."""

    # This is a simple heuristic which will always truncate the longer sequence
    # one token at a time. This makes more sense than truncating an equal percent
    # of tokens from each, since if one sequence is very short then each token
    # that's truncated likely contains more information than a longer sequence.
    while True:
        total_length = len(tokens_a) + len(tokens_b)
        if total_length <= max_length:
            break
        if len(tokens_a) > len(tokens_b):
            tokens_a.pop()
        else:
            tokens_b.pop()


### Preprocessing step: the input of this function is the numeric (token-id) representation of the sentence pair.
#### The id 102 is placed between the numeric representations of the two sentences, following BERT's guidelines for two-sentence classification input

In [10]:
def pre_processing(x):
    """Rebuild a token-id sequence as ``first + [102] + second`` within the length budget.

    ``x`` is split around the first id equal to 102. The two halves are
    truncated with ``_truncate_seq_pair`` so that together they hold at most
    ``MAX_LEN-3`` ids, then joined again with a single 102 between them.
    ``x`` itself is not modified. Raises IndexError when ``x`` contains no 102.
    """
    separator_positions = [pos for pos, token_id in enumerate(x) if token_id == 102]
    first_sep = separator_positions[0]  # IndexError here when there is no separator
    left = x[:first_sep]
    right = x[first_sep+1:]

    _truncate_seq_pair(left, right, MAX_LEN-3)
    return [*left, 102, *right]
            

### Defining Training and Test Batch sizes 

In [11]:
# Mini-batch sizes read by dataset_to_train() and dataset_to_test() respectively.
BATCH_SIZE = 32
BATCH_SIZE_test = 32

### Legacy function used to read DTK tree representation

In [12]:
class TreeField(datx.Field):
    """Field for ready-made tensors: examples pass through untouched and are stacked per batch."""

    def __init__(self, *args, **kwargs):
        # No extra state; every argument goes straight to datx.Field.
        super(TreeField, self).__init__(*args, **kwargs)

    def preprocess(self, x):
        """Return the example value unchanged."""
        return x

    def process(self, batch, device=None):
        """Stack the batch's tensors along a new first dimension; ``device`` is ignored."""
        stacked = torch.stack(batch)
        return stacked


def unplickle_trees(path_tree_file):
    """Read every pickled object stored back to back in a file.

    Objects are loaded one after another until the end of the file, and each
    one is converted to a ``torch.FloatTensor``. Returns the tensors as a list
    in file order.

    NOTE: ``pickle.load`` can execute arbitrary code -- only open trusted files.
    """
    print('--->read DTKs')
    loaded = []
    with open(path_tree_file, 'rb') as stream:
        while True:
            try:
                record = pickle.load(stream)
            except EOFError:
                # End of file reached: every record has been read.
                break
            loaded.append(record)
    return [torch.FloatTensor(record) for record in loaded]

def add_parsed_tree(test, test_tree_list, field):
    """Attach one tree vector to every example of ``test`` under the attribute ``Tree``.

    Examples and vectors are paired by position. When the two sequences differ
    in length the surplus on the longer side is dropped, so ``test.examples``
    may come back shorter. ``test`` is modified in place and also returned.
    """
    tree_examples = [
        datx.Example.fromlist([vector], [('Tree', field)])
        for vector in test_tree_list
    ]
    test.fields['Tree'] = field

    paired = []
    for example, tree_ex in zip(test.examples, tree_examples):
        example.Tree = tree_ex.Tree
        paired.append(example)
    test.examples = paired
    return test
    
def first_tree(test, test_tree_list, field):
    """Attach only the first tree vector to the first example of ``test``.

    The dataset is cut down to that single example (or to none when it has no
    examples). ``test`` is modified in place and returned. Raises IndexError
    when ``test_tree_list`` is empty.
    """
    head_example = datx.Example.fromlist([test_tree_list[0]], [('Tree', field)])
    test.fields['Tree'] = field

    kept = []
    for example, tree_ex in zip(test.examples, [head_example]):
        example.Tree = tree_ex.Tree
        kept.append(example)
    test.examples = kept
    return test

In [13]:
class UnprField(datx.Field):
    """Field that leaves both single examples and whole batches exactly as given."""

    def __init__(self, *args, **kwargs):
        # No extra state; every argument goes straight to datx.Field.
        super(UnprField, self).__init__(*args, **kwargs)

    def preprocess(self, x):
        """Return the example value unchanged."""
        return x

    def process(self, batch, device=None):
        """Return the batch unchanged; ``device`` is ignored."""
        return batch

def Aduplicate_field(test, dataset_number):
    """Tag every example with a ``Text_pntr`` tensor ``[dataset_number, position]``.

    A ``TreeField`` is registered for the new attribute. ``test`` is modified
    in place and returned.
    """
    pointer_field = TreeField(sequential=False, use_vocab=False, batch_first=True)
    test.fields['Text_pntr'] = pointer_field

    tagged = []
    for position, example in enumerate(test.examples):
        example.Text_pntr = torch.tensor([dataset_number, position])
        tagged.append(example)
    test.examples = tagged
    return test

## Tokenize step

### Defining Bert Tokenizer, preprocessing steps and Max length

In [14]:
# Length every TEXT example is fixed to (passed as fix_length below); also read by pre_processing().
MAX_LEN = 125

tokenizer = transformers.AutoTokenizer.from_pretrained('bert-base-uncased')
# Numeric id of the tokenizer's padding token, used to pad TEXT sequences.
pad_index = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)

# Text field: tokenizer.encode produces the ids directly, so no vocabulary is built.
TEXT = datx.Field(use_vocab=False,fix_length=MAX_LEN, tokenize=tokenizer.encode, sequential=True,
                  pad_token=pad_index, batch_first=True)

# Hook the sentence-pair truncation into the field's preprocessing step.
TEXT.preprocessing = pre_processing

# NOTE(review): dataset_to_train()/dataset_to_test() create their own local LABEL field.
LABEL = datx.Field(sequential=False, use_vocab=False, batch_first=True)
TREE = TreeField(sequential=False, use_vocab=False, batch_first=True)

### This function transforms input dataset to train ready data structure

In [15]:
def dataset_to_train(dataset, dataPath, tree1, tree2, extra_trees=None, exclude=0):
    """Build a batched iterator over a labelled CSV joined with its DTK tree vectors.

    Args:
        dataset: path of the CSV file without the ``.csv`` suffix. Its columns
            are taken as (ignored id, Label, Text) and the header row is skipped.
        dataPath: prefix prepended to ``tree1`` and ``tree2``.
        tree1: pickle file with the tree vectors of sentence 1.
        tree2: pickle file with the tree vectors of sentence 2.
        extra_trees: optional pair ``[trees_1, trees_2]`` appended to the
            loaded lists; ``None`` or an empty list adds nothing.
        exclude: number of leading tree vectors dropped from both lists
            (0 keeps everything).

    Returns:
        ``(train_iter, None)``. The second slot is kept only so callers that
        unpack two values or index ``[0]`` keep working; the previous code
        filled it with an iterator over the interpreter's ``_`` variable,
        which was never a real dataset.
    """
    LABEL = datx.Field(sequential=False, use_vocab=False, batch_first=True)
    fields = [("id", None), ('Label', LABEL), ('Text', TEXT)]

    test = datx.TabularDataset(path=f'{dataset}.csv', format='csv', fields=fields, skip_header=True)

    trees_sentence1 = unplickle_trees(dataPath + tree1)
    trees_sentence2 = unplickle_trees(dataPath + tree2)

    print(len(trees_sentence1))
    print(len(trees_sentence2))

    if extra_trees:
        for index in range(len(extra_trees[0])):
            trees_sentence1.append(extra_trees[0][index])
            trees_sentence2.append(extra_trees[1][index])

    if exclude != 0:
        trees_sentence1 = trees_sentence1[exclude:]
        trees_sentence2 = trees_sentence2[exclude:]

    # One vector per pair: sentence-1 tree followed by sentence-2 tree.
    # Indexing (not zip) keeps the IndexError when list 2 is shorter than list 1.
    tree_vectors = [
        torch.cat((first, trees_sentence2[position]))
        for position, first in enumerate(trees_sentence1)
    ]

    train = add_parsed_tree(test, tree_vectors, TREE)

    # Same settings the first iterator received from Iterator.splits
    # (train=True), without depending on `_` for a second dataset.
    train_iter = datx.Iterator(
        train, batch_size=BATCH_SIZE,
        sort_key=lambda x: len(x.Text), train=True)

    return (train_iter, None)

def dataset_to_test(dataset, dataPath, nameTree, nameTree2=None, skip_header=True):
    """Build an evaluation iterator over a labelled CSV joined with its DTK tree vectors.

    The previous body read two names (``nameTree1``/``nameTree2``) that are
    not defined anywhere in this notebook and ignored ``nameTree``, so it
    could only work through leftover kernel state.

    Args:
        dataset: path of the CSV file without the ``.csv`` suffix. Its columns
            are taken as (ignored id, Label, Text).
        dataPath: prefix prepended to both tree file names.
        nameTree: pickle file with the sentence-1 tree vectors, or a pair
            ``(sentence1_file, sentence2_file)`` when ``nameTree2`` is omitted.
        nameTree2: pickle file with the sentence-2 tree vectors.
        skip_header: skip the first CSV row. Defaults to True because the CSVs
            written by this notebook (``to_csv``) start with a header row.

    Returns:
        ``(test_iter, None)``; the second slot only preserves the two-element
        return shape.

    Raises:
        ValueError: when only a single tree file name is supplied.
    """
    if nameTree2 is None:
        if isinstance(nameTree, str):
            raise ValueError(
                "dataset_to_test needs two tree files: pass nameTree2 or a "
                "(sentence1_file, sentence2_file) pair as nameTree")
        nameTree, nameTree2 = nameTree

    LABEL = datx.Field(sequential=False, use_vocab=False, batch_first=True)
    fields = [("id", None), ('Label', LABEL), ('Text', TEXT)]

    test = datx.TabularDataset(path=f'{dataset}.csv', format='csv', fields=fields,
                               skip_header=skip_header)

    trees_sentence1 = unplickle_trees(dataPath + nameTree)
    trees_sentence2 = unplickle_trees(dataPath + nameTree2)

    # One vector per pair: sentence-1 tree followed by sentence-2 tree.
    tree_vectors = [
        torch.cat((first, trees_sentence2[position]))
        for position, first in enumerate(trees_sentence1)
    ]

    test = add_parsed_tree(test, tree_vectors, TREE)

    # Evaluation pass: a single sweep in file order. repeat=True would make a
    # `for batch in iterator` loop endless; shuffling or sorting would break
    # the correspondence between predictions and rows of the source file.
    test_iter = datx.Iterator(
        test, batch_size=BATCH_SIZE_test,
        sort_key=lambda x: len(x.Text),
        train=False, sort=False, shuffle=False, repeat=False)

    return (test_iter, None)



In [16]:
def get_layer_pred(x_synth):
    """Return the global ``model``'s activation for tree vectors paired with all-zero token ids.

    Args:
        x_synth: tensor of tree vectors, one row per example.

    Returns:
        Whatever ``model.get_activation`` returns for the zero token ids and
        ``x_synth``.
    """
    # 51 zero token ids per row, created directly as int64 on x_synth's device.
    # One row per input row, so batches larger than 1 can be concatenated too.
    x_sem = torch.zeros(x_synth.size(0), 51, dtype=torch.int64, device=x_synth.device)
    return model.get_activation(x_sem, x_synth)



In [17]:
# CSV stems (without ".csv") of the evaluation datasets.
datasets_test = [ dataPath+'test_labeled']

test_list = [] # one (iterator, second value) tuple per dataset, as returned by dataset_to_train


# The evaluation data goes through the same builder as the training data.
for dat in datasets_test:
    test_list.append(dataset_to_train(dat, dataPath, nameTree1_test, nameTree2_test))

--->read DTKs
--->read DTKs
60000
60000


In [18]:
#MODEL TYPES
BERT_ONLY = "Bert_Only"
BERT_KERMIT = "Bert+Kermit"
BERT_KERMIT_NO_REL = "Bert+Kermit_NO_REL"
BERT_KERMIT_MULTILAYER = "Bert+Kermit_Multi_Layer"

## Training function


In [19]:
import numpy as np

def train(train_iter, dataset_name, EPOCH, ext, model_name= "Bert+Kermit"):
    """Train the notebook-level ``model`` and evaluate it after every epoch.

    Uses the notebook-level ``model``, ``criterion``, ``optimizer``,
    ``test_list`` and ``datasets_test``. After each epoch, every entry of
    ``test_list`` is evaluated with ``infer`` and one summary line is appended
    to ``results_{model_name + ext}.txt``.

    Args:
        train_iter: iterator whose batches expose ``Text``, ``Tree`` and ``Label``.
        dataset_name: accepted for call compatibility; not used.
        EPOCH: number of passes over ``train_iter``.
        ext: suffix appended to ``model_name`` in the results file name.
        model_name: ``BERT_ONLY`` feeds only the token ids to the model; any
            other value also feeds the tree vectors.

    Returns:
        List with the training accuracy (percent) of each epoch, in order.
    """
    n_batches = len(train_iter)  # used to average the loss
    print(n_batches)
    accs = []
    results_path = f"results_{model_name + ext}.txt"

    for epoch_number in range(1, EPOCH + 1):
        running_loss = 0
        # Reset per epoch so the reported accuracy describes this epoch only,
        # in the same way as the loss.
        total, correct = 0, 0

        for elem in tqdm(iter(train_iter)):
            x_sem = elem.Text.cuda()
            x_synth = elem.Tree.cuda()
            target = elem.Label.cuda()

            if model_name == BERT_ONLY:
                target_hat = model(x_sem)
            else:
                target_hat = model(x_sem, x_synth)

            loss = criterion(target_hat, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            correct += (torch.exp(target_hat).argmax(1) == target).sum().item()
            total += target.size(0)

        epoch_loss = running_loss / n_batches
        epoch_acc = 100 * correct / total
        accs.append(epoch_acc)

        print("Epoch: " , epoch_number)
        print("Loss: " + str(epoch_loss))
        print(f"Accuracy: {epoch_acc}")

        print("===================================")
        print("Measure on test\n")
        # Separate loop names: the `dataset_name` parameter is no longer overwritten.
        for test_elem, test_name in zip(test_list, datasets_test):
            print(f"Testing dataset: {test_name}")
            infer(test_elem[0], model, test_name, model_name = model_name, ext = ext)
        print("===================================")

        # Opened only for the write, so the handle is closed even on errors.
        # The epoch line still lands after the TEST lines written by infer().
        with open(results_path, "a") as f:
            f.write(f'Epoch: {epoch_number}\tLoss: {str(epoch_loss)}\tAccuracy: {epoch_acc}\n')
    return accs

## Inference function

In [20]:
def infer(test_iter, neural_model,dataset_name , ext="",EPOCH=1, L=30, lambda_norm=0.001, model_name= "Bert+Kermit"):
    """Evaluate ``neural_model`` on ``test_iter`` and log loss and accuracy.

    The model is moved to the GPU and run under ``torch.no_grad()``; its
    train/eval mode is left exactly as the caller set it. One "TEST" entry is
    appended to ``results_{model_name + ext}.txt``.

    Args:
        test_iter: iterator whose batches expose ``Text``, ``Tree`` and ``Label``.
        neural_model: model to evaluate.
        dataset_name: accepted for call compatibility; not used.
        ext: suffix appended to ``model_name`` in the results file name. It now
            defaults to "" because several callers in this notebook omit it.
        EPOCH, L, lambda_norm: accepted for call compatibility; not used.
        model_name: ``BERT_ONLY`` feeds only the token ids to the model; any
            other value also feeds the tree vectors.

    Returns:
        ``(accuracy_percent, preds, targets)`` where ``preds`` and ``targets``
        are lists holding one list of ints per batch.
    """
    neural_model.cuda()
    criterion = nn.NLLLoss()

    n_batches = len(test_iter)
    running_loss = 0
    total, correct = 0, 0
    preds = []
    targets = []

    for elem in tqdm(iter(test_iter)):
        x_sem = elem.Text.cuda()
        x_synth = elem.Tree.cuda()
        target = elem.Label.cuda()

        with torch.no_grad():
            if model_name == BERT_ONLY:
                target_hat = neural_model(x_sem)
            else:
                target_hat = neural_model(x_sem, x_synth)
            loss = criterion(target_hat, target)
            batch_pred = target_hat.exp().argmax(dim=1)

        running_loss += loss.item()
        targets.append(target.tolist())
        preds.append(batch_pred.tolist())
        correct += (batch_pred == target).sum().item()
        total += target.size(0)

    mean_loss = running_loss / n_batches
    accuracy = 100 * correct / total
    print("Loss: " + str(mean_loss))
    print(f"Accuracy: {accuracy}")

    # Opened only for the write, so the handle cannot leak if evaluation fails.
    with open(f"results_{model_name + ext}.txt", "a") as f:
        f.write(f'TEST:\nLoss: {str(mean_loss)}\tAccuracy: {accuracy}\n')
    return accuracy, preds, targets


## Defining Bert Model

In [21]:
import torch
from torch import nn
import torch.nn.functional as F

class DTBertPlusLayer(nn.Module):
    """BERT first-token vector plus tree vector, classified by a small MLP with one sigmoid output.

    Args:
        input_dim_bert: width of the BERT vector.
        input_dim_dt: width of the tree vector.
        output_dim: accepted for signature parity with the other models; not
            used (the last layer always has one unit).
    """

    def __init__(self, input_dim_bert, input_dim_dt, output_dim):
        super().__init__()
        # Uncased checkpoint, matching the 'bert-base-uncased' tokenizer that
        # produces the input ids in this notebook and the other model classes
        # (the cased checkpoint has a different vocabulary).
        # NOTE: force_download=True fetches the weights again on every construction.
        self.bert = transformers.BertModel.from_pretrained('bert-base-uncased', force_download= True).to("cuda" if torch.cuda.is_available() else "cpu")

        # Deep classifier over the concatenated [BERT | tree] vector.
        self.synth_sem_linear = nn.Linear(input_dim_bert + input_dim_dt, 256)
        self.fc1 = nn.Linear(in_features= 256, out_features=124)
        self.fc2 = nn.Linear(in_features=124, out_features=28)
        self.out = nn.Linear(in_features=28, out_features=1)

    def _joint_features(self, x_sem, x_synth):
        """Concatenate the first-position vector of BERT's first output with the tree vector."""
        first_token = self.bert(x_sem)[0][:, 0, :]
        return torch.cat((first_token, x_synth), 1)

    def forward(self, x_sem, x_synth):
        """Return one sigmoid score per example, shape ``(batch, 1)``."""
        t = F.relu(self.synth_sem_linear(self._joint_features(x_sem, x_synth)))
        t = F.relu(self.fc1(t))
        t = F.relu(self.fc2(t))
        return torch.sigmoid(self.out(t))

    def get_activation(self, x_sem, x_synth):
        """Return the sigmoid of the first linear layer's output, computed without gradients."""
        with torch.no_grad():
            x_tot = self.synth_sem_linear(self._joint_features(x_sem, x_synth))
            # sigmoid is element-wise and takes no `dim`; passing one raised TypeError.
            out = torch.sigmoid(x_tot)
        return out
    
class DTBert(nn.Module):
    """BERT first-token vector plus tree vector, classified by one linear layer into two classes.

    Args:
        input_dim_bert: width of the BERT vector.
        input_dim_dt: width of the tree vector.
        output_dim: accepted for signature parity; not used (the linear layer
            always has two outputs).
    """

    def __init__(self, input_dim_bert, input_dim_dt, output_dim):
        super().__init__()
        # NOTE: force_download=True fetches the weights again on every construction.
        self.bert = transformers.BertModel.from_pretrained('bert-base-uncased', force_download= True).to("cuda" if torch.cuda.is_available() else "cpu")

        # Single linear classifier over the concatenated [BERT | tree] vector.
        self.synth_sem_linear = nn.Linear(input_dim_bert + input_dim_dt, 2)

    def forward(self, x_sem, x_synth):
        """Return log-probabilities over the two classes, shape ``(batch, 2)``."""
        first_token = self.bert(x_sem)[0][:, 0, :]
        x_tot = torch.cat((first_token, x_synth), 1)
        # Explicit class dimension: the implicit-dim form is deprecated and
        # get_activation already used dim=1.
        return F.log_softmax(self.synth_sem_linear(x_tot), dim=1)

    def get_activation(self, x_sem, x_synth):
        """Return the same log-probabilities as ``forward``, computed without gradients."""
        with torch.no_grad():
            out = self.forward(x_sem, x_synth)
        return out

class Bert(nn.Module):
    """BERT first-token vector classified by one linear layer into two classes.

    ``output_dim`` is accepted but not used; the linear layer always has two outputs.
    """

    def __init__(self, input_dim_bert, output_dim):
        super().__init__()
        target_device = "cuda" if torch.cuda.is_available() else "cpu"
        encoder = transformers.BertModel.from_pretrained('bert-base-uncased', force_download = True)
        self.bert = encoder.to(target_device)
        self.synth_sem_linear = nn.Linear(input_dim_bert, 2)

    def forward(self, x_sem):
        """Return log-probabilities over the two classes, shape ``(batch, 2)``."""
        first_token = self.bert(x_sem)[0][:, 0, :]
        logits = self.synth_sem_linear(first_token)
        return F.log_softmax(logits, dim=1)
        

## Defining Bert vector size and tree vector size (tree1+tree2)
### Loading the chosen BERT model
### Defining Cost function and optimizer

In [None]:
# Width of the BERT vector and of the concatenated tree vector (tree1 + tree2).
BERT_DIM = 768
TREE_DIM = 8000

# Passed to the model constructor; DTBert itself always builds a 2-output layer.
OUTPUT_DIM = 2


#model = DTBert(BERT_DIM, TREE_DIM, OUTPUT_DIM)

# Model in use; the commented lines below are alternative configurations.
model = DTBert(BERT_DIM, TREE_DIM, OUTPUT_DIM)
#model = DTBert(BERT_DIM, TREE_DIM,1)
#model = Bert(BERT_DIM, 2)

model.cuda()

# Loss function
#criterion = nn.NLLoss()

# Negative log-likelihood over the log-probabilities the model returns.
criterion = nn.NLLLoss()

# Optimizer
optimizer = optim.AdamW(model.parameters(), lr=5e-5)


HBox(children=(IntProgress(value=0, description='Downloading', max=433, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='Downloading', max=440473133, style=ProgressStyle(description_…

## Training Step

### Now we transform initial dataset to train ready data structure

In [None]:
def add_tree_data(tree_path1, tree_path2, subrange=0):
    """Load additional tree vectors from two further pickle files.

    Use this to extend the training set with trees stored in another pickle.
    ``subrange`` keeps only the first ``subrange`` vectors of each file; 0
    keeps as many as the first file contains.

    Returns ``[trees_from_path1, trees_from_path2]``.
    """
    trees_first = unplickle_trees(tree_path1)
    trees_second = unplickle_trees(tree_path2)
    limit = len(trees_first) if subrange == 0 else subrange
    return [trees_first[:limit], trees_second[:limit]]

#extra_trees = add_tree_data(f''+dataPath+''+nameTree1_test+'',f''+dataPath+''+nameTree2_test+'', 6000)

In [None]:
# CSV stems (without ".csv") of the training datasets.
datasets_train = [ dataPath+'training_labeled']

train_list = [] # one (iterator, second value) tuple per dataset, as returned by dataset_to_train


for dat in datasets_train:
    train_list.append(dataset_to_train(dat, dataPath,nameTree1_train, nameTree2_train))


In [None]:
# Number of training epochs per seed.
EPOCH = 4

test_accuracies_NO_mem = []

SEEDS = [46, 68, 95, 335, 660]
#SEEDS = [68]

for seed in SEEDS:

    # Re-seed every RNG so each run depends only on its own seed.
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #torch.cuda.manual_seed_all(seed)  # only needed when running on several GPUs
    np.random.seed(seed)  # NumPy global generator
    random.seed(seed)  # Python's `random` module
    # benchmark must stay off for reproducible runs: cuDNN auto-tuning may pick
    # a different algorithm from one run to the next.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    # Fresh model, loss and optimizer for every seed
    model = DTBert(BERT_DIM,TREE_DIM, 2)
    model.cuda()
    criterion = nn.NLLLoss()
    optimizer = optim.AdamW(model.parameters(), lr=5e-5)

    for elem, dataset_name in zip(train_list, datasets_train):
        print(f"Training dataset: {dataset_name}")
        train(elem[0], dataset_name, EPOCH, f"_reproducibility_seed_{seed}", BERT_KERMIT_NO_REL)
    


In [None]:
# num of train epochs
#EPOCH = 4
#test_accuracies_NO_mem = []

#SEEDS = [5, 7, 9, 23, 31]

#train loop
#for elem, dataset_name in zip(train_list, datasets_train):
#    for seed in SEEDS:
        # download bert model 
#        model = DTBert(BERT_DIM,TREE_DIM, 2)        
#        model.cuda()
        # Loss function
#        criterion = nn.NLLLoss()
        # Optimizer
#        optimizer = optim.AdamW(model.parameters(), lr=5e-5)

#        accuracies = train(elem[0], dataset_name, EPOCH, f"_reproducibility_seed_{seed}", BERT_KERMIT_NO_REL)

### Train the model



In [None]:
# num of train epochs
EPOCH = 4
test_accuracies_NO_mem = []

SEEDS = [5, 7, 9, 23, 31]

#train loop
for elem, dataset_name in zip(train_list, datasets_train):
    for seed in SEEDS:
        
        # download bert model 
        model = DTBert(BERT_DIM,TREE_DIM, 2)        
        model.cuda()
        # Loss function
        criterion = nn.NLLLoss()
        # Optimizer
        optimizer = optim.AdamW(model.parameters(), lr=5e-5)
            
        accuracies = train(elem[0], dataset_name, EPOCH, f"_reproducibility_seed_{seed}", BERT_KERMIT_NO_REL)

In [None]:
!pip install seaborn
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

In [None]:
# Plot the list returned by the last train() call, one point per list entry.
plt.plot(accuracies)
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.show()

In [None]:
def get_layer_pred(x_synth):
    """Return the global ``model``'s activation for tree vectors paired with all-zero token ids.

    NOTE(review): a function with this name is also defined in an earlier
    cell; this definition replaces it once the cell runs.

    Args:
        x_synth: tensor of tree vectors, one row per example.

    Returns:
        Whatever ``model.get_activation`` returns for the zero token ids and
        ``x_synth``.
    """
    # 51 zero token ids per row, created directly as int64 on x_synth's device.
    # One row per input row, so batches larger than 1 can be concatenated too.
    x_sem = torch.zeros(x_synth.size(0), 51, dtype=torch.int64, device=x_synth.device)
    return model.get_activation(x_sem, x_synth)

x_synt1 = torch.rand(1, 8000).cuda()

print(get_layer_pred(x_synt1))

In [None]:
# NOTE(review): same statements as the earlier cell that first builds
# `test_list`; running this reloads the evaluation data and tree pickles.
datasets_test = [ dataPath+'test_labeled']

test_list = [] # one (iterator, second value) tuple per dataset, as returned by dataset_to_train


for dat in datasets_test:
    test_list.append(dataset_to_train(dat, dataPath, nameTree1_test, nameTree2_test))

In [None]:
# One (accuracy, preds, targets) tuple per evaluation dataset.
test_accuracies_NO_mem = []

for elem, dataset_name in zip(test_list, datasets_test):
    print(f"Testing dataset: {dataset_name}")
    # `ext` is passed explicitly: infer() uses it to build the results file name.
    test_accuracies_NO_mem.append(infer(elem[0], model, dataset_name, ext=""))
print("===================================")

In [None]:
def get_preds(data, name):
    """Run ``infer`` with the global ``model`` and flatten its per-batch output.

    Args:
        data: iterator handed to ``infer``.
        name: dataset name handed to ``infer``.

    Returns:
        ``(predictions, labels, targets)``: one formatted report line per
        example, the flat list of predicted classes, and the flat list of
        gold classes.
    """
    # `ext` is passed explicitly: infer() uses it to build the results file name.
    acc, preds, targets = infer(data, model, name, ext="")
    preds = np.concatenate(preds, axis=0).tolist()
    targets = np.concatenate(targets, axis=0).tolist()

    predictions = [
        f'Index: {index}\tCorrect: {targets[index]}\tPrediction: {el}\n'
        for index, el in enumerate(preds)
    ]
    return predictions, list(preds), targets

In [None]:
for elem, dataset_name in zip(test_list, datasets_test):
    ps, labels, targets = get_preds(elem[0], "Test")

In [None]:
np.unique(np.array(labels), return_counts=True)

In [None]:
# Reload the raw evaluation file and attach predicted / gold class names as new columns.
test_data = pd.read_csv(dataset_test, delimiter = "\t")
classes = {0:"non-entailment", 1:"entailment"}
# NOTE(review): `labels` is rebound from ints to strings here, so running this
# cell a second time looks up strings in `classes` and raises KeyError.
labels = list(map(lambda x: classes[x], labels))
# NOTE(review): both assignments are positional. They assume `labels`/`targets`
# hold one entry per row in file order -- confirm that the iterator behind
# get_preds() neither shuffles nor drops rows.
test_data["prediction"] = labels
test_data["gl"] = list(map(lambda x: classes[x], targets))



In [None]:
test_data

In [None]:
data_to_ann = pd.read_csv("data_to_annotate.csv")
# TODO: the assignment to `data_to_ann_to_test` was left without a right-hand
# side (a syntax error that stopped this cell from running); restore it once
# the intended expression is known.

In [None]:
np.unique(test_data["prediction"].values, return_counts=True)

In [None]:
def to_visualize(df):
    """Draw two random rows for each of three heuristics.

    Args:
        df: frame with a ``heuristic`` column containing at least two rows
            each for "subsequence", "constituent" and "lexical_overlap".

    Returns:
        Frame with six rows in the order subsequence, constituent,
        lexical_overlap; original index labels are kept.

    Raises:
        ValueError: from ``sample`` when a heuristic has fewer than two rows.
    """
    heuristics = ("subsequence", "constituent", "lexical_overlap")
    samples = [df[df.heuristic == heuristic].sample(2) for heuristic in heuristics]
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    return pd.concat(samples)


# The prediction column is filled from `classes`, which spells the label
# "non-entailment" with a hyphen; the underscore spelling matched no row.
moredata = to_visualize(test_data[test_data["prediction"] == "non-entailment"])
#viz_data.to_csv("data_to_visualize.csv")

In [None]:
data_to_analyze = test_data[test_data.heuristic != "subsequence"]

In [None]:
data_to_analyze.sample(200).to_csv("data_to_annotate.csv")

In [None]:
test_data.values[20001]

In [None]:
def save_weights(model, model_name=None):
    """Serialize the whole model object to ``Weights_{model_name}.pt``.

    Args:
        model: object handed to ``torch.save`` (the full module is pickled,
            not just its state dict).
        model_name: label used in the file name; ``None`` keeps the original
            behaviour and uses ``BERT_ONLY``.
    """
    if model_name is None:
        model_name = BERT_ONLY
    torch.save(model, f'Weights_{model_name}.pt')
#model = torch.load('./modelli/BERT_DT.pt')

# NOTE(review): this writes Weights_Bert_Only.pt, while a later cell loads
# './Weights_Bert+Kermit.pt' -- confirm which label is intended and pass it
# as `model_name`.
save_weights(model)

In [None]:
test.head(30).values[2]

In [None]:
print(test_accuracies_NO_mem)

In [None]:
with open('pred_DTBERT.txt', mode="w") as outfile: 
    for s in test_accuracies_NO_mem[0][1]:
        outfile.write("%s\n" % s)

In [None]:
with open('accuracy_BERT+DT_70k_2.txt', mode="w") as outfile: 
    for s in test_accuracies_NO_mem:
        outfile.write("%s\n" % s)

In [None]:
#torch.save(model, 'BERT+DT.pt')

#model = torch.load('./modelli/BERT_DT.pt')

In [None]:
model = torch.load('./Weights_Bert+Kermit.pt')