In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import random
import os
import warnings
warnings.filterwarnings('ignore')  
import torch
from torch import nn, cuda, optim
from torch.optim import AdamW
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import copy

# import transformers
from transformers import (
    BertModel,
    BertForTokenClassification,
    BertTokenizerFast,
    AutoTokenizer,
    AutoModelForTokenClassification,
    get_linear_schedule_with_warmup
)

! pip install sentence-transformers
from sentence_transformers import SentenceTransformer

from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    f1_score,
    accuracy_score,
    precision_score,
    recall_score,
    precision_recall_fscore_support,
    classification_report
)

# Pick the compute device once; `device` is a notebook-wide global read by later cells.
device = 'cuda' if cuda.is_available() else 'cpu'
if device == 'cuda':
    # Release cached allocator blocks left over from earlier executions in the same kernel.
    torch.cuda.empty_cache()
print(device)

# List every file under the Kaggle input mount so the available datasets/models are visible in the output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

def generate_random_seed():
    """Draw an integer seed in the inclusive range [1, 1000] from the `random` module."""
    lowest, highest = 1, 1000
    seed = random.randint(lowest, highest)
    return seed

def set_random_seed(seed):
    """Seed Python's `random`, NumPy's global RNG and PyTorch (CPU, plus every CUDA device if present)."""
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)
        
# Draw an initial seed and apply it to random / numpy / torch.
# NOTE(review): the seed is drawn from the not-yet-seeded `random` module and is neither printed
# nor stored here, so this initial state cannot be reproduced afterwards.
rs = generate_random_seed()
set_random_seed(rs)

In [None]:
class Config:
    """Experiment settings plus the tokenizer and (optional) similarity model they imply.

    Parameters
    ----------
    runs, epochs, batch_size : int
        Number of independent runs, epochs per run and DataLoader batch size.
    num_labels : int
        Number of output classes passed to the token-classification head.
    label_list : list of str
        Tag vocabulary; a tag's position in this list is its integer class id.
    model_checkpoint : str
        Checkpoint name/path given to ``AutoTokenizer`` (and later to the tagger).
    max_len : int
        Sequence length used for padding/truncation during tokenisation.
    use_similarity : bool
        When True a ``SentenceTransformer`` is loaded into ``similarity_model``.
    mode_similarity : str
        One of ``'sbert'``, ``'sbert-sts-bws'`` or ``'arguebert-sts-bws'``.
    lambda_madeup, lambda_unrecognized : number
        Stored as ``lambda_mu`` / ``lambda_unr``; multipliers for the made-up and
        unrecognized terms of the similarity error.

    Raises
    ------
    ValueError
        If ``use_similarity`` is True and ``mode_similarity`` is not a known mode.
    """

    # Similarity mode -> SentenceTransformer checkpoint (hub name or local Kaggle path).
    _SIMILARITY_CHECKPOINTS = {
        'sbert': "all-MiniLM-L6-v2",
        'sbert-sts-bws': "/kaggle/input/similaritymodels/sbert-sts-bws/content/models/sbert-all-MiniLM-L6-v2-sts-bws/final",
        'arguebert-sts-bws': "/kaggle/input/similaritymodels/arguebert-sts-bws/content/models/argueBert_base_similar-sts-bws/final",
    }

    def __init__(self, runs, epochs, batch_size, 
                 num_labels = 3, label_list = ['O', 'B', 'I'], model_checkpoint = "allenai/longformer-base-4096", max_len = 1024, use_similarity = False, mode_similarity = 'sbert', lambda_madeup = 1, lambda_unrecognized = 1):
        # Validate first so a typo in the mode fails before any model is downloaded/loaded.
        if use_similarity and mode_similarity not in self._SIMILARITY_CHECKPOINTS:
            raise ValueError("Incorrect similarity mode. Options are: 'sbert', 'sbert-sts-bws' and 'arguebert-sts-bws'.")

        self.num_labels = num_labels
        # Copy: the default list object is shared by every call, so it must never be stored
        # (and possibly mutated) directly.
        self.label_list = list(label_list)
        self.model_checkpoint = model_checkpoint
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_checkpoint, add_prefix_space=True)
        self.runs = runs
        self.epochs = epochs
        self.batch_size = batch_size
        self.max_len = max_len
        self.use_similarity = use_similarity
        self.mode_similarity = mode_similarity
        self.lambda_mu = lambda_madeup
        self.lambda_unr = lambda_unrecognized
        self.similarity_model = None
        if use_similarity:
            self.similarity_model = SentenceTransformer(self._SIMILARITY_CHECKPOINTS[mode_similarity])
                
class BestModelsTracker:
    """Keep the best weights seen so far under three criteria: macro-F1, validation loss and PM.

    For each criterion the tracker remembers the best value, the run/epoch where it
    occurred and a snapshot of the model's ``state_dict`` at that moment.
    """

    def __init__(self):
        self.best_f1 = -float('inf')
        self.best_loss = float('inf')
        self.best_pm = -float('inf')
        self.best_f1_model = None
        self.best_loss_model = None
        self.best_pm_model = None
        self.best_run_f1 = None
        self.best_run_loss = None
        self.best_run_pm = None
        self.best_epoch_f1 = None
        self.best_epoch_loss = None
        self.best_epoch_pm = None

    @staticmethod
    def _snapshot(model):
        """Return an independent CPU copy of ``model.state_dict()``.

        Copying to CPU keeps the (up to three) retained snapshots out of the training
        device's memory; ``load_state_dict`` accepts CPU tensors for a model on any device.
        """
        return {name: tensor.detach().cpu().clone() for name, tensor in model.state_dict().items()}

    def update(self, model, val_loss, macro_f1, pm, run, epoch):
        """Record ``model`` for every criterion on which it strictly improves.

        Higher is better for ``macro_f1`` and ``pm``; lower is better for ``val_loss``.
        """
        # Update best F1 model
        if macro_f1 > self.best_f1:
            self.best_f1 = macro_f1
            self.best_run_f1 = run
            self.best_epoch_f1 = epoch
            self.best_f1_model = self._snapshot(model)

        # Update best loss model
        if val_loss < self.best_loss:
            self.best_loss = val_loss
            self.best_run_loss = run
            self.best_epoch_loss = epoch
            self.best_loss_model = self._snapshot(model)

        # Update best PM model
        if pm > self.best_pm:
            self.best_pm = pm
            self.best_run_pm = run
            self.best_epoch_pm = epoch
            self.best_pm_model = self._snapshot(model)

    def save_models(self, dataname):
        """Write each recorded snapshot to ``model-<criterion>-<dataname>-<run>-<epoch>.pt``.

        A criterion that never received a snapshot (e.g. every metric was NaN) is skipped
        instead of serialising ``None`` to a ``...-None-None.pt`` file.
        """
        recorded = (
            ('f1', self.best_f1_model, self.best_run_f1, self.best_epoch_f1),
            ('loss', self.best_loss_model, self.best_run_loss, self.best_epoch_loss),
            ('pm', self.best_pm_model, self.best_run_pm, self.best_epoch_pm),
        )
        for criterion, state, run, epoch in recorded:
            if state is None:
                print(f"No best-{criterion} model was recorded; nothing saved for it.")
                continue
            torch.save(state, f'model-{criterion}-{dataname}-{run}-{epoch}.pt')
            
def get_class_weights(df, config):
    """Compute normalised inverse-frequency class weights from the tag column of ``df``.

    ``df.labels`` holds whitespace-separated tag strings; each tag is mapped to its
    position in ``config.label_list``. The raw weight of a class is
    ``total_tags / (num_classes * count)``, then all weights are scaled to sum to 1
    and rounded to 4 decimals.

    The result always has one entry per class in ``config.label_list``. A class that
    never occurs in ``df`` gets weight 0.0 (instead of producing inf/NaN or a list
    shorter than the number of classes).

    Returns
    -------
    list of float
    """
    mapping = config.label_list
    num_classes = len(mapping)

    label_ids = [mapping.index(tag) for row in df.labels.values for tag in row.split()]
    total_samples = len(label_ids)

    # minlength guarantees a slot for every class even when the highest ids are absent.
    class_counts = torch.bincount(torch.tensor(label_ids, dtype=torch.long), minlength=num_classes)

    present = class_counts > 0
    class_weights = torch.zeros(num_classes, dtype=torch.float32)
    class_weights[present] = total_samples / (num_classes * class_counts[present].float())

    weight_sum = class_weights.sum()
    if weight_sum > 0:  # all-zero only when df has no tags at all
        class_weights = class_weights / weight_sum

    return [round(float(weight), 4) for weight in class_weights]

def tokenize_and_align_labels(txts, lbls, config):
    """Tokenise pre-split sentences and expand word-level tags to sub-token level.

    ``txts`` is a list of word lists and ``lbls`` the matching list of tag lists.
    Returns ``(encodings, aligned)`` where ``encodings`` is whatever the tokenizer
    returned and ``aligned`` holds, per sentence, one class id per token:

    * tokens without a word id (special/padding) -> -100, so the loss ignores them;
    * first sub-token of a word -> the word's own tag;
    * following sub-tokens of the same word -> 'O' stays 'O', anything else becomes
      the 'I' variant of the previous tag.
    """
    tag_vocab = config.label_list
    encodings = config.tokenizer(
        txts,
        is_split_into_words=True,
        max_length=config.max_len,
        padding='max_length',
        truncation=True,
        return_tensors='pt',
    )

    aligned = []
    for row, word_tags in enumerate(lbls):
        row_ids = []
        last_word, last_tag = None, None
        for word in encodings.word_ids(batch_index=row):
            if word is None:
                row_ids.append(-100)
                last_word = word
                continue

            if word != last_word:
                # New word: take its tag as given.
                last_tag = word_tags[word]
            elif last_tag != 'O':
                # Continuation piece of a tagged word: switch the prefix to 'I'.
                last_tag = 'I' + last_tag[1:]

            row_ids.append(tag_vocab.index(last_tag))
            last_word = word

        aligned.append(row_ids)

    return encodings, aligned

class SequenceLabelingDataset(torch.utils.data.Dataset):
    """Dataset built from a frame with whitespace-separated ``tokens`` and ``labels`` columns."""

    def __init__(self, df, config):
        # Split both columns into per-word lists, then tokenise and align once up front.
        word_tags = [row.split() for row in df.labels.values.tolist()]
        words = [row.split() for row in df.tokens.values.tolist()]
        self.encodings, self.labels = tokenize_and_align_labels(words, word_tags, config)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        # One detached copy of every encoding field, followed by the aligned labels.
        item = {}
        for field, values in self.encodings.items():
            item[field] = values[idx].clone().detach()
        item['labels'] = torch.tensor(self.labels[idx])
        return item
    
class SimpleTagger(nn.Module):
    """Token tagger wrapping ``AutoModelForTokenClassification`` for ``config.model_checkpoint``."""

    def __init__(self, config):
        super(SimpleTagger, self).__init__()
        self.configuration = config
        self.transf = AutoModelForTokenClassification.from_pretrained(config.model_checkpoint, num_labels = config.num_labels)

    def forward(self, input_ids, attention_mask, labels = None, lossfn = None):
        """Run the wrapped model.

        Without ``labels`` (inference) only the logits are returned.

        With ``labels`` a triple ``(loss, logits, last_hidden_state_or_None)`` is
        returned. ``loss`` is ``lossfn`` applied to the flattened logits and labels;
        when ``lossfn`` is None the loss computed by the wrapped model itself is used.
        The hidden state is only requested when similarity is enabled with mode
        ``'cosine'``.
        """
        if labels is None:  # inference
            outputs = self.transf(input_ids = input_ids,
                                  attention_mask = attention_mask)
            return outputs.logits

        # training / evaluation with gold labels
        output_hidden = (self.configuration.use_similarity) and (self.configuration.mode_similarity == 'cosine')

        outputs = self.transf(input_ids = input_ids,
                              attention_mask = attention_mask,
                              labels = labels,
                              output_hidden_states = output_hidden)

        if lossfn is None:
            loss = outputs.loss
        else:
            # Take the class count from the logits themselves rather than assuming 3 labels.
            num_classes = outputs.logits.size(-1)
            loss = lossfn(outputs.logits.view(-1, num_classes), labels.view(-1).long())

        last_hidden = outputs.hidden_states[-1] if output_hidden else None
        return loss, outputs.logits, last_hidden
        
def load_data(df, config):
    """Build train/validation/test loaders and the training-set class weights.

    Rows are routed by the ``set`` column. If no row is marked ``'dev'``, 10% of the
    training rows are held out for validation with a fixed ``random_state`` of 2023.
    Only the training loader shuffles.

    Returns ``(train_loader, val_loader, test_loader, class_weights)``.
    """
    split = df['set']

    train_seq_df = df.loc[split == 'train']
    if 'dev' in split.values:
        val_seq_df = df.loc[split == 'dev']
    else:
        train_seq_df, val_seq_df = train_test_split(train_seq_df, test_size = 0.1, random_state = 2023)
    test_seq_df = df.loc[split == 'test']

    train_dataset = SequenceLabelingDataset(train_seq_df, config)
    val_dataset = SequenceLabelingDataset(val_seq_df, config)
    test_dataset = SequenceLabelingDataset(test_seq_df, config)

    per_batch = config.batch_size
    train_loader = DataLoader(train_dataset, batch_size=per_batch, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=per_batch)
    test_loader = DataLoader(test_dataset, batch_size=per_batch)

    # Weights come from the (possibly reduced) training portion only.
    class_weights = get_class_weights(train_seq_df, config)

    return train_loader, val_loader, test_loader, class_weights

In [None]:
def get_arguments_indices(sequence):
    """Extract half-open ``(start, end)`` spans from a sequence of class ids.

    A span opens at every id equal to 1 and closes at the next 1 (which opens a new
    span), at the next 0, or at the end of the sequence. Any other id neither opens
    nor closes a span.
    """
    spans = []
    open_at = None

    for pos, tag in enumerate(sequence):
        begins = (tag == 1)
        if open_at is not None and (begins or tag == 0):
            spans.append((open_at, pos))
            open_at = None
        if begins:
            open_at = pos

    if open_at is not None:
        spans.append((open_at, len(sequence)))

    return spans

def compute_r_matrix(gold_indices, predicted_indices):
    """Pairwise overlap ratios between gold and predicted ``(start, end)`` spans.

    Entry ``[i][j]`` is the length of the intersection of gold span ``i`` and
    predicted span ``j`` divided by the length of the longer of the two, rounded to
    3 decimals. The result has shape ``(len(gold_indices), len(predicted_indices))``.
    """
    ratios = np.zeros((len(gold_indices), len(predicted_indices)), dtype=float)

    for row, (gold_lo, gold_hi) in enumerate(gold_indices):
        for col, (pred_lo, pred_hi) in enumerate(predicted_indices):
            lo = max(gold_lo, pred_lo)
            hi = min(gold_hi, pred_hi)
            shared = (hi - lo) if lo <= hi else 0
            longest = max(gold_hi - gold_lo, pred_hi - pred_lo)
            ratios[row][col] = round(shared / longest, 3)

    return ratios

def get_arguments_from_tokens(tokens, tokenizer):
    """Join ``tokens`` back into text with the tokenizer and strip surrounding whitespace."""
    text = tokenizer.convert_tokens_to_string(tokens)
    return text.strip()


def get_categorized_arguments(clean, config):
    """Categorise every predicted span against the gold spans of the same instance.

    ``clean`` is a triple ``(tokens, labels, predictions)`` of parallel per-instance
    lists. For each predicted span:

    * no overlap with any gold span -> ``(predicted_text, '-', 'made-up')``;
    * otherwise one entry per overlapping gold span, ``'Match'`` when the overlap
      ratio is exactly 1 and ``'no-Match'`` otherwise.

    Returns ``(categorization, nro_unrecognized)`` where the second value counts gold
    spans that no predicted span overlaps.
    """
    tokenizer = config.tokenizer
    categorized = []
    unrecognized = 0

    for tokens, gold_tags, pred_tags in zip(clean[0], clean[1], clean[2]):
        gold_spans = get_arguments_indices(gold_tags)
        gold_texts = [get_arguments_from_tokens(tokens[lo:hi], tokenizer) for (lo, hi) in gold_spans]
        pred_spans = get_arguments_indices(pred_tags)

        overlap = compute_r_matrix(gold_spans, pred_spans)

        # Predicted side: one column of the overlap matrix per predicted span.
        for col, (lo, hi) in enumerate(pred_spans):
            pred_text = get_arguments_from_tokens(tokens[lo:hi], tokenizer)
            hits = [(row, ratio) for row, ratio in enumerate(overlap[:, col]) if ratio > 0]

            if not hits:
                categorized.append((pred_text, '-', 'made-up'))
                continue

            for row, ratio in hits:
                verdict = 'Match' if ratio == 1 else 'no-Match'
                categorized.append((pred_text, gold_texts[row], verdict))

        # Gold side: a row without any positive entry was never touched by a prediction.
        for row in range(len(gold_spans)):
            if not (overlap[row, :] > 0).any():
                unrecognized += 1

    return categorized, unrecognized

In [None]:
def clean_batch_elements(batch_input_ids, batch_labels, batch_predictions, tokenizer):
    """Strip special/padding positions from a batch, keeping tokens, labels and predictions aligned.

    For every sequence the ids are converted back to token strings. If both ``'<s>'``
    and ``'</s>'`` occur, everything outside the first ``'<s>'`` .. first ``'</s>'``
    window is cut off. Then every position whose token is a special token of the
    tokenizer, an empty string, ``'<s>'`` or ``'</s>'`` is removed.

    Returns ``(eval_tokens, eval_labels, eval_predictions)``: three lists with one
    filtered list per sequence.
    """
    # Built once per call: the drop set does not change between tokens, so there is no
    # need to re-read tokenizer.all_special_tokens (and scan a list) for each of them.
    dropped_tokens = set(tokenizer.all_special_tokens) | {'', '<s>', '</s>'}

    eval_tokens = []
    eval_labels = []
    eval_predictions = []
    for i in range(len(batch_input_ids)):
        tokens = tokenizer.convert_ids_to_tokens(batch_input_ids[i].tolist())
        labels = batch_labels[i]
        preds = batch_predictions[i]

        if '<s>' in tokens and '</s>' in tokens:
            start = tokens.index('<s>')
            end = tokens.index('</s>') + 1
            tokens = tokens[start:end]
            labels = labels[start:end]
            preds = preds[start:end]

        assert len(preds) == len(tokens)

        filtered_tokens, filtered_labels, filtered_preds = [], [], []
        for tk, lb, pr in zip(tokens, labels, preds):
            if tk in dropped_tokens:
                continue
            filtered_tokens.append(tk)
            filtered_labels.append(lb)
            filtered_preds.append(pr)

        eval_tokens.append(filtered_tokens)
        eval_labels.append(filtered_labels)
        eval_predictions.append(filtered_preds)

    return eval_tokens, eval_labels, eval_predictions

            
def fn_similarity_1(golds, preds, config):
    """Return ``1 - |similarity|`` for each aligned (gold, predicted) text pair.

    Both lists are embedded with ``config.similarity_model``; the pairwise similarity
    matrix is computed and only its diagonal (pair ``i`` with pair ``i``) is used.
    """
    encoder = config.similarity_model

    # Embed both sides, then score every gold against every prediction.
    gold_vectors = encoder.encode(golds, show_progress_bar=False)
    pred_vectors = encoder.encode(preds, show_progress_bar=False)
    scores = encoder.similarity(gold_vectors, pred_vectors)

    distances = []
    for k in range(len(scores)):
        distances.append(1 - abs(scores[k][k].item()))
    return distances


def count_predicted_arguments(clean, config):
    """Count categorised predictions in total and per category.

    Returns ``(total, matches, no_matches, made_up)`` based on the third field of
    every entry produced by ``get_categorized_arguments``.
    """
    categorized, _unused = get_categorized_arguments(clean, config)

    tally = {'Match': 0, 'no-Match': 0, 'made-up': 0}
    for entry in categorized:
        verdict = entry[2]
        if verdict in tally:
            tally[verdict] += 1

    return len(categorized), tally['Match'], tally['no-Match'], tally['made-up']

def count_gold_arguments(clean, config):
    """Return ``(number_of_gold_spans, number_of_unrecognized_gold_spans)`` for a cleaned batch.

    The unrecognized count is the second value of ``get_categorized_arguments``; the
    gold total is the number of spans found in each instance's label sequence.
    """
    _categorized, unrecognized = get_categorized_arguments(clean, config)

    gold_total = 0
    for _tokens, gold_tags, _preds in zip(clean[0], clean[1], clean[2]):
        gold_total += len(get_arguments_indices(gold_tags))

    return gold_total, unrecognized
    
def compute_similarity_error(clean, config):
    """Combine partial-match, made-up and unrecognized errors into one penalty.

    * partial-match error: sum over all 'no-Match' pairs of ``1 - |similarity|``
      between gold and predicted text, rounded to 3 decimals;
    * made-up error: share of categorised predictions that are 'made-up';
    * unrecognized error: share of gold spans that no prediction overlaps.

    The result is ``partial + config.lambda_mu * made_up + config.lambda_unr * unrecognized``.

    NOTE(review): the value is built from decoded predictions and ``.item()`` calls, so
    it is a plain Python number with no autograd history.
    """
    categorized_arguments, number_unrecognized_arguments = get_categorized_arguments(clean, config)
    no_match_arguments = [(x[0], x[1]) for x in categorized_arguments if x[2] == 'no-Match']
    number_madeup_arguments = sum(1 for x in categorized_arguments if x[2] == 'made-up')

    number_predicted_arguments = len(categorized_arguments)
    # Count gold spans directly from the label sequences. Going through
    # count_gold_arguments would run the whole categorisation a second time only to
    # throw its result away.
    number_gold_arguments = sum(
        len(get_arguments_indices(gold_tags))
        for _tokens, gold_tags, _preds in zip(clean[0], clean[1], clean[2])
    )

    partial_match_error = 0
    if len(no_match_arguments) > 0:
        pred_args = [pred for pred, gold in no_match_arguments]
        gold_args = [gold for pred, gold in no_match_arguments]
        partial_match_error = round(sum(fn_similarity_1(gold_args, pred_args, config)), 3)

    # epsilon keeps both ratios defined when a batch has no predictions or no gold spans.
    epsilon = 1e-10
    made_up_error = number_madeup_arguments / (number_predicted_arguments + epsilon)
    unrecognized_error = number_unrecognized_arguments / (number_gold_arguments + epsilon)

    total_similarity_error = partial_match_error + config.lambda_mu*made_up_error + config.lambda_unr*unrecognized_error
    return total_similarity_error


def train_model(model, train_loader, optimizer, config, lossfn):
    """Train ``model`` for one pass over ``train_loader`` and return the mean batch loss.

    Each batch is a dict holding at least ``'input_ids'``, ``'attention_mask'`` and
    ``'labels'``. Tensors are moved to the device the model's parameters live on.
    Gradients are clipped to a max norm of 1.0 before every optimizer step.

    When ``config.use_similarity`` is set, the similarity error of the batch is added
    to the loss.
    NOTE(review): that error is a plain Python number computed from argmax
    predictions, so it shifts the reported loss but contributes nothing to the
    gradients — confirm this is intended.

    Returns the average loss over all batches rounded to 4 decimals (0.0 for an
    empty loader).
    """
    model.train()

    # Follow the model's own placement instead of relying on a notebook-level global.
    model_device = next(model.parameters()).device

    train_loss = 0

    for batch in train_loader:
        # Look fields up by name: unpacking the dict positionally breaks as soon as the
        # batch carries an extra field (e.g. token_type_ids) or the key order changes.
        batch_input_ids = batch['input_ids'].to(model_device)
        batch_attention_mask = batch['attention_mask'].to(model_device)
        batch_labels = batch['labels'].to(model_device)

        loss, outputs, _ = model(batch_input_ids, attention_mask = batch_attention_mask, labels = batch_labels, lossfn = lossfn)

        if config.use_similarity:
            labels_np = batch_labels.detach().cpu().numpy()
            batch_predictions = np.argmax(outputs.detach().cpu().numpy(), axis = 2).tolist()
            clean_elements = clean_batch_elements(batch_input_ids, labels_np, batch_predictions, config.tokenizer)
            similarity_loss = compute_similarity_error(clean_elements, config)

            loss = loss + similarity_loss

        train_loss += loss.item()

        # backprop
        optimizer.zero_grad()
        loss.backward()

        # gradient clipping
        torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=1.0)

        optimizer.step()

    # Report memory once per pass (and only when CUDA exists) instead of after every batch.
    if torch.cuda.is_available():
        print(f"GPU memory used: {torch.cuda.memory_allocated() / 1024**2} MB")
        print(f"Total GPU memory reserved: {torch.cuda.memory_reserved() / 1024**2} MB")

    num_batches = len(train_loader)
    if num_batches == 0:
        return 0.0
    return round((train_loss / num_batches), 4)

def evaluate_model(model, dataloader, config, tokenizer, lossfn):
    """Evaluate ``model`` on ``dataloader`` without gradient tracking.

    Each batch is a dict holding at least ``'input_ids'``, ``'attention_mask'`` and
    ``'labels'``. Special/padding positions are removed before any metric is computed.

    Returns
    -------
    eval_loss : float
        Mean batch loss (plus the similarity error when ``config.use_similarity``),
        rounded to 4 decimals.
    eval_f1 : float
        Macro F1 over all remaining token positions.
    eval_pm : float
        Share of categorised predictions that are exact matches, rounded to 4
        decimals; 0.0 when nothing was predicted.
    eval_counter : list
        ``[matches, no_matches, made_up, total_predicted, unrecognized, total_gold]``.
    """
    model.eval()

    # Follow the model's own placement instead of relying on a notebook-level global.
    model_device = next(model.parameters()).device

    eval_loss = 0
    number_of_predicted_arguments  = 0
    number_of_predicted_pm_arguments = 0
    number_of_predicted_nomatch_arguments = 0
    number_of_predicted_madeup_arguments = 0
    number_of_gold_arguments = 0
    number_of_unrecognized_arguments = 0

    eval_tokens, eval_labels, eval_predictions = [], [], []

    with torch.no_grad():
        for batch in dataloader:
            # Look fields up by name rather than relying on the dict's key order.
            batch_input_ids = batch['input_ids'].to(model_device)
            batch_attention_mask = batch['attention_mask'].to(model_device)
            batch_labels = batch['labels'].to(model_device)

            loss, outputs, _ = model(batch_input_ids, attention_mask = batch_attention_mask, labels = batch_labels, lossfn = lossfn)

            batch_labels = batch_labels.detach().cpu().numpy()
            batch_predictions = np.argmax(outputs.detach().cpu().numpy(), axis = 2).tolist()
            clean_elements = clean_batch_elements(batch_input_ids, batch_labels, batch_predictions, tokenizer)

            batch_predicted_arguments_counts = count_predicted_arguments(clean_elements, config)
            batch_gold_arguments_counts = count_gold_arguments(clean_elements, config)

            number_of_predicted_arguments += batch_predicted_arguments_counts[0]
            number_of_predicted_pm_arguments += batch_predicted_arguments_counts[1]
            number_of_predicted_nomatch_arguments += batch_predicted_arguments_counts[2]
            number_of_predicted_madeup_arguments += batch_predicted_arguments_counts[3]

            number_of_gold_arguments += batch_gold_arguments_counts[0]
            number_of_unrecognized_arguments += batch_gold_arguments_counts[1]

            if config.use_similarity:
                similarity_loss = compute_similarity_error(clean_elements, config)
                loss = loss + similarity_loss

            eval_loss += loss.item()

            eval_tokens += clean_elements[0]
            eval_labels += clean_elements[1]
            eval_predictions += clean_elements[2]

    flattened_labels = [j for sub in eval_labels for j in sub]
    flattened_predictions = [j for sub in eval_predictions for j in sub]

    eval_loss = round((eval_loss / len(dataloader)), 4)
    eval_f1 = f1_score(flattened_labels, flattened_predictions, average = 'macro')
    # A model that predicts no argument at all (every token tagged 'O') would otherwise
    # abort the whole experiment with ZeroDivisionError.
    if number_of_predicted_arguments > 0:
        eval_pm = round((number_of_predicted_pm_arguments / number_of_predicted_arguments), 4)
    else:
        eval_pm = 0.0
    eval_counter = [number_of_predicted_pm_arguments, 
                    number_of_predicted_nomatch_arguments, 
                    number_of_predicted_madeup_arguments, 
                    number_of_predicted_arguments,
                    number_of_unrecognized_arguments,
                    number_of_gold_arguments]

    return eval_loss, eval_f1, eval_pm, eval_counter


def test_model(model, dataloader, tokenizer):
    """Run inference over ``dataloader`` and return cleaned tokens, gold labels and predictions.

    Each batch is a dict holding at least ``'input_ids'``, ``'attention_mask'`` and
    ``'labels'``. The model is called without labels, so it returns logits only;
    predictions are the argmax over the last axis. Special/padding positions are
    removed by ``clean_batch_elements``.

    Returns ``(eval_tokens, eval_labels, eval_predictions)``: three parallel lists
    with one entry per sequence.
    """
    model.eval()

    # Follow the model's own placement instead of relying on a notebook-level global.
    model_device = next(model.parameters()).device

    eval_tokens, eval_labels, eval_predictions = [], [], []

    with torch.no_grad():
        for batch in dataloader:
            # Look fields up by name rather than relying on the dict's key order.
            batch_input_ids = batch['input_ids'].to(model_device)
            batch_attention_mask = batch['attention_mask'].to(model_device)
            batch_labels = batch['labels']

            logits = model(batch_input_ids, attention_mask = batch_attention_mask)

            batch_labels = batch_labels.detach().cpu().numpy()
            batch_predictions = np.argmax(logits.detach().cpu().numpy(), axis = 2).tolist()

            clean_elements = clean_batch_elements(batch_input_ids, batch_labels, batch_predictions, tokenizer)
            eval_tokens += clean_elements[0]
            eval_labels += clean_elements[1]
            eval_predictions += clean_elements[2]

    return eval_tokens, eval_labels, eval_predictions

def save_predictions(tokens, labels, predictions, file_path):
    """Write predictions to ``file_path`` as UTF-8 text.

    Every token becomes one ``"<token> <label> <prediction>"`` line and each
    sequence is followed by a blank line.
    """
    with open(file_path, 'w', encoding = 'utf-8') as out:
        for seq_tokens, seq_labels, seq_preds in zip(tokens, labels, predictions):
            rows = [f"{tk} {lb} {pr}\n" for tk, lb, pr in zip(seq_tokens, seq_labels, seq_preds)]
            out.writelines(rows)
            out.write("\n")

In [None]:
# DATA
dataname = 'pe' 

df = pd.read_csv(f'/kaggle/input/mtaa-proyecto-final/{dataname}.csv')
print(df.shape)

similarity_modes = ['sbert', 'sbert-sts-bws', 'arguebert-sts-bws']

# Penalty weights used per dataset:
#   webis: lambda_madeup = 1,  lambda_unrecognized = 1
#   ugen:  lambda_madeup = 10, lambda_unrecognized = 1
#   pe:    lambda_madeup = 1,  lambda_unrecognized = 10

# CONFIG
experiments_configuration = Config(runs = 10, epochs = 15, batch_size = 4, max_len = 1024, 
                                   model_checkpoint = "allenai/longformer-base-4096", 
                                   num_labels = 3, label_list = ['O', 'B', 'I'], 
                                   use_similarity = True, mode_similarity = similarity_modes[2],
                                   lambda_madeup = 1, lambda_unrecognized = 10)

tracker = BestModelsTracker() # tracker for all runs.
counter_data = []

# DATA LOADERS
# Built once for all runs: tokenisation is deterministic and load_data's fallback split
# uses a fixed random_state, so re-creating them inside the run loop only repeated work.
train_loader, val_loader, test_loader, class_weights = load_data(df, experiments_configuration)
print(len(train_loader), len(val_loader), len(test_loader))

# LOSS
# Class-weighted cross entropy only for the 'ugen' dataset; plain cross entropy otherwise.
base_loss_fn = nn.CrossEntropyLoss(weight = torch.tensor(class_weights, dtype=torch.float32).to(device)) if dataname == 'ugen' else nn.CrossEntropyLoss()


for nrun in range(experiments_configuration.runs):
    rs = generate_random_seed()
    set_random_seed(rs)
    # Log the seed so an individual run can be reproduced later.
    print(f"Run {nrun}: random seed = {rs}")
    
    run_best_eval_loss = float('inf')
    run_best_epoch = 0
    run_best_model_state = None
    
    # MODEL
    tagger = SimpleTagger(experiments_configuration)
    tagger.to(device)

    # OPTIMIZER
    optimizer = torch.optim.AdamW(tagger.parameters(), lr = 1e-5, eps = 1e-8)
    
    for epoch in range(experiments_configuration.epochs):
        
        train_loss = train_model(tagger, train_loader, optimizer, experiments_configuration, base_loss_fn)
        
        # evaluate model
        eval_loss, eval_f1, eval_pm, eval_counter = evaluate_model(tagger, val_loader, experiments_configuration, experiments_configuration.tokenizer, base_loss_fn)
        counter_data.append([dataname, nrun, epoch]+eval_counter)
        print(f"Epoch: {epoch}/{experiments_configuration.epochs-1} | Train_loss={train_loss} | Eval_loss={eval_loss} | Eval_F1={eval_f1} | Eval_PM={eval_pm}")
        
        tracker.update(tagger, eval_loss, eval_f1, eval_pm, nrun, epoch)
        
        if eval_loss < run_best_eval_loss:
            run_best_eval_loss = eval_loss
            run_best_epoch = epoch
            # Keep the checkpoint on CPU so it does not occupy device memory during training.
            run_best_model_state = {name: tensor.detach().cpu().clone() for name, tensor in tagger.state_dict().items()}
            print(f"New best model in epoch {epoch} (eval_loss = {eval_loss})")
            
    # testing
    # Restore the best weights into the tagger that is already on the device instead of
    # instantiating (and loading) a second pretrained model per run.
    best_tagger = tagger
    if run_best_model_state is not None:
        best_tagger.load_state_dict(run_best_model_state)
    else:
        # Happens only if no epoch produced an eval loss below +inf (e.g. NaN losses).
        print(f"Run {nrun}: no best checkpoint was recorded; testing with the final weights.")
    
    tokens, labels, preds = test_model(best_tagger, test_loader, experiments_configuration.tokenizer)
    assert len(tokens) == len(labels) == len(preds)
    if len(tokens) > 0:
        assert len(tokens[0]) == len(labels[0]) == len(preds[0])

    save_predictions(tokens, labels, preds, f'test_{dataname}_{nrun}.txt')
        
    print(f"end run {nrun}")
    print()
    
    
# saving best models (one per criterion) and the per-epoch argument counts
tracker.save_models(dataname)
pd.DataFrame(counter_data, columns = ['dataname', 'run', 'epoch', 'pm', 'no-pm', 'mu', 'total_predicted', 'unr', 'total_gold']).to_csv(f'errors_{dataname}.csv', index = False)

In [None]:
import os
import zipfile
from IPython.display import FileLink

def zip_files(folder_path, zip_name):
    """Create ``zip_name`` containing every .txt, .csv and .pt file under ``folder_path``.

    The tree is walked recursively and files are stored under their path relative to
    ``folder_path``, using DEFLATE compression.
    """
    wanted_suffixes = ('.txt', '.csv', '.pt')

    with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as archive:
        for current_dir, _subdirs, names in os.walk(folder_path):
            for name in names:
                # Skip anything that is not one of the wanted artifact types.
                if not name.endswith(wanted_suffixes):
                    continue
                full_path = os.path.join(current_dir, name)
                archive.write(full_path, os.path.relpath(full_path, folder_path))

# Call the function to compress the files found under the working directory.
folder_path = '/kaggle/working/'
zip_name = 'test.zip'
zip_files(folder_path, zip_name)

# Last expression of the cell: displays a link to the archive created above.
FileLink(r'test.zip')