In [1]:
#!nvidia-smi

# Fine-Tuning con LoRA basato sulla similarità tra task

#### Configurazioni generali

Installazione delle librerie necessarie.

Importo i moduli necessari.

In [2]:
import torch

In [3]:
import tllib
import codecarbon



In [4]:
import os
import random
import numpy as np
import pandas as pd
import torch.nn as nn
import transformers
from datasets import load_dataset
from sklearn.model_selection import train_test_split
from collections import Counter

Impostazione del seme casuale per la riproducibilità.

In [5]:
seed_value = 42

os.environ['PYTHONHASHSEED'] = str(seed_value)
random.seed(seed_value)
np.random.seed(seed_value)
torch.manual_seed(seed_value)

# Imposto il seme casuale anche per i calcoli CUDA
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)  
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

## Creazione del pool di modelli

In [6]:
from peft import PeftModel
from transformers import RobertaForSequenceClassification

2025-04-18 15:50:39.730584: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-18 15:50:39.745268: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744991439.763309  545895 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744991439.768803  545895 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1744991439.782443  545895 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [7]:
# Cartella in cui sono memorizzati i pesi degli adapters LoRA
ADAPTER_DIR = "./adapters-pool/original/"

# Dizionario di configurazione per ogni task
TASK_CONFIGS = {
    "ag":        {"num_classes": 4},
    "sst":       {"num_classes": 2},
    "mnli":      {"num_classes": 3},
    "ei":        {"num_classes": 4},
    "paws":      {"num_classes": 2},
}


# Funzione per creare il pool di modelli
def create_model_pool(adapter_dir=ADAPTER_DIR, device="cuda" if torch.cuda.is_available() else "cpu"):
    
    model_pool = {}

    for folder in os.listdir(adapter_dir):
        adapter_path = os.path.join(adapter_dir, folder)
        task_name = folder.replace("_lora_adapter", "")
        
        if task_name not in TASK_CONFIGS:
            print(f"Task '{task_name}' non trovato in TASK_CONFIGS, verrà ignorato...")
            continue
        
        print(f"Caricamento modello per il task: {task_name}...")

        # Ottengo il numero di classi per il task
        num_classes = TASK_CONFIGS[task_name]["num_classes"]   
        
        # Creo il modello base
        base_model = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=num_classes)

        # Carico i pesi LoRA
        lora_model = PeftModel.from_pretrained(base_model, adapter_path)
        
        # Carico i pesi della testa di classificazione 
        classifier_head_path = os.path.join(adapter_path, "classifier_head.pth")
        classifier_state_dict = torch.load(classifier_head_path, map_location=device, weights_only=True)

        print("Chiavi del classifier_head:", classifier_state_dict.keys())

        classifier_module = lora_model.base_model.classifier

        classifier_module.dense.weight.data.copy_(classifier_state_dict["dense.weight"])
        classifier_module.dense.bias.data.copy_(classifier_state_dict["dense.bias"])
        classifier_module.out_proj.weight.data.copy_(classifier_state_dict["out_proj.weight"])
        classifier_module.out_proj.bias.data.copy_(classifier_state_dict["out_proj.bias"])


        lora_model.to(device)
        
        model_pool[task_name] = lora_model
        print(f"Modello '{task_name}' caricato con successo.\n")

    return model_pool


In [8]:
# Creazione del pool di modelli 
model_pool = create_model_pool()

Caricamento modello per il task: ag...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Chiavi del classifier_head: odict_keys(['dense.weight', 'dense.bias', 'out_proj.weight', 'out_proj.bias'])
Modello 'ag' caricato con successo.

Task '.ipynb_checkpoints' non trovato in TASK_CONFIGS, verrà ignorato...
Caricamento modello per il task: sst...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Chiavi del classifier_head: odict_keys(['dense.weight', 'dense.bias', 'out_proj.weight', 'out_proj.bias'])
Modello 'sst' caricato con successo.

Caricamento modello per il task: ei...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Chiavi del classifier_head: odict_keys(['dense.weight', 'dense.bias', 'out_proj.weight', 'out_proj.bias'])
Modello 'ei' caricato con successo.

Caricamento modello per il task: mnli...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Chiavi del classifier_head: odict_keys(['dense.weight', 'dense.bias', 'out_proj.weight', 'out_proj.bias'])
Modello 'mnli' caricato con successo.

Caricamento modello per il task: paws...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Chiavi del classifier_head: odict_keys(['dense.weight', 'dense.bias', 'out_proj.weight', 'out_proj.bias'])
Modello 'paws' caricato con successo.



In [9]:
import shutil

working_pool_path = "./adapters-pool/working/"
shutil.rmtree(working_pool_path, ignore_errors=True)
os.makedirs(working_pool_path, exist_ok=True)

print("La cartella ./adapters-pool/working/ è stata ripulita.")

La cartella ./adapters-pool/working/ è stata ripulita.


In [10]:
import os
import shutil

original_pool_path = "./adapters-pool/original/"
working_pool_path = "./adapters-pool/working/"

os.makedirs(working_pool_path, exist_ok=True)

for model_name in os.listdir(original_pool_path):
    src_path = os.path.join(original_pool_path, model_name)
    dest_path = os.path.join(working_pool_path, model_name)
    
    if not os.path.exists(dest_path):
        shutil.copytree(src_path, dest_path)
        print(f"Copiato: {model_name}")

print("Tutti i modelli sono stati copiati in ./adapters-pool/working/")

Copiato: ag_lora_adapter
Copiato: .ipynb_checkpoints
Copiato: sst_lora_adapter
Copiato: ei_lora_adapter
Copiato: mnli_lora_adapter
Copiato: paws_lora_adapter
Tutti i modelli sono stati copiati in ./adapters-pool/working/


## Definizione delle funzioni per il calcolo della similarità

#### LEEP

In [11]:
def get_model_predictions(model, data_loader):
    model.eval()
    model.to(device)
    
    all_predictions = []
    all_labels = []

    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            token_type_ids = batch['token_type_ids'].to(device)
            labels = batch['labels'].to(device)

            # Salvo le etichette vere
            all_labels.extend(labels.cpu().numpy())
            
            # Ottiengo i logits dal modello
            outputs = model(input_ids=input_ids, 
                            attention_mask=attention_mask, 
                            token_type_ids=token_type_ids)
            
            probabilities = torch.softmax(outputs.logits, dim=-1)

            all_predictions.extend(probabilities.cpu().numpy())

    return np.array(all_predictions), np.array(all_labels).astype(int)

In [12]:
from tllib.ranking import log_expected_empirical_prediction as leep

def calculate_leep_scores(pool_models, target_loader):
    leep_scores = []
    for source_name, source_model in pool_models.items():
        
        predictions, labels = get_model_predictions(source_model, target_loader)
        score = leep(predictions, labels)
        leep_scores.append({"Source": source_name, "Score": score})
        
        print(f"Source: {source_name}, LEEP Score: {score:.4f}")
    return leep_scores

#### LogME

In [13]:
def extract_embeddings(model, data_loader, device):
    model.eval()
    embeddings = []
    labels = []

    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            token_type_ids = batch['token_type_ids'].to(device)
            label = batch['labels'].cpu().numpy()

            # Ottiengo l'output del modello
            outputs = model.roberta(
                input_ids=input_ids,
                attention_mask=attention_mask,
                token_type_ids=token_type_ids
            )
            # Estraggo la rappresentazione del token [CLS]
            cls_embeddings = outputs.last_hidden_state[:, 0, :].cpu().numpy()
            embeddings.append(cls_embeddings)
            labels.append(label)
    
    embeddings = np.vstack(embeddings)
    labels = np.concatenate(labels)
    labels = labels.astype(int)
    
    return embeddings, labels

In [14]:
from tllib.ranking import log_maximum_evidence as logme

def calculate_logme_scores(pool_models, target_loader, device):
    logme_scores = []
    for source_name, source_model in pool_models.items():
    
        embeddings, labels = extract_embeddings(source_model, target_loader, device)
        score = logme(embeddings, labels)
        logme_scores.append({"Source": source_name, "Score": score})
        
        print(f"Source: {source_name}, LogME Score: {score:.4f}")
    return logme_scores

#### H-Score

In [15]:
from sklearn.covariance import LedoitWolf

# Funzione della libreria tllib
def regularized_h_score(features: np.ndarray, labels: np.ndarray):
    r"""
    Regularized H-score in `Newer is not always better: Rethinking transferability metrics, their peculiarities, stability and performance (NeurIPS 2021) 
    <https://openreview.net/pdf?id=iz_Wwmfquno>`_.
    
    The  regularized H-Score :math:`\mathcal{H}_{\alpha}` can be described as:

    .. math::
        \mathcal{H}_{\alpha}=\operatorname{tr}\left(\operatorname{cov}_{\alpha}(f)^{-1}\left(1-\alpha \right)\operatorname{cov}\left(\mathbb{E}[f \mid y]\right)\right)
    
    where :math:`f` is the features extracted by the model to be ranked, :math:`y` is the groud-truth label vector and :math:`\operatorname{cov}_{\alpha}` the  Ledoit-Wolf 
    covariance estimator with shrinkage parameter :math:`\alpha`
    Args:
        features (np.ndarray):features extracted by pre-trained model.
        labels (np.ndarray):  groud-truth labels.

    Shape:
        - features: (N, F), with number of samples N and feature dimension F.
        - labels: (N, ) elements in [0, :math:`C_t`), with target class number :math:`C_t`.
        - score: scalar.
    """
    f = features.astype('float64')
    f = f - np.mean(f, axis=0, keepdims=True)  # Center the features for correct Ledoit-Wolf Estimation
    y = labels

    C = int(y.max() + 1)
    g = np.zeros_like(f)

    cov = LedoitWolf(assume_centered=False).fit(f)
    alpha = cov.shrinkage_
    covf_alpha = cov.covariance_

    for i in range(C):
        Ef_i = np.mean(f[y == i, :], axis=0)
        g[y == i] = Ef_i

    covg = np.cov(g, rowvar=False)
    score = np.trace(np.dot(np.linalg.pinv(covf_alpha, rcond=1e-15), (1 - alpha) * covg))

    return score

In [16]:
def calculate_h_scores(pool_models, target_loader, device):
    h_scores = []
    for source_name, source_model in pool_models.items():
        
        embeddings, labels = extract_embeddings(source_model, target_loader, device)
        score = regularized_h_score(embeddings, labels)
        h_scores.append({"Source": source_name, "Score": score})
        
        print(f"Source: {source_name}, H-Score: {score:.4f}")
    
    return h_scores

#### NCE

In [17]:
def get_source_labels(model, data_loader, device):
    model.eval()
    all_predictions = []

    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            token_type_ids = batch['token_type_ids'].to(device)

            # Ottieni i logits dal modello
            logits = model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
            
            predictions = torch.argmax(torch.softmax(logits.logits, dim=1), dim=1).cpu().numpy()
            all_predictions.extend(predictions.astype(int))
    
    return np.array(all_predictions)

In [18]:
# # Funzione della libreria tllib con aggiunta di smoothing e clipping
# def negative_conditional_entropy(source_labels: np.ndarray, target_labels: np.ndarray, alpha=1e-6, clip_min=1e-20):
#     """
#     Negative Conditional Entropy with smoothing to handle zero probabilities.

#     Args:
#         source_labels (np.ndarray): Predicted source labels.
#         target_labels (np.ndarray): Ground-truth target labels.
#         alpha (float): Smoothing factor for joint probability estimation.
#         clip_min (float): Minimum value for probabilities to prevent log(0).

#     Returns:
#         float: Negative conditional entropy score.
#     """
#     C_t = int(np.max(target_labels) + 1)
#     C_s = int(np.max(source_labels) + 1)
#     N = len(source_labels)

#     joint = np.zeros((C_t, C_s), dtype=float)
#     for s, t in zip(source_labels, target_labels):
#         joint[t, s] += 1.0

#     joint = (joint + alpha) / (N + alpha * C_s * C_t)

#     p_z = joint.sum(axis=0, keepdims=True)

#     p_target_given_source = (joint / p_z).T 
#     p_target_given_source = np.clip(p_target_given_source, a_min=clip_min, a_max=None)  # Evita log(0)

#     entropy_y_given_z = np.sum(- p_target_given_source * np.log(p_target_given_source), axis=1, keepdims=True)
    
#     mask = p_z.reshape(-1) != 0
#     conditional_entropy = np.sum(entropy_y_given_z * p_z.reshape((-1, 1))[mask])

#     return -conditional_entropy


In [19]:
def negative_conditional_entropy(source_labels: np.ndarray, target_labels: np.ndarray, alpha=1e-30, clip_min=1e-50):
    C_t = int(np.max(target_labels) + 1)
    C_s = int(np.max(source_labels) + 1)
    N = len(source_labels)

    # Compute joint probability with smoothing
    joint = np.zeros((C_t, C_s), dtype=float)
    for s, t in zip(source_labels, target_labels):
        joint[t, s] += 1.0

    joint = (joint + alpha) / (N + alpha * C_s * C_t)

    # Compute marginal P(z)
    p_z = joint.sum(axis=0, keepdims=True)

    # Compute conditional P(y|z) and apply clipping
    p_target_given_source = (joint / (p_z + clip_min)).T  
    p_target_given_source = np.clip(p_target_given_source, a_min=clip_min, a_max=None)

    # Compute entropy
    entropy_y_given_z = np.sum(- p_target_given_source * np.log(p_target_given_source), axis=1, keepdims=True)
    conditional_entropy = np.sum(entropy_y_given_z * p_z.T)

    return -conditional_entropy


In [20]:
def calculate_nce_scores(pool_models, target_loader, device):
    nce_scores = []
    
    for source_name, source_model in pool_models.items():
        
        source_labels = get_source_labels(source_model, target_loader, device)
        target_labels = np.array(target_loader.dataset.labels)    
        
        score = negative_conditional_entropy(source_labels, target_labels)
        nce_scores.append({"Source": source_name, "Score": score})
        
        print(f"Source: {source_name}, NCE Score: {score}")
    
    return nce_scores

### NLEEP

In [21]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture
import torch

def NLEEP(probabilities, y, component_ratio=5):
    n = len(y)
    num_classes = len(np.unique(y))
    num_source_classes = probabilities.shape[1] 

    pca_80 = PCA(n_components=0.8)
    X_pca_80 = pca_80.fit_transform(probabilities) 

    n_components_num = component_ratio * num_classes
    gmm = GaussianMixture(n_components=n_components_num, random_state=42).fit(X_pca_80)
    prob = gmm.predict_proba(X_pca_80) 

    pyz = np.zeros((num_classes, n_components_num))
    for y_ in range(num_classes):
        indices = np.where(y == y_)[0]
        if len(indices) > 0:
            filter_ = np.take(prob, indices, axis=0)
            pyz[y_] = np.sum(filter_, axis=0) / n 

    pz = np.sum(pyz, axis=0) + 1e-10  
    py_z = pyz / pz
    py_x = np.dot(prob, py_z.T)

    nleep_score = np.sum(py_x[np.arange(n), y]) / n
    return nleep_score


In [22]:
def calculate_nleep_scores(pool_models, target_loader):
    leep_scores = []
    for source_name, source_model in pool_models.items():
        
        predictions, labels = get_model_predictions(source_model, target_loader)
        score =  NLEEP(predictions, labels)
        leep_scores.append({"Source": source_name, "Score": score})
        
        print(f"Source: {source_name}, NLEEP Score: {score:.4f}")
    return leep_scores

## Implementazione del metodo principale

In [23]:
import random
    
def train_target_task(
    target_name,         
    target_loader, 
    target_num_classes,
    model_pool,     
    training_data, 
    validation_data,
    test_data,
    device="cuda" if torch.cuda.is_available() else "cpu",
    learning_rate=5e-4, 
    epochs=4, 
    patience=1,
    batch_size=32, 
    optimizer_class=torch.optim.AdamW,
    similarity_metric="leep",
    min_accuracy=0.70
):
    
    print(f"Selezione del miglior modello per il task {target_name} usando {similarity_metric}...\n")

    
    os.makedirs("carbon_emissions", exist_ok=True)
    tracker = EmissionsTracker( project_name=f"{target_name}_training", output_dir="carbon_emissions", output_file="selection_emissions.csv", gpu_ids=[6]) 
    tracker.start()  

    start_time = time.time()

    # ------
    random_generator = random.Random(time.time())
    
    selected_models = random_generator.sample(list(model_pool.keys()), 5)
    print(f"Modelli selezionati per il confronto: {selected_models}\n")
    
    model_pool_temp = {nome: model_pool[nome] for nome in selected_models}
    # ------
    
   # Calcolo della metrica di similarità scelta
    if similarity_metric == "leep":
        similarity_scores = calculate_leep_scores(model_pool_temp, target_loader) # cambia model_pool_temp in model_pool 
    elif similarity_metric == "nleep":
        similarity_scores = calculate_nleep_scores(model_pool_temp, target_loader)# cambia model_pool_temp in model_pool 
    elif similarity_metric == "logme":
        similarity_scores = calculate_logme_scores(model_pool_temp,target_loader, device)# cambia model_pool_temp in model_pool 
    elif similarity_metric == "h-score":
        similarity_scores = calculate_h_scores(model_pool_temp, target_loader, device)# cambia model_pool_temp in model_pool 
    elif similarity_metric == "nce":
        similarity_scores = calculate_nce_scores(model_pool_temp, target_loader, device)# cambia model_pool_temp in model_pool 
    else:
        raise ValueError("Errore. Il parametro similarity_metric deve essere: 'leep','nleep', 'logme', 'h-score' oppure 'nce'.")
    
    # Seleziono il miglior modello in base alla metrica scelta
    best_model_entry = max(similarity_scores, key=lambda x: x["Score"])
    best_model_name = best_model_entry["Source"]
    best_model = model_pool_temp[best_model_name] # cambia model_pool_temp in model_pool 
    best_score = best_model_entry["Score"]

    # Test random
    # best_model_name = random.choice(list(model_pool.keys()))
    # best_model = model_pool[best_model_name]
    # best_score = 0
    
    selection_time = time.time() - start_time

    selection_emissions = tracker.stop()
    print(f"\nEmissioni selezione : {selection_emissions:.4f} kg") 
    
    print(f"Modello selezionato: {best_model_name} ({similarity_metric}: {best_score:.4f})\n")

    # Creo un nuovo modello per il task target
    base_model = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=target_num_classes)
    
    # Carico gli adapter LoRA del miglior modello selezionato
    new_model = PeftModel.from_pretrained(base_model, f"./adapters-pool/working/{best_model_name}_lora_adapter")
    new_model.to(device)
    print(f"Pesi LoRA del modello {best_model_name} caricati per il task {target_name}.\n")
    
    for name, param in new_model.named_parameters():
        if "lora" in name:   # if "lora" or "classifier" in name:  
            param.requires_grad = True
            
    
    # Imposto i parametri principali per il training 
    # Creo i DataLoader
    train_loader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(validation_data, batch_size=batch_size, shuffle=False)
    
    total_steps = len(train_loader) * epochs

    # Ottimizzatore (solo parametri addestrabili)
    optimizer = optimizer_class(filter(lambda p: p.requires_grad, new_model.parameters()), lr=learning_rate)
    
    # Scheduler
    scheduler = transformers.get_cosine_schedule_with_warmup(optimizer = optimizer,
                                                       num_warmup_steps = 0,
                                                       num_training_steps = total_steps)
    
    # Avvio il fine-tuning
    print(f"Inizio addestramento sul task {target_name}...\n")
    history, total_time, emission = train_and_evaluate_model(
        new_model, target_name, train_loader, val_loader,
        optimizer, scheduler, device, epochs, patience)

    print(f"Addestramento completato.\n")
    
    # Valutazione sul test set
    new_model.load_state_dict(torch.load(f"{target_name}_best_model_state.bin"))
    
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

    test_loss, test_acc, test_f1 = eval_model(
        new_model, test_loader, device
    )
    print(f"Test loss: {test_loss:.4f}, Accuracy: {test_acc:.4f}, F1 score: {test_f1:.4f}")
    
    # Salvo il nuovo modello se soddisfa il requisito di accuracy minima
    if test_acc >= min_accuracy:
        
        # Salvo il modello
        updated_pool_path = f"./adapters-pool/working/{target_name}_lora_adapter"
        os.makedirs(updated_pool_path, exist_ok=True)
        new_model.save_pretrained(updated_pool_path)
        print(f"Modello {target_name} salvato in {updated_pool_path}.\n")

        # Aggiungo il nuovo modello al pool
        model_pool[target_name] = new_model
        print(f"Modello {target_name} aggiunto al pool. Ora il pool contiene {len(model_pool)} modelli.\n")
    else:
        print(f"Il modello non ha raggiunto il requisito di accuracy minima, quindi non è stato aggiunto nel pool.\n Accuracy modello: {test_acc:.4f}, Accuracy minima: {min_accuracy:.4f}")
        
        
    return {
        "model": new_model,
        "selected_model": best_model_name,
        "similarity_score": best_score,
        "selection_time": selection_time,
        "selection_emission": selection_emissions,
        "training_time": total_time,
        "emission": emission,
        "history": history,
        "test_loss": test_loss,
        "test_acc": test_acc,
        "test_f1":test_f1,
        "selected_models": selected_models 
    }


In [24]:
from tqdm import tqdm
import time
import torch
from sklearn.metrics import accuracy_score, f1_score
import torch.nn as nn
from codecarbon import EmissionsTracker

os.environ["CODECARBON_LOG_LEVEL"] = "WARNING"


# Funzione di training e valutazione
def train_and_evaluate_model(model, dataset, train_loader, val_loader, optimizer, scheduler, device, epochs, patience):

    os.makedirs("carbon_emissions", exist_ok=True)
    tracker = EmissionsTracker( project_name=f"{dataset} training", output_dir="carbon_emissions", output_file="emissions.csv", gpu_ids=[6]) 
    tracker.start()  

    history = {"train_loss": [], "train_acc": [], "val_loss": [], "val_acc": []}
    best_accuracy = 0
    best_loss = float('inf')
    patience_counter = 0  

    start_time = time.time()

    for epoch in range(epochs):
        print(f"\nEpoch {epoch + 1}/{epochs}")

        # Training
        train_loss, train_acc = train_model(model, train_loader, optimizer, scheduler, device)
        
        # Valutazione
        val_loss, val_acc, val_f1 = eval_model(model, val_loader, device)
        
        # Salvataggio del modello migliore
        if val_acc > best_accuracy:
            print(f"Nuovo miglior modello salvato all'epoca {epoch + 1} (Accuracy: {val_acc:.4f})")
            torch.save(model.state_dict(),  f"{dataset}_best_model_state.bin")
            best_accuracy = val_acc

        # Salvataggio delle metriche
        history["train_loss"].append(train_loss)
        history["train_acc"].append(train_acc)
        history["val_loss"].append(val_loss)
        history["val_acc"].append(val_acc)

        # Early stopping
        if val_loss < best_loss:
            best_loss = val_loss
            patience_counter = 0 
        else:
            patience_counter += 1
            print(f"La loss sul validation set non è migliorata per {patience_counter} epoche.")

        if patience_counter >= patience:
            print(f"Early stopping attivato dopo {patience_counter} epoche senza miglioramenti")
            break

    end_time = time.time()
    total_training_time = end_time - start_time

    emissions = tracker.stop()
    print(f"\nEmissioni CO₂ totali: {emissions:.4f} kg")  

    return history, total_training_time, emissions

In [25]:
# Funzione di training
def train_model(model, data_loader, optimizer, scheduler, device):

    model = model.train()

    total_loss = 0
    all_preds = []
    all_labels = []

    loop = tqdm(data_loader, desc=f"Training  ", leave=True)

    for batch in loop:

        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        token_type_ids = batch['token_type_ids'].to(device)
        labels = batch["labels"].to(device)

        optimizer.zero_grad()

        # --- Forward pass ---
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            labels=labels 
        )

        loss = outputs.loss  
        logits = outputs.logits  

        # --- Backward pass ---
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()
        scheduler.step()

        total_loss += loss.item()

        preds = torch.argmax(logits, dim=1)  # Predizioni multiclasse

        all_preds.extend(preds.detach().cpu().numpy())
        all_labels.extend(labels.detach().cpu().numpy())

        loop.set_postfix(loss=total_loss / (loop.n + 1), accuracy=accuracy_score(all_labels, all_preds))

    avg_loss = total_loss / len(data_loader)
    accuracy = accuracy_score(all_labels, all_preds)

    return avg_loss, accuracy

In [26]:
# Funzione di valutazione
def eval_model(model, data_loader, device):

    model = model.eval()

    total_loss = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        
        loop = tqdm(data_loader, desc=f"Evaluating", leave=True)
        for batch in loop:
            
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            token_type_ids = batch["token_type_ids"].to(device)
            labels = batch["labels"].to(device)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                token_type_ids=token_type_ids,
                labels=labels
            )

            loss = outputs.loss
            logits = outputs.logits

            total_loss += loss.item()

            preds = torch.argmax(logits, dim=1)

            all_preds.extend(preds.detach().cpu().numpy())
            all_labels.extend(labels.detach().cpu().numpy())

            loop.set_postfix(Val_loss=total_loss / (loop.n + 1), Val_accuracy=accuracy_score(all_labels, all_preds))

    avg_loss = total_loss / len(data_loader)
    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average="macro")
    
    return avg_loss, accuracy, f1  


## Caricamento dei nuovi task

### Sentiment 140

In [27]:
# Ottenimento del dataset
sent140_dataset = load_dataset("stanfordnlp/sentiment140",trust_remote_code=True)
print(sent140_dataset)

DatasetDict({
    train: Dataset({
        features: ['text', 'date', 'user', 'sentiment', 'query'],
        num_rows: 1600000
    })
    test: Dataset({
        features: ['text', 'date', 'user', 'sentiment', 'query'],
        num_rows: 498
    })
})


In [28]:
# Divido i dati in training, validation e test set
sent140_data = sent140_dataset["train"].shuffle(seed=42)

sent140_temp_sentences, sent140_test_sentences, sent140_temp_labels, sent140_test_labels = train_test_split(
                                                sent140_data['text'], 
                                                sent140_data['sentiment'], 
                                                test_size=1024, 
                                                random_state=42,
                                                stratify=sent140_data['sentiment'])

sent140_train_sentences, sent140_val_sentences, sent140_train_labels, sent140_val_labels = train_test_split(
                                                sent140_temp_sentences, 
                                                sent140_temp_labels, 
                                                train_size=20000,
                                                test_size=1000,
                                                random_state=42,
                                                stratify=sent140_temp_labels)

# Trasformazione delle etichette 0 -> 0 e 4->1
sent140_train_labels = [1 if label == 4 else 0 for label in sent140_train_labels]
sent140_val_labels = [1 if label == 4 else 0 for label in sent140_val_labels]
sent140_test_labels = [1 if label == 4 else 0 for label in sent140_test_labels]

print("Dimensioni dei set:")
print(f"Train: {len(sent140_train_sentences)}")
print(f"Validation: {len(sent140_val_sentences)}")
print(f"Test: {len(sent140_test_sentences)}")

# Verifica distribuzione delle etichette
print("\nDistribuzione delle etichette:")
print(f"Train: {Counter(sent140_train_labels)}")
print(f"Validation: {Counter(sent140_val_labels)}")
print(f"Test: {Counter(sent140_test_labels)}")

Dimensioni dei set:
Train: 20000
Validation: 1000
Test: 1024

Distribuzione delle etichette:
Train: Counter({0: 10000, 1: 10000})
Validation: Counter({1: 500, 0: 500})
Test: Counter({1: 512, 0: 512})


In [29]:
from torch.utils.data import Dataset

class ClassificationDataset(Dataset):

    def __init__(self, sentences, labels, tokenizer, max_len):
        self.sentences = sentences
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len
    
    def __len__(self):
        return len(self.sentences)
    
    def __getitem__(self,index):
        sentence = self.sentences[index]
        label = self.labels[index]
        
        encoding = self.tokenizer.encode_plus(
            sentence,
            add_special_tokens=True,
            max_length=self.max_len,
            truncation=True,
            return_token_type_ids=True,
            padding="max_length",
            return_attention_mask=True,
            return_tensors='pt')
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'token_type_ids': encoding["token_type_ids"].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
            }

In [30]:
from transformers import RobertaTokenizer
from torch.utils.data import DataLoader

MAX_SEQ_LEN = 128
BATCH_SIZE = 32

# Inizializzo il Tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# Ottiengo i dataset
sent140_training_data = ClassificationDataset(
                           sentences = sent140_train_sentences,
                           labels = sent140_train_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

sent140_validation_data = ClassificationDataset(
                           sentences = sent140_val_sentences,
                           labels = sent140_val_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

sent140_test_data = ClassificationDataset(
                           sentences = sent140_test_sentences,
                           labels = sent140_test_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

# Creo i DataLoader
sent140_train_loader = DataLoader(sent140_training_data, batch_size=BATCH_SIZE, shuffle=True)
sent140_val_loader = DataLoader(sent140_validation_data, batch_size=BATCH_SIZE, shuffle=False)
sent140_test_loader = DataLoader(sent140_test_data, batch_size=BATCH_SIZE, shuffle=False)

### IMDB Rewies

In [31]:
# Ottenimento del dataset
imdb_dataset = load_dataset("stanfordnlp/imdb")
print(imdb_dataset)

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    unsupervised: Dataset({
        features: ['text', 'label'],
        num_rows: 50000
    })
})


In [32]:
from datasets import Dataset

imdb_train_data = pd.DataFrame(imdb_dataset["train"])
imdb_test_data = pd.DataFrame(imdb_dataset["test"])

imdb_data = Dataset.from_pandas(pd.concat([imdb_train_data, imdb_test_data], ignore_index=True))
imdb_data = imdb_data.shuffle(seed=42)

imdb_temp_sentences, imdb_test_sentences, imdb_temp_labels, imdb_test_labels = train_test_split(
                                                imdb_data['text'],
                                                imdb_data['label'], 
                                                test_size=0.1, 
                                                random_state=42,
                                                stratify=imdb_data['label'])

imdb_train_sentences, imdb_val_sentences, imdb_train_labels, imdb_val_labels = train_test_split(
                                                imdb_temp_sentences,
                                                imdb_temp_labels,
                                                test_size=0.1111,
                                                random_state=42,
                                                stratify=imdb_temp_labels)

print("Dimensioni dei set:")
print(f"Train: {len(imdb_train_sentences)}")
print(f"Validation: {len(imdb_val_sentences)}")
print(f"Test: {len(imdb_test_sentences)}")

# Verifica distribuzione delle etichette
print("\nDistribuzione delle etichette:")
print(f"Train: {Counter(imdb_train_labels)}")
print(f"Validation: {Counter(imdb_val_labels)}")
print(f"Test: {Counter(imdb_test_labels)}")

Dimensioni dei set:
Train: 40000
Validation: 5000
Test: 5000

Distribuzione delle etichette:
Train: Counter({0: 20000, 1: 20000})
Validation: Counter({0: 2500, 1: 2500})
Test: Counter({1: 2500, 0: 2500})


In [33]:
from transformers import RobertaTokenizer
from torch.utils.data import DataLoader

MAX_SEQ_LEN = 128
BATCH_SIZE = 32

# Inizializzo il Tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# Ottiengo i dataset
imdb_training_data = ClassificationDataset(sentences = imdb_train_sentences,
                           labels = imdb_train_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

imdb_validation_data = ClassificationDataset(sentences = imdb_val_sentences,
                           labels = imdb_val_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

imdb_test_data = ClassificationDataset(sentences = imdb_test_sentences,
                           labels = imdb_test_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

# Creo i DataLoader
imdb_train_loader = DataLoader(imdb_training_data, batch_size=BATCH_SIZE, shuffle=True)
imdb_val_loader = DataLoader(imdb_validation_data, batch_size=BATCH_SIZE, shuffle=False)
imdb_test_loader = DataLoader(imdb_test_data, batch_size=BATCH_SIZE, shuffle=False)

### 20 News Group

In [34]:
# Ottenimento del dataset
news_dataset = load_dataset("SetFit/20_newsgroups")
print(news_dataset)

Repo card metadata block was not found. Setting CardData to empty.


DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'label_text'],
        num_rows: 11314
    })
    test: Dataset({
        features: ['text', 'label', 'label_text'],
        num_rows: 7532
    })
})


In [35]:
from datasets import Dataset

news_train_data = pd.DataFrame(news_dataset["train"])
news_test_data = pd.DataFrame(news_dataset["test"])

news_data = Dataset.from_pandas(pd.concat([news_train_data, news_test_data], ignore_index=True))
news_data = news_data.shuffle(seed=42)

news_temp_sentences, news_test_sentences, news_temp_labels, news_test_labels = train_test_split(
                                                news_data['text'],
                                                news_data['label'], 
                                                test_size=0.1, 
                                                random_state=42,
                                                stratify=news_data['label'])

news_train_sentences, news_val_sentences, news_train_labels, news_val_labels = train_test_split(
                                                news_temp_sentences,
                                                news_temp_labels,
                                                test_size=0.1111,
                                                random_state=42,
                                                stratify=news_temp_labels)

print("Dimensioni dei set:")
print(f"Train: {len(news_train_sentences)}")
print(f"Validation: {len(news_val_sentences)}")
print(f"Test: {len(news_test_sentences)}")

# Verifica distribuzione delle etichette
print("\nDistribuzione delle etichette:")
print(f"Train: {Counter(news_train_labels)}")
print(f"Validation: {Counter(news_val_labels)}")
print(f"Test: {Counter(news_test_labels)}")

Dimensioni dei set:
Train: 15076
Validation: 1885
Test: 1885

Distribuzione delle etichette:
Train: Counter({10: 799, 15: 797, 8: 796, 9: 795, 11: 793, 13: 792, 7: 792, 5: 790, 14: 789, 12: 788, 2: 788, 3: 786, 6: 780, 1: 779, 4: 771, 17: 752, 16: 728, 0: 639, 18: 620, 19: 502})
Validation: Counter({10: 100, 8: 100, 9: 100, 15: 100, 14: 99, 13: 99, 7: 99, 11: 99, 5: 99, 3: 98, 12: 98, 2: 98, 6: 97, 1: 97, 4: 96, 17: 94, 16: 91, 0: 80, 18: 78, 19: 63})
Test: Counter({8: 100, 10: 100, 15: 100, 11: 99, 9: 99, 7: 99, 13: 99, 14: 99, 5: 99, 2: 99, 6: 98, 3: 98, 12: 98, 1: 97, 4: 96, 17: 94, 16: 91, 0: 80, 18: 77, 19: 63})


In [36]:
MAX_SEQ_LEN = 128
BATCH_SIZE = 32

# Inizializzo il Tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# Ottiengo i dataset
news_training_data = ClassificationDataset(sentences = news_train_sentences,
                           labels = news_train_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

news_validation_data = ClassificationDataset(sentences = news_val_sentences,
                           labels = news_val_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

news_test_data = ClassificationDataset(sentences = news_test_sentences,
                           labels = news_test_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

# Creo i DataLoader
news_train_loader = DataLoader(news_training_data, batch_size=BATCH_SIZE, shuffle=True)
news_val_loader = DataLoader(news_validation_data, batch_size=BATCH_SIZE, shuffle=False)
news_test_loader = DataLoader(news_test_data, batch_size=BATCH_SIZE, shuffle=False)

### DBpedia 14

In [37]:
import pandas as pd
from sklearn.model_selection import train_test_split
from collections import Counter

# Ottenimento del dataset
dbpedia_train_dataset = pd.read_csv('./dataset/dbpedia-ontology-dataset/train.csv')
dbpedia_val_test_dataset = pd.read_csv('./dataset/dbpedia-ontology-dataset/test.csv')

# Costruisco il training set in modo da avere 2000 esempi per ognuna delle 14 classi
dbpedia_train_dataset = dbpedia_train_dataset.groupby("label").apply(lambda x: x.sample(n=2000, random_state=42))
dbpedia_train_dataset.reset_index(drop=True, inplace=True)

dbpedia_train_sentences, dbpedia_train_labels = dbpedia_train_dataset['content'], dbpedia_train_dataset['label']

# Divido i dati di test in test e val set
dbpedia_val_sentences, dbpedia_test_sentences, dbpedia_val_labels, dbpedia_test_labels = train_test_split(
                                                dbpedia_val_test_dataset['content'], 
                                                dbpedia_val_test_dataset['label'], 
                                                train_size=5000,
                                                test_size=5000,
                                                random_state=42,
                                                stratify=dbpedia_val_test_dataset['label']
                                            )

dbpedia_val_sentences = dbpedia_val_sentences.reset_index(drop=True)
dbpedia_val_labels = dbpedia_val_labels.reset_index(drop=True)

dbpedia_test_sentences = dbpedia_test_sentences.reset_index(drop=True)
dbpedia_test_labels = dbpedia_test_labels.reset_index(drop=True)

print("Dimensioni dei set:")
print(f"Train: {len(dbpedia_train_sentences)}")
print(f"Validation: {len(dbpedia_val_sentences)}")
print(f"Test: {len(dbpedia_test_sentences)}")

print("\nDistribuzione delle etichette:")
print(f"Train: {Counter(dbpedia_train_labels.tolist())}")
print(f"Validation: {Counter(dbpedia_val_labels.tolist())}")
print(f"Test: {Counter(dbpedia_test_labels.tolist())}")


Dimensioni dei set:
Train: 28000
Validation: 5000
Test: 5000

Distribuzione delle etichette:
Train: Counter({0: 2000, 1: 2000, 2: 2000, 3: 2000, 4: 2000, 5: 2000, 6: 2000, 7: 2000, 8: 2000, 9: 2000, 10: 2000, 11: 2000, 12: 2000, 13: 2000})
Validation: Counter({11: 358, 9: 358, 2: 357, 7: 357, 1: 357, 10: 357, 6: 357, 3: 357, 0: 357, 13: 357, 12: 357, 5: 357, 8: 357, 4: 357})
Test: Counter({12: 358, 10: 358, 1: 357, 6: 357, 3: 357, 5: 357, 9: 357, 4: 357, 2: 357, 0: 357, 11: 357, 8: 357, 7: 357, 13: 357})


  dbpedia_train_dataset = dbpedia_train_dataset.groupby("label").apply(lambda x: x.sample(n=2000, random_state=42))


In [38]:
from transformers import RobertaTokenizer
from torch.utils.data import DataLoader

MAX_SEQ_LEN = 512
BATCH_SIZE = 32

# Inizializzo il Tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# Ottiengo i dataset
dbpedia_training_data = ClassificationDataset(sentences = dbpedia_train_sentences,
                           labels = dbpedia_train_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

dbpedia_validation_data = ClassificationDataset(sentences = dbpedia_val_sentences,
                           labels = dbpedia_val_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

dbpedia_test_data = ClassificationDataset(sentences = dbpedia_test_sentences,
                           labels = dbpedia_test_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

# Creo i DataLoader
dbpedia_train_loader = DataLoader(dbpedia_training_data, batch_size=BATCH_SIZE, shuffle=True)
dbpedia_val_loader = DataLoader(dbpedia_validation_data, batch_size=BATCH_SIZE, shuffle=False)
dbpedia_test_loader = DataLoader(dbpedia_test_data, batch_size=BATCH_SIZE, shuffle=False)

### Emotion Dataset

In [39]:
# Ottenimento del dataset
emotion_dataset = load_dataset("dair-ai/emotion")
print(emotion_dataset)

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
})


In [40]:
from collections import Counter

#Divido i dati in training, validation e test set
emotion_train_data = emotion_dataset["train"].shuffle(seed=42)
emotion_val_data = emotion_dataset["validation"].shuffle(seed=42)
emotion_test_data = emotion_dataset["test"].shuffle(seed=42)


emotion_train_sentences, emotion_train_labels = emotion_train_data['text'],emotion_train_data['label']
emotion_val_sentences, emotion_val_labels = emotion_val_data['text'],emotion_val_data['label']
emotion_test_sentences, emotion_test_labels = emotion_test_data['text'],emotion_test_data['label']


print("Dimensioni dei set:")
print(f"Train: {len(emotion_train_sentences)}")
print(f"Validation: {len(emotion_val_sentences)}")
print(f"Test: {len(emotion_test_sentences)}")

# Verifica distribuzione delle etichette
print("\nDistribuzione delle etichette:")
print(f"Train: {Counter(emotion_train_labels)}")
print(f"Validation: {Counter(emotion_val_labels)}")
print(f"Test: {Counter(emotion_test_labels)}")

Dimensioni dei set:
Train: 16000
Validation: 2000
Test: 2000

Distribuzione delle etichette:
Train: Counter({1: 5362, 0: 4666, 3: 2159, 4: 1937, 2: 1304, 5: 572})
Validation: Counter({1: 704, 0: 550, 3: 275, 4: 212, 2: 178, 5: 81})
Test: Counter({1: 695, 0: 581, 3: 275, 4: 224, 2: 159, 5: 66})


In [41]:
MAX_SEQ_LEN = 128
BATCH_SIZE = 32

# Inizializzo il Tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# Ottiengo i dataset
emotion_training_data = ClassificationDataset(
                            sentences = emotion_train_sentences,
                            labels = emotion_train_labels,
                            tokenizer = tokenizer,
                            max_len = MAX_SEQ_LEN)

emotion_validation_data = ClassificationDataset(
                            sentences = emotion_val_sentences,
                            labels = emotion_val_labels,
                            tokenizer = tokenizer,
                            max_len = MAX_SEQ_LEN)

emotion_test_data = ClassificationDataset(
                            sentences = emotion_test_sentences,
                            labels = emotion_test_labels,
                            tokenizer = tokenizer,
                            max_len = MAX_SEQ_LEN)

# Creo i DataLoader
emotion_train_loader = DataLoader(emotion_training_data, batch_size=BATCH_SIZE, shuffle=True)
emotion_val_loader = DataLoader(emotion_validation_data, batch_size=BATCH_SIZE, shuffle=False)
emotion_test_loader = DataLoader(emotion_test_data, batch_size=BATCH_SIZE, shuffle=False)

### RTE 

In [42]:
# Ottenimento del dataset
rte_dataset = load_dataset("glue", "rte")
print(rte_dataset)

DatasetDict({
    train: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx'],
        num_rows: 2490
    })
    validation: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx'],
        num_rows: 277
    })
    test: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx'],
        num_rows: 3000
    })
})


In [43]:
from datasets import Dataset

rte_train_data = pd.DataFrame(rte_dataset["train"])
rte_val_data = pd.DataFrame(rte_dataset["validation"])

rte_data = Dataset.from_pandas(pd.concat([rte_train_data, rte_val_data], ignore_index=True))
rte_data = rte_data.shuffle(seed=42)

rte_temp_sentences1, rte_test_sentences1, rte_temp_sentences2, rte_test_sentences2,  rte_temp_labels, rte_test_labels = train_test_split(
                                                rte_data['sentence1'],
                                                rte_data['sentence2'],
                                                rte_data['label'], 
                                                test_size=0.1, 
                                                random_state=42,
                                                stratify=rte_data['label'])

rte_train_sentences1, rte_val_sentences1, rte_train_sentences2, rte_val_sentences2, rte_train_labels, rte_val_labels = train_test_split(
                                                rte_temp_sentences1,
                                                rte_temp_sentences2,
                                                rte_temp_labels,
                                                test_size=0.1111,
                                                random_state=42,
                                                stratify=rte_temp_labels)

print("Dimensioni dei set:")
print(f"Train: {len(rte_train_sentences1)}")
print(f"Validation: {len(rte_val_sentences1)}")
print(f"Test: {len(rte_test_sentences1)}")

# Verifica distribuzione delle etichette
print("\nDistribuzione delle etichette:")
print(f"Train: {Counter(rte_train_labels)}")
print(f"Validation: {Counter(rte_val_labels)}")
print(f"Test: {Counter(rte_test_labels)}")

Dimensioni dei set:
Train: 2213
Validation: 277
Test: 277

Distribuzione delle etichette:
Train: Counter({0: 1115, 1: 1098})
Validation: Counter({0: 140, 1: 137})
Test: Counter({0: 140, 1: 137})


In [44]:
from torch.utils.data import Dataset

class NLIDataset(Dataset):

    def __init__(self, sentences1, sentences2 , labels, tokenizer, max_len):
        self.sentences1 = sentences1
        self.sentences2 = sentences2
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len
    
    def __len__(self):
        return len(self.sentences1)
    
    def __getitem__(self,index):
        sentence1 = self.sentences1[index]
        sentence2 = self.sentences2[index]
        label = self.labels[index]
        
        encoding = self.tokenizer.encode_plus(
            sentence1,
            sentence2,
            add_special_tokens=True,
            max_length=self.max_len,
            truncation=True,
            return_token_type_ids=True,
            padding="max_length",
            return_attention_mask=True,
            return_tensors='pt')
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'token_type_ids': encoding["token_type_ids"].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
            }

In [45]:
MAX_SEQ_LEN = 256
BATCH_SIZE = 32

# Inizializzo il Tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# Ottiengo i dataset
rte_training_data = NLIDataset(
                           sentences1 = rte_train_sentences1,
                           sentences2 = rte_train_sentences2,
                           labels = rte_train_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

rte_validation_data = NLIDataset(
                           sentences1 = rte_val_sentences1,
                           sentences2 = rte_val_sentences2,
                           labels = rte_val_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

rte_test_data = NLIDataset(
                           sentences1 = rte_test_sentences1,
                           sentences2 = rte_test_sentences2,
                           labels = rte_test_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

# Creo i DataLoader
rte_train_loader = DataLoader(rte_training_data, batch_size=BATCH_SIZE, shuffle=True)
rte_val_loader = DataLoader(rte_validation_data, batch_size=BATCH_SIZE, shuffle=False)
rte_test_loader = DataLoader(rte_test_data, batch_size=BATCH_SIZE, shuffle=False)

### QQP

In [46]:
from datasets import load_dataset

# Ottenimento del dataset
qqp_dataset = load_dataset("glue", "qqp")
print(qqp_dataset)

DatasetDict({
    train: Dataset({
        features: ['question1', 'question2', 'label', 'idx'],
        num_rows: 363846
    })
    validation: Dataset({
        features: ['question1', 'question2', 'label', 'idx'],
        num_rows: 40430
    })
    test: Dataset({
        features: ['question1', 'question2', 'label', 'idx'],
        num_rows: 390965
    })
})


In [47]:
from collections import Counter
from datasets import load_dataset, Dataset

def balance_dataset(dataset, num_example):
    class_0 = [example for example in dataset if example["label"] == 0][:num_example]
    class_1 = [example for example in dataset if example["label"] == 1][:num_example]

    balanced_data = class_0 + class_1

    balanced_dataset = Dataset.from_list(balanced_data)
    balanced_dataset = balanced_dataset.shuffle(seed=42)

    return balanced_dataset 


def get_val_test_set(dataset, val_size, test_size):
    class_0 = [example for example in dataset if example["label"] == 0]
    class_1 = [example for example in dataset if example["label"] == 1]    

    val_set = class_0[:val_size//2] + class_1[:val_size//2]
    test_set = class_0[val_size//2:val_size//2 + test_size//2] + class_1[val_size//2:val_size//2 + test_size//2]

    val_set = Dataset.from_dict({k: [example[k] for example in val_set] for k in val_set[0]})
    test_set = Dataset.from_dict({k: [example[k] for example in test_set] for k in test_set[0]})
    
    val_set = val_set.shuffle(seed=42)
    test_set = test_set.shuffle(seed=42)

    return val_set, test_set


qqp_train_data = qqp_dataset["train"]
qqp_val_test_data = qqp_dataset["validation"]

qqp_train_data = balance_dataset(qqp_train_data,10000)
qqp_val_data, qqp_test_data  =  get_val_test_set(qqp_val_test_data, 5000, 5000)

qqp_train_questions1, qqp_train_questions2, qqp_train_labels = qqp_train_data['question1'],  qqp_train_data['question2'], qqp_train_data['label']
qqp_val_questions1, qqp_val_questions2, qqp_val_labels = qqp_val_data['question1'],  qqp_val_data['question2'], qqp_val_data['label']
qqp_test_questions1, qqp_test_questions2, qqp_test_labels = qqp_test_data['question1'],  qqp_test_data['question2'], qqp_test_data['label']

print("Dimensioni dei set:")
print(f"Train: {len(qqp_train_questions1)}")
print(f"Validation: {len(qqp_val_questions1)}")
print(f"Test: {len(qqp_test_questions1)}")

print("\nDistribuzione delle etichette:")
print(f"Train: {Counter(qqp_train_labels)}")
print(f"Validation: {Counter(qqp_val_labels)}")
print(f"Test: {Counter(qqp_test_labels)}")

Dimensioni dei set:
Train: 20000
Validation: 5000
Test: 5000

Distribuzione delle etichette:
Train: Counter({1: 10000, 0: 10000})
Validation: Counter({0: 2500, 1: 2500})
Test: Counter({0: 2500, 1: 2500})


In [48]:
from transformers import RobertaTokenizer
from torch.utils.data import DataLoader

MAX_SEQ_LEN = 256
BATCH_SIZE = 32

# Inizializzo il Tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# Ottiengo i dataset
qqp_training_data = NLIDataset(
                           sentences1 = qqp_train_questions1,
                           sentences2 = qqp_train_questions2,
                           labels = qqp_train_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

qqp_validation_data = NLIDataset(
                           sentences1 = qqp_val_questions1,
                           sentences2 = qqp_val_questions2,
                           labels = qqp_val_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

qqp_test_data = NLIDataset(
                           sentences1 = qqp_test_questions1,
                           sentences2 = qqp_test_questions2,
                           labels = qqp_test_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

# Creo i DataLoader
qqp_train_loader = DataLoader(qqp_training_data, batch_size=BATCH_SIZE, shuffle=True)
qqp_val_loader = DataLoader(qqp_validation_data, batch_size=BATCH_SIZE, shuffle=False)
qqp_test_loader = DataLoader(qqp_test_data, batch_size=BATCH_SIZE, shuffle=False)

### COLA

In [49]:
from datasets import load_dataset, Dataset

# Ottenimento del dataset
cola_dataset = load_dataset("glue", "cola")
print(cola_dataset)

DatasetDict({
    train: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 8551
    })
    validation: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 1043
    })
    test: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 1063
    })
})


In [50]:
from sklearn.model_selection import train_test_split
from collections import Counter

cola_train_data = pd.DataFrame(cola_dataset["train"])
cola_val_data = pd.DataFrame(cola_dataset["validation"])

cola_data = Dataset.from_pandas(pd.concat([cola_train_data, cola_val_data], ignore_index=True))
cola_data = cola_data.shuffle(seed=42)

cola_temp_sentences, cola_test_sentences, cola_temp_labels, cola_test_labels = train_test_split(
                                                cola_data['sentence'],
                                                cola_data['label'], 
                                                test_size=0.1, 
                                                random_state=42,
                                                stratify=cola_data['label'])

cola_train_sentences, cola_val_sentences, cola_train_labels, cola_val_labels = train_test_split(
                                                cola_temp_sentences, 
                                                cola_temp_labels,
                                                test_size=0.1111,
                                                random_state=42,
                                                stratify=cola_temp_labels)

print("Dimensioni dei set:")
print(f"Train: {len(cola_train_sentences)}")
print(f"Validation: {len(cola_val_sentences)}")
print(f"Test: {len(cola_test_sentences)}")

# Verifica distribuzione delle etichette
print("\nDistribuzione delle etichette:")
print(f"Train: {Counter(cola_train_labels)}")
print(f"Validation: {Counter(cola_val_labels)}")
print(f"Test: {Counter(cola_test_labels)}")

Dimensioni dei set:
Train: 7674
Validation: 960
Test: 960

Distribuzione delle etichette:
Train: Counter({1: 5394, 0: 2280})
Validation: Counter({1: 675, 0: 285})
Test: Counter({1: 675, 0: 285})


In [51]:
from transformers import RobertaTokenizer
from torch.utils.data import DataLoader

MAX_SEQ_LEN = 128
BATCH_SIZE = 32

# Inizializzo il Tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# Ottiengo i dataset
cola_training_data = ClassificationDataset(sentences = cola_train_sentences,
                           labels = cola_train_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

cola_validation_data = ClassificationDataset(sentences = cola_val_sentences,
                           labels = cola_val_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

cola_test_data = ClassificationDataset(sentences = cola_test_sentences,
                           labels = cola_test_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

# Creo i DataLoader
cola_train_loader = DataLoader(cola_training_data, batch_size=BATCH_SIZE, shuffle=True)
cola_val_loader = DataLoader(cola_validation_data, batch_size=BATCH_SIZE, shuffle=False)
cola_test_loader = DataLoader(cola_test_data, batch_size=BATCH_SIZE, shuffle=False)

## Controllo similarità dei modelli del pool

#### AG News

In [52]:
# # ottenimento del dataset

# ag_dataset = load_dataset("ag_news")
# print(ag_dataset)

In [53]:
# from sklearn.model_selection import train_test_split
# from collections import Counter

# ag_train_dataset = ag_dataset["train"]
# ag_test_dataset = ag_dataset["test"]

# ag_train_sentences, ag_val_sentences, ag_train_labels, ag_val_labels = train_test_split(
#                                                   ag_train_dataset['text'], 
#                                                   ag_train_dataset['label'],
#                                                   test_size=4000, 
#                                                   train_size=20000,
#                                                   random_state=42,
#                                                   shuffle=True,
#                                                   stratify=ag_train_dataset['label'])

# ag_test_sentences, ag_test_labels = ag_test_dataset['text'], ag_test_dataset['label']


# print("Dimensioni dei set:")
# print(f"Train: {len(ag_train_sentences)}")
# print(f"Validation: {len(ag_val_sentences)}")
# print(f"Test: {len(ag_test_sentences)}")

# # Verifica distribuzione delle etichette
# print("\nDistribuzione delle etichette:")
# print(f"Train: {Counter(ag_train_labels)}")
# print(f"Validation: {Counter(ag_val_labels)}")
# print(f"Test: {Counter(ag_test_labels)}")

In [54]:
# from transformers import RobertaTokenizer
# from torch.utils.data import DataLoader

# MAX_SEQ_LEN = 128

# # Inizializza il Tokenizer
# tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# #Ottieni i dataset
# ag_training_data = ClassificationDataset( sentences = ag_train_sentences,
#                            labels = ag_train_labels,
#                            tokenizer = tokenizer,
#                            max_len = MAX_SEQ_LEN)

# ag_validation_data = ClassificationDataset( sentences = ag_val_sentences,
#                              labels = ag_val_labels,
#                              tokenizer = tokenizer,
#                              max_len = MAX_SEQ_LEN)

# ag_test_data = ClassificationDataset( sentences = ag_test_sentences,
#                        labels = ag_test_labels,
#                        tokenizer = tokenizer,
#                        max_len = MAX_SEQ_LEN)

# # Creo i DataLoader
# ag_train_loader = DataLoader(ag_training_data, batch_size=BATCH_SIZE, shuffle=True)
# ag_val_loader = DataLoader(ag_validation_data, batch_size=BATCH_SIZE, shuffle=False)
# ag_test_loader = DataLoader(ag_test_data, batch_size=BATCH_SIZE, shuffle=False)

#### SST-2

In [55]:
# from datasets import load_dataset

# sst_dataset = load_dataset('glue','sst2')
# print(sst_dataset)

In [56]:
# from sklearn.model_selection import train_test_split
# from collections import Counter

# sst_data = sst_dataset['train'].shuffle(seed=42)

# sst_temp_sentences, sst_test_sentences, sst_temp_labels, sst_test_labels = train_test_split(
#                                                   sst_data['sentence'], 
#                                                   sst_data['label'], 
#                                                   test_size=4000, 
#                                                   random_state=42,
#                                                   stratify=sst_data['label'])

# sst_train_sentences, sst_val_sentences, sst_train_labels, sst_val_labels = train_test_split(
#                                                   sst_temp_sentences, 
#                                                   sst_temp_labels,
#                                                   train_size=20000,
#                                                   test_size=4000, 
#                                                   random_state=42,
#                                                   stratify=sst_temp_labels)


# print("Dimensioni dei set:")
# print(f"Train: {len(sst_train_sentences)}")
# print(f"Validation: {len(sst_val_sentences)}")
# print(f"Test: {len(sst_test_sentences)}")

# # Verifica distribuzione delle etichette
# print("\nDistribuzione delle etichette:")
# print(f"Train: {Counter(sst_train_labels)}")
# print(f"Validation: {Counter(sst_val_labels)}")
# print(f"Test: {Counter(sst_test_labels)}") 

In [57]:

# from torch.utils.data import DataLoader

# MAX_SEQ_LEN = 128

# # Inizializza il Tokenizer
# tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# #Ottieni i dataset
# sst_training_data = ClassificationDataset(sentences = sst_train_sentences,
#                            labels = sst_train_labels,
#                            tokenizer = tokenizer,
#                            max_len = MAX_SEQ_LEN)

# sst_validation_data = ClassificationDataset(sentences = sst_val_sentences,
#                            labels = sst_val_labels,
#                            tokenizer = tokenizer,
#                            max_len = MAX_SEQ_LEN)

# sst_test_data = ClassificationDataset(sentences = sst_test_sentences,
#                            labels = sst_test_labels,
#                            tokenizer = tokenizer,
#                            max_len = MAX_SEQ_LEN)

# # Creo i DataLoader
# sst_train_loader = DataLoader(sst_training_data, batch_size=BATCH_SIZE, shuffle=True)
# sst_val_loader = DataLoader(sst_validation_data, batch_size=BATCH_SIZE, shuffle=False)
# sst_test_loader = DataLoader(sst_test_data, batch_size=BATCH_SIZE, shuffle=False)

#### EmoInt

In [58]:
# def load_emoint_dataset(file_path):
#     label_map = {"anger": 0, "joy": 1, "sadness": 2, "fear": 3}
    
#     df = pd.read_csv(file_path, sep="\t", header=None, names=["id", "sentence", "label", "intensity"])
    
#     df = df[["sentence", "label"]]
#     df["label"] = df["label"].map(label_map)
    
#     df = df.sample(frac=1, random_state=42).reset_index(drop=True)
    
#     return df


# ei_dataset = load_emoint_dataset("./dataset/EmotionIntensity dataset/Emotion Intensity Dataset.txt")
# print(ei_dataset.head())

In [59]:
# from sklearn.model_selection import train_test_split
# from collections import Counter

# # Divido i dati in training set, validation set e test set
# ei_temp_sentences, ei_test_sentences, ei_temp_labels, ei_test_labels = train_test_split(
#                                                 ei_dataset['sentence'],
#                                                 ei_dataset['label'], 
#                                                 test_size=0.1, 
#                                                 random_state=42,
#                                                 stratify=ei_dataset['label'])

# ei_train_sentences, ei_val_sentences, ei_train_labels, ei_val_labels = train_test_split(
#                                                 ei_temp_sentences,
#                                                 ei_temp_labels,
#                                                 test_size=0.1111,
#                                                 random_state=42,
#                                                 stratify=ei_temp_labels)

# ei_train_sentences = ei_train_sentences.reset_index(drop=True)
# ei_val_sentences = ei_val_sentences.reset_index(drop=True)
# ei_test_sentences = ei_test_sentences.reset_index(drop=True)
# ei_train_labels = ei_train_labels.reset_index(drop=True)
# ei_val_labels = ei_val_labels.reset_index(drop=True)
# ei_test_labels = ei_test_labels.reset_index(drop=True)


# print("Dimensioni dei set:")
# print(f"Train: {len(ei_train_sentences)}")
# print(f"Validation: {len(ei_val_sentences)}")
# print(f"Test: {len(ei_test_sentences)}")

# # Verifica distribuzione delle etichette
# print("\nDistribuzione delle etichette:")
# print(f"Train: {Counter(ei_train_labels)}")
# print(f"Validation: {Counter(ei_val_labels)}")
# print(f"Test: {Counter(ei_test_labels)}")

In [60]:

# from torch.utils.data import DataLoader

# MAX_SEQ_LEN = 128

# # Inizializza il Tokenizer
# tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# #Ottieni i dataset
# ei_training_data = ClassificationDataset(sentences = ei_train_sentences,
#                            labels = ei_train_labels,
#                            tokenizer = tokenizer,
#                            max_len = MAX_SEQ_LEN)

# ei_validation_data = ClassificationDataset(sentences = ei_val_sentences,
#                            labels = ei_val_labels,
#                            tokenizer = tokenizer,
#                            max_len = MAX_SEQ_LEN)

# ei_test_data = ClassificationDataset(sentences = ei_test_sentences,
#                            labels = ei_test_labels,
#                            tokenizer = tokenizer,
#                            max_len = MAX_SEQ_LEN)

# # Creo i DataLoader
# ei_train_loader = DataLoader(ei_training_data, batch_size=BATCH_SIZE, shuffle=True)
# ei_val_loader = DataLoader(ei_validation_data, batch_size=BATCH_SIZE, shuffle=False)
# ei_test_loader = DataLoader(ei_test_data, batch_size=BATCH_SIZE, shuffle=False)

#### MNLI

In [61]:
# from datasets import load_dataset
# from sklearn.model_selection import train_test_split
# from collections import Counter

# # Carico il datast
# mnli_dataset = load_dataset('glue', 'mnli')
# print(mnli_dataset)


# # Divido i dati in training set, validation set e test set
# mnli_data = mnli_dataset['train'].shuffle(seed=42)

# mnli_temp_premises, mnli_test_premises, mnli_temp_hypotheses, mnli_test_hypotheses, mnli_temp_labels, mnli_test_labels = train_test_split(
#                                                   mnli_data['premise'], 
#                                                   mnli_data['hypothesis'],                
#                                                   mnli_data['label'], 
#                                                   test_size=3000, 
#                                                   random_state=42,
#                                                   stratify=mnli_data['label'])

# mnli_train_premises, mnli_val_premises, mnli_train_hypotheses, mnli_val_hypotheses, mnli_train_labels, mnli_val_labels = train_test_split(
#                                                   mnli_temp_premises, 
#                                                   mnli_temp_hypotheses,
#                                                   mnli_temp_labels,
#                                                   train_size=45000,
#                                                   test_size=3000, 
#                                                   random_state=42,
#                                                   stratify=mnli_temp_labels)

# print("Dimensioni dei set:")
# print(f"Train: {len(mnli_train_premises)}")
# print(f"Validation: {len(mnli_val_premises)}")
# print(f"Test: {len(mnli_test_premises)}")

# # Verifica distribuzione delle etichette
# print("\nDistribuzione delle etichette:")
# print(f"Train: {Counter(mnli_train_labels)}")
# print(f"Validation: {Counter(mnli_val_labels)}")
# print(f"Test: {Counter(mnli_test_labels)}")

In [62]:

# from torch.utils.data import DataLoader

# MAX_SEQ_LEN = 512

# # Inizializza il Tokenizer
# tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# #Ottieni i dataset
# mnli_training_data = NLIDataset(sentences1 = mnli_train_premises,
#                             sentences2 = mnli_train_hypotheses,
#                             labels = mnli_train_labels,
#                             tokenizer = tokenizer,
#                             max_len = MAX_SEQ_LEN)

# mnli_validation_data = NLIDataset(sentences1 = mnli_val_premises,
#                             sentences2 = mnli_val_hypotheses,
#                             labels = mnli_val_labels,
#                             tokenizer = tokenizer,
#                             max_len = MAX_SEQ_LEN)

# mnli_test_data = NLIDataset(sentences1 = mnli_test_premises,
#                             sentences2 = mnli_test_hypotheses,
#                             labels = mnli_test_labels,
#                             tokenizer = tokenizer,
#                             max_len = MAX_SEQ_LEN)

# # Creo i DataLoader
# mnli_train_loader = DataLoader(mnli_training_data, batch_size=BATCH_SIZE, shuffle=True)
# mnli_val_loader = DataLoader(mnli_validation_data, batch_size=BATCH_SIZE, shuffle=False)
# mnli_test_loader = DataLoader(mnli_test_data, batch_size=BATCH_SIZE, shuffle=False)

#### PAWS

In [63]:
# from datasets import load_dataset

# # Carico il datast
# paws_dataset = load_dataset("google-research-datasets/paws", "labeled_final")
# print(paws_dataset)

In [64]:
# from sklearn.model_selection import train_test_split
# from collections import Counter

# paws_train_set = paws_dataset["train"]
# paws_val_set = paws_dataset["validation"]
# paws_test_set = paws_dataset["test"]

# paws_train_sentences1, paws_train_sentences2, paws_train_labels = paws_train_set['sentence1'], paws_train_set['sentence2'], paws_train_set['label']
# paws_val_sentences1, paws_val_sentences2, paws_val_labels = paws_val_set['sentence1'], paws_val_set['sentence2'], paws_val_set['label']
# paws_test_sentences1, paws_test_sentences2, paws_test_labels = paws_test_set['sentence1'], paws_test_set['sentence2'], paws_test_set['label']

# print("Dimensioni dei set:")
# print(f"Train: {len(paws_train_sentences1)}")
# print(f"Validation: {len(paws_val_sentences1)}")
# print(f"Test: {len(paws_test_sentences1)}")

# # Verifica distribuzione delle etichette
# print("\nDistribuzione delle etichette:")
# print(f"Train: {Counter(paws_train_labels)}")
# print(f"Validation: {Counter(paws_val_labels)}")
# print(f"Test: {Counter(paws_test_labels)}")

In [65]:

# from torch.utils.data import DataLoader

# MAX_SEQ_LEN = 256 

# # Inizializza il Tokenizer
# tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# #Ottieni i dataset
# paws_training_data = NLIDataset(sentences1 = paws_train_sentences1,
#                             sentences2 = paws_train_sentences2,
#                             labels = paws_train_labels,
#                             tokenizer = tokenizer,
#                             max_len = MAX_SEQ_LEN)

# paws_validation_data = NLIDataset(sentences1 = paws_val_sentences1,
#                             sentences2 = paws_val_sentences2,
#                             labels = paws_val_labels,
#                             tokenizer = tokenizer,
#                             max_len = MAX_SEQ_LEN)

# paws_test_data = NLIDataset(sentences1 = paws_test_sentences1,
#                             sentences2 = paws_test_sentences2,
#                             labels = paws_test_labels,
#                             tokenizer = tokenizer,
#                             max_len = MAX_SEQ_LEN)

# # Creo i DataLoader
# paws_train_loader = DataLoader(paws_training_data, batch_size=BATCH_SIZE, shuffle=True)
# paws_val_loader = DataLoader(paws_validation_data, batch_size=BATCH_SIZE, shuffle=False)
# paws_test_loader = DataLoader(paws_test_data, batch_size=BATCH_SIZE, shuffle=False)

In [66]:
# import csv
# import itertools
# import pandas as pd
# from tllib.ranking import h_score

# # Device
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# results = []

# for task1, task2 in itertools.combinations(model_pool.keys(), 2):
#     print(f"Calcolando la similarità tra {task1} e {task2}...\n")

#     source_model = model_pool[task1]
#     target_loader = globals()[f"{task2}_train_loader"]  
    
#     leep_score = calculate_leep_scores({task1: source_model}, target_loader)[0]["Score"]
#     logme_score = calculate_logme_scores({task1: source_model}, target_loader, device)[0]["Score"]
#     h_score_score = calculate_h_scores({task1: source_model}, target_loader, device)[0]["Score"]
#     nce_score = calculate_nce_scores({task1: source_model}, target_loader, device)[0]["Score"]

#     results.append([task1, task2, "leep", leep_score])
#     results.append([task1, task2, "logme", logme_score])
#     results.append([task1, task2, "h-score", h_score_score])
#     results.append([task1, task2, "nce", nce_score])


# df_similarity = pd.DataFrame(results, columns=["Task1", "Task2", "Metrica", "Punteggio"])
# print(df_similarity)

 
# csv_filename = "task_similarity_scores.csv"
# df_similarity.to_csv(csv_filename, index=False)
# print(f"Risultati salvati in {csv_filename}")


## Addestramento LoRA cumulativo

In [67]:
import csv
import random

# Task target con i rispettivi DataLoader e dati
tasks = [
     ("sentiment140", sent140_train_loader, 2, sent140_training_data, sent140_validation_data, sent140_test_data,5e-4),
     ("imdb", imdb_train_loader, 13, imdb_training_data, imdb_validation_data, imdb_test_data, 5e-4),
     ("20_news_group", news_train_loader, 20, news_training_data, news_validation_data, news_test_data, 2e-4),
     ("dbpedia", dbpedia_train_loader, 14, dbpedia_training_data, dbpedia_validation_data, dbpedia_test_data, 2e-4),
     ("emotion_dataset", emotion_train_loader, 6, emotion_training_data, emotion_validation_data, emotion_test_data, 1e-4),
     ("rte", rte_train_loader, 2, rte_training_data, rte_validation_data, rte_test_data, 2e-4),
     ("qqp", qqp_train_loader, 2, qqp_training_data, qqp_validation_data, qqp_test_data, 1e-4),
     ("cola", cola_train_loader, 2, cola_training_data, cola_validation_data, cola_test_data, 1e-4),
]


# Parametri di training
epochs = 2
patience = 1
batch_size = 32
optimizer_class = torch.optim.AdamW
similarity_metric = "h-score"
min_accuracy= 0.70
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


csv_filename = "training_results.csv"
csv_headers = [
    "task_name", "selected models" , "selected_model", "similarity_metric", "similarity_score", 
    "selection_time", "training_time", "emission", "selection_emission",
    "learning_rate", "epochs", "batch_size", "optimizer",
    "test_loss", "test_accuracy", "test_f1_score"
]


with open(csv_filename, mode="w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(csv_headers)

    for task_name, train_loader, target_num_classes, training_data, validation_data, test_data, learning_rate in tasks:
        print(f"Training sul task: {task_name}...\n")
        
        # Addestramento
        result = train_target_task(
            task_name, train_loader, target_num_classes, model_pool, 
            training_data, validation_data, test_data, device,
            learning_rate, epochs, patience, batch_size, 
            optimizer_class, similarity_metric, min_accuracy
        )


        # Salvataggio dei risultati
        writer.writerow([
            task_name, result["selected_models"], result["selected_model"], similarity_metric,
            result["similarity_score"],  result["selection_time"], result["training_time"],
            result["emission"], result["selection_emission"], learning_rate, epochs, batch_size,
            optimizer_class.__name__, 
            result["test_loss"], result["test_acc"], result["test_f1"]
        ])

        print(f"Training completato per il task: {task_name}.\n")


print(f"Risultati salvati in '{csv_filename}'")

[codecarbon ERROR @ 15:52:19] Error: Another instance of codecarbon is probably running as we find `/tmp/.codecarbon.lock`. Turn off the other instance to be able to run this one or use `allow_multiple_runs` or delete the file. Exiting.


Training sul task: sentiment140...

Selezione del miglior modello per il task sentiment140 usando h-score...

Modelli selezionati per il confronto: ['sst', 'paws', 'ei', 'mnli', 'ag']

Source: sst, H-Score: 0.4868
Source: paws, H-Score: 0.4240
Source: ei, H-Score: 0.3794
Source: mnli, H-Score: 0.4430




Source: ag, H-Score: 0.3276


TypeError: unsupported format string passed to NoneType.__format__