# Addestramento dei modelli del pool

#### Configurazioni generali

Installazione delle librerie necessarie.

In [1]:
!pip install transformers datasets torch peft



In [2]:
!pip install tensorflow==2.17.0

Collecting tensorflow==2.17.0
  Downloading tensorflow-2.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)
Downloading tensorflow-2.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (601.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m601.3/601.3 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[?25hInstalling collected packages: tensorflow
  Attempting uninstall: tensorflow
    Found existing installation: tensorflow 2.17.1
    Uninstalling tensorflow-2.17.1:
      Successfully uninstalled tensorflow-2.17.1
Successfully installed tensorflow-2.17.0


In [3]:
!pip install codecarbon

Collecting codecarbon
  Downloading codecarbon-2.8.3-py3-none-any.whl.metadata (8.7 kB)
Collecting fief-client[cli] (from codecarbon)
  Downloading fief_client-0.20.0-py3-none-any.whl.metadata (2.1 kB)
Collecting questionary (from codecarbon)
  Downloading questionary-2.1.0-py3-none-any.whl.metadata (5.4 kB)
Collecting rapidfuzz (from codecarbon)
  Downloading rapidfuzz-3.12.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting httpx<0.28.0,>=0.21.3 (from fief-client[cli]->codecarbon)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting jwcrypto<2.0.0,>=1.4 (from fief-client[cli]->codecarbon)
  Downloading jwcrypto-1.5.6-py3-none-any.whl.metadata (3.1 kB)
Collecting yaspin (from fief-client[cli]->codecarbon)
  Downloading yaspin-3.1.0-py3-none-any.whl.metadata (14 kB)
Collecting termcolor<2.4.0,>=2.2.0 (from yaspin->fief-client[cli]->codecarbon)
  Downloading termcolor-2.3.0-py3-none-any.whl.metadata (5.3 kB)
Downloading codecarbon

Importo i moduli necessari.

In [4]:
import os
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset

Impostazione del seme casuale per la riproducibilità.

In [5]:
# Single seed shared by every random number generator used in the notebook.
seed_value = 42

# NOTE: PYTHONHASHSEED is read at interpreter start-up, so setting it here only
# affects child processes, not the hashing of the already-running kernel.
os.environ['PYTHONHASHSEED'] = str(seed_value)

# Seed Python's `random`, NumPy's legacy global generator and PyTorch, in that order.
for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(seed_value)

# When a GPU is available, seed the CUDA generators too and ask cuDNN for
# deterministic behaviour (no benchmark-based algorithm selection).
if torch.cuda.is_available():
    for cuda_seed_fn in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        cuda_seed_fn(seed_value)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

## AG News

### Ottenimento dei dati e preprocessing

Carico il dataset **AG News**, una raccolta di articoli di notizie che devono essere classificate in una delle quattro categorie predefinite: **World**, **Sports**, **Business**, e **Sci/Tech**.

In [6]:
# Load the AG News dataset via `load_dataset` and print its split summary

ag_dataset = load_dataset("ag_news")
print(ag_dataset)

README.md:   0%|          | 0.00/8.07k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/18.6M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/1.23M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/120000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/7600 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 120000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 7600
    })
})


Divido i dati di train in training set e validation set.

In [7]:
from sklearn.model_selection import train_test_split
from collections import Counter

# Number of examples kept per split for quick smoke-test runs.
# Set to None to train / evaluate on the full sub-sampled splits
# (slicing with `[:None]` keeps every element).
DEBUG_SUBSET_SIZE = 10

ag_train_data = ag_dataset["train"]
ag_test_data = ag_dataset["test"]

# Stratified sub-sample of the original training split: 20,000 examples for
# training and 4,000 for validation, preserving the label proportions.
ag_train_sentences, ag_val_sentences, ag_train_labels, ag_val_labels = train_test_split(
    ag_train_data['text'],
    ag_train_data['label'],
    test_size=4000,
    train_size=20000,
    random_state=42,
    shuffle=True,
    stratify=ag_train_data['label'])

ag_test_sentences, ag_test_labels = ag_test_data['text'], ag_test_data['label']

# Optional truncation for fast iteration. The test split is not shuffled here,
# so a small subset is simply its first rows and may be heavily unbalanced.
ag_train_sentences = ag_train_sentences[:DEBUG_SUBSET_SIZE]
ag_val_sentences = ag_val_sentences[:DEBUG_SUBSET_SIZE]
ag_test_sentences = ag_test_sentences[:DEBUG_SUBSET_SIZE]
ag_train_labels = ag_train_labels[:DEBUG_SUBSET_SIZE]
ag_val_labels = ag_val_labels[:DEBUG_SUBSET_SIZE]
ag_test_labels = ag_test_labels[:DEBUG_SUBSET_SIZE]

print("Dimensioni dei set:")
print(f"Train: {len(ag_train_sentences)}")
print(f"Validation: {len(ag_val_sentences)}")
print(f"Test: {len(ag_test_sentences)}")

# Check the label distribution of each split
print("\nDistribuzione delle etichette:")
print(f"Train: {Counter(ag_train_labels)}")
print(f"Validation: {Counter(ag_val_labels)}")
print(f"Test: {Counter(ag_test_labels)}")

Dimensioni dei set:
Train: 10
Validation: 10
Test: 10

Distribuzione delle etichette:
Train: Counter({1: 3, 2: 3, 0: 2, 3: 2})
Validation: Counter({2: 3, 1: 3, 0: 3, 3: 1})
Test: Counter({3: 9, 2: 1})


In [11]:
from torch.utils.data import Dataset

class ClassificationDataset(Dataset):
    """Map-style dataset that tokenizes one sentence per item for sequence classification.

    Args:
        sentences: indexable collection of raw text examples.
        labels: indexable collection of labels, aligned with ``sentences``.
        tokenizer: object exposing ``encode_plus`` (a BERT tokenizer in this notebook).
        max_len: length every encoded sequence is padded / truncated to.
    """

    # Keys of the tokenizer output that are forwarded to the model.
    _ENCODED_KEYS = ('input_ids', 'attention_mask', 'token_type_ids')

    def __init__(self, sentences, labels, tokenizer, max_len):
        self.sentences, self.labels = sentences, labels
        self.tokenizer, self.max_len = tokenizer, max_len

    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, index):
        """Return the encoded sentence at ``index`` together with its label tensor."""
        text = self.sentences[index]
        target = self.labels[index]

        encoded = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            truncation=True,
            return_token_type_ids=True,
            padding="max_length",
            return_attention_mask=True,
            return_tensors='pt',
        )

        # `return_tensors='pt'` yields batched tensors; flatten them to 1-D per item.
        item = {key: encoded[key].flatten() for key in self._ENCODED_KEYS}
        item['labels'] = torch.tensor(target, dtype=torch.long)
        return item

Inizializzo il Tokenizer BERT per tokenizzare le frasi e creo i dataset personalizzati.

In [12]:
from transformers import BertTokenizer
from torch.utils.data import DataLoader

# Every sentence is padded / truncated to this many tokens.
MAX_SEQ_LEN = 128

# Pretrained tokenizer matching the uncased BERT base checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


def _make_ag_split(split_sentences, split_labels):
    """Wrap one AG News split in a ClassificationDataset sharing tokenizer and max length."""
    return ClassificationDataset(sentences=split_sentences,
                                 labels=split_labels,
                                 tokenizer=tokenizer,
                                 max_len=MAX_SEQ_LEN)


# Build the train / validation / test datasets.
ag_training_data = _make_ag_split(ag_train_sentences, ag_train_labels)
ag_validation_data = _make_ag_split(ag_val_sentences, ag_val_labels)
# NOTE: this rebinds `ag_test_data`, which until now held the raw test split.
ag_test_data = _make_ag_split(ag_test_sentences, ag_test_labels)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

### Addestramento del modello

In [13]:
from tqdm import tqdm
import time
import torch
from sklearn.metrics import accuracy_score, f1_score
import torch.nn as nn
from codecarbon import EmissionsTracker


# Funzione di training e valutazione
def train_and_evaluate_model(model, dataset, train_loader, val_loader, optimizer, scheduler, device, epochs=10, patience=3):
    """Fine-tune ``model`` with early stopping while tracking CO2 emissions.

    For every epoch the model is trained on ``train_loader`` and evaluated on
    ``val_loader``. The weights with the best validation *accuracy* are saved to
    ``f"{dataset}_best_model_state.bin"``; training stops early once the
    validation *loss* has not improved for ``patience`` consecutive epochs.

    Args:
        model: model to train (passed through to ``train_model`` / ``eval_model``).
        dataset: prefix used for the checkpoint file name.
        train_loader: loader yielding the training batches.
        val_loader: loader yielding the validation batches.
        optimizer: optimizer updating the trainable parameters.
        scheduler: learning-rate scheduler, stepped inside ``train_model``.
        device: device the batches are moved to.
        epochs: maximum number of epochs.
        patience: epochs without validation-loss improvement tolerated before stopping.

    Returns:
        Tuple ``(history, total_training_time, emissions)``: per-epoch metrics,
        elapsed wall-clock seconds, and the value returned by the emissions
        tracker (``None`` when the tracker could not produce a measurement).
    """
    os.makedirs("carbon_emissions", exist_ok=True)
    tracker = EmissionsTracker(output_dir="carbon_emissions", output_file="emissions.csv")
    tracker.start()

    history = {"train_loss": [], "train_acc": [], "val_loss": [], "val_acc": []}
    # Start below any reachable accuracy so the first epoch always writes a
    # checkpoint, even when the validation accuracy is exactly 0.
    best_accuracy = float('-inf')
    best_loss = float('inf')
    patience_counter = 0
    emissions = None

    start_time = time.time()

    try:
        for epoch in range(epochs):
            print(f"\nEpoch {epoch + 1}/{epochs}")

            # Training
            train_loss, train_acc = train_model(model, train_loader, optimizer, scheduler, device)

            # Validation (the F1 score is not used for model selection)
            val_loss, val_acc, _ = eval_model(model, val_loader, device)

            # Keep the weights with the best validation accuracy
            if val_acc > best_accuracy:
                torch.save(model.state_dict(), f"{dataset}_best_model_state.bin")
                best_accuracy = val_acc

            # Record the metrics of this epoch
            history["train_loss"].append(train_loss)
            history["train_acc"].append(train_acc)
            history["val_loss"].append(val_loss)
            history["val_acc"].append(val_acc)

            # Early stopping on the validation loss
            if val_loss < best_loss:
                best_loss = val_loss
                patience_counter = 0
            else:
                patience_counter += 1
                print(f"La loss sul validation set non è migliorata per {patience_counter} epoche.")

            if patience_counter >= patience:
                print(f"Early stopping attivato dopo {patience_counter} epoche senza miglioramenti")
                break
    finally:
        # Always stop the tracker, even if training raised, so that it does
        # not keep running after a failed cell.
        total_training_time = time.time() - start_time
        emissions = tracker.stop()

    # The tracker may return None; avoid crashing on the format spec after a
    # successful training run.
    if emissions is None:
        print("\nEmissioni CO₂ totali: non disponibili")
    else:
        print(f"\nEmissioni CO₂ totali: {emissions:.4f} kg")

    return history, total_training_time, emissions

In [14]:
# Funzione di training
def train_model(model, data_loader, optimizer, scheduler, device):
    """Run one training epoch and return ``(average batch loss, accuracy)``.

    Each batch is moved to ``device`` and passed to the model together with its
    labels; the loss returned by the model is back-propagated, gradients are
    clipped to a maximum norm of 1.0, and both the optimizer and the scheduler
    are stepped once per batch.

    Args:
        model: model whose output exposes ``.loss`` and ``.logits``.
        data_loader: iterable of batches with the keys ``input_ids``,
            ``attention_mask``, ``token_type_ids`` and ``labels``.
        optimizer: optimizer updating the trainable parameters.
        scheduler: learning-rate scheduler, stepped after every batch.
        device: device the batch tensors are moved to.

    Returns:
        Tuple ``(avg_loss, accuracy)``: mean of the per-batch losses and the
        fraction of correctly classified examples over the whole epoch.

    Raises:
        ValueError: if ``data_loader`` yields no batches.
    """
    model = model.train()

    total_loss = 0.0
    n_correct = 0
    n_seen = 0
    n_batches = 0

    loop = tqdm(data_loader, desc="Training  ", leave=True)

    # Count batches explicitly: tqdm refreshes `loop.n` lazily while iterating,
    # so it cannot be used as a reliable divisor for the running loss.
    for n_batches, batch in enumerate(loop, start=1):

        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        token_type_ids = batch['token_type_ids'].to(device)
        labels = batch["labels"].to(device)

        optimizer.zero_grad()

        # --- Forward pass ---
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            labels=labels
        )

        loss = outputs.loss
        logits = outputs.logits

        # --- Backward pass ---
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()
        scheduler.step()

        total_loss += loss.item()

        preds = torch.argmax(logits, dim=1)  # multi-class predictions

        # Running counters keep the progress-bar update O(batch) instead of
        # re-scoring every prediction seen so far.
        n_correct += (preds == labels).sum().item()
        n_seen += labels.numel()

        loop.set_postfix(loss=total_loss / n_batches, accuracy=n_correct / n_seen)

    if n_batches == 0:
        raise ValueError("data_loader did not yield any batch")

    avg_loss = total_loss / n_batches
    accuracy = n_correct / n_seen

    return avg_loss, accuracy

In [15]:
# Funzione di valutazione
def eval_model(model, data_loader, device):
    """Evaluate ``model`` on ``data_loader`` without computing gradients.

    The model is switched to evaluation mode and left in that mode.

    Args:
        model: model whose output exposes ``.loss`` and ``.logits``.
        data_loader: iterable of batches with the keys ``input_ids``,
            ``attention_mask``, ``token_type_ids`` and ``labels``.
        device: device the batch tensors are moved to.

    Returns:
        Tuple ``(avg_loss, accuracy, f1)``: mean of the per-batch losses,
        overall accuracy, and the weighted-average F1 score.

    Raises:
        ValueError: if ``data_loader`` yields no batches.
    """
    model = model.eval()

    total_loss = 0.0
    n_correct = 0
    n_batches = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():

        loop = tqdm(data_loader, desc="Evaluating", leave=True)

        # Count batches explicitly: tqdm refreshes `loop.n` lazily while
        # iterating, so it cannot be used as a divisor for the running loss.
        for n_batches, batch in enumerate(loop, start=1):

            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            token_type_ids = batch["token_type_ids"].to(device)
            labels = batch["labels"].to(device)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                token_type_ids=token_type_ids,
                labels=labels
            )

            loss = outputs.loss
            logits = outputs.logits

            total_loss += loss.item()

            preds = torch.argmax(logits, dim=1)
            n_correct += (preds == labels).sum().item()

            # Predictions and labels are kept for the final F1 computation.
            all_preds.extend(preds.detach().cpu().numpy())
            all_labels.extend(labels.detach().cpu().numpy())

            loop.set_postfix(loss=total_loss / n_batches, accuracy=n_correct / len(all_labels))

    if n_batches == 0:
        raise ValueError("data_loader did not yield any batch")

    avg_loss = total_loss / n_batches
    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average="weighted")

    return avg_loss, accuracy, f1

Creo il modello con LoRA.

In [16]:
from peft import LoraConfig, get_peft_model
from transformers import BertForSequenceClassification, AutoModelForSequenceClassification

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Pretrained BERT with a 4-way classification head (one class per AG News category).
lora_model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=4)

# LoRA hyper-parameters for AG News: rank-16 adapters injected into the
# attention query / key / value projections, with bias terms left untouched.
lora_config = LoraConfig(
    target_modules=["query", "key", "value"],
    r=16,
    lora_alpha=256,
    lora_dropout=0.2,
    bias="none",
)

# Wrap the base model with the adapters and report how many parameters train.
lora_model = get_peft_model(lora_model, lora_config)
lora_model.print_trainable_parameters()

lora_model.to(device)

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 884,736 || all params: 110,370,052 || trainable%: 0.8016


PeftModel(
  (base_model): LoraModel(
    (model): BertForSequenceClassification(
      (bert): BertModel(
        (embeddings): BertEmbeddings(
          (word_embeddings): Embedding(30522, 768, padding_idx=0)
          (position_embeddings): Embedding(512, 768)
          (token_type_embeddings): Embedding(2, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (encoder): BertEncoder(
          (layer): ModuleList(
            (0-11): 12 x BertLayer(
              (attention): BertAttention(
                (self): BertSdpaSelfAttention(
                  (query): lora.Linear(
                    (base_layer): Linear(in_features=768, out_features=768, bias=True)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.2, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default): Linear(in_features=768

In [17]:
# Make the classification head trainable alongside the LoRA adapters and
# report the resulting flag for each of its parameters.
for name, param in lora_model.named_parameters():
    if "classifier" not in name:
        continue
    param.requires_grad = True
    print(f"{name}: requires_grad = {param.requires_grad}")

base_model.model.classifier.weight: requires_grad = True
base_model.model.classifier.bias: requires_grad = True


Imposto i parametri principali ed effettuo l'addestramento.

In [18]:
# Optimisation hyper-parameters for the AG News run.
learning_rate = 2e-4
EPOCHS = 10
BATCH_SIZE = 32

# Only the training loader is shuffled; validation and test keep their order.
ag_train_loader = DataLoader(ag_training_data, batch_size=BATCH_SIZE, shuffle=True)
ag_val_loader, ag_test_loader = (
    DataLoader(split, batch_size=BATCH_SIZE, shuffle=False)
    for split in (ag_validation_data, ag_test_data)
)

# The scheduler is stepped once per batch, so the schedule spans every batch of every epoch.
total_steps = EPOCHS * len(ag_train_loader)

# Optimise only the parameters that require gradients.
trainable_params = [p for p in lora_model.parameters() if p.requires_grad]
optimizer = torch.optim.AdamW(trainable_params, lr=learning_rate)

# Cosine learning-rate decay without warm-up.
scheduler = transformers.get_cosine_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps,
)

In [19]:
# Fine-tune on AG News. `EPOCHS` is the same constant that sizes the
# learning-rate schedule, so the loop length and the schedule cannot drift apart.
history, total_time, emissions = train_and_evaluate_model(
    lora_model, "ag", ag_train_loader, ag_val_loader, optimizer, scheduler, device, epochs=EPOCHS
)
print(f"\nBERT with LoRA Training Time: {total_time:.2f} seconds, {total_time/60:.2f} minutes.")

[codecarbon INFO @ 20:46:02] [setup] RAM Tracking...
[codecarbon INFO @ 20:46:02] [setup] CPU Tracking...
 Linux OS detected: Please ensure RAPL files exist at \sys\class\powercap\intel-rapl to measure CPU

[codecarbon INFO @ 20:46:03] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.00GHz
[codecarbon INFO @ 20:46:03] [setup] GPU Tracking...
[codecarbon INFO @ 20:46:03] Tracking Nvidia GPU via pynvml
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/codecarbon/core/gpu.py", line 116, in _get_total_energy_consumption
    return pynvml.nvmlDeviceGetTotalEnergyConsumption(self.handle)
  File "/usr/local/lib/python3.10/dist-packages/pynvml/nvml.py", line 2039, in nvmlDeviceGetTotalEnergyConsumption
    _nvmlCheckReturn(ret)
  File "/usr/local/lib/python3.10/dist-packages/pynvml/nvml.py", line 765, in _nvmlCheckReturn
    raise NVMLError(ret)
pynvml.nvml.NVMLError_NotSupported: Not Supported
Traceback (most recent call last):
  File "/usr/loc


Epoch 1/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  1.14it/s, accuracy=0.2, loss=1.46]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 16.90it/s, accuracy=0.2, loss=1.39]



Epoch 2/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  8.72it/s, accuracy=0.1, loss=1.44]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 17.90it/s, accuracy=0.4, loss=1.34]



Epoch 3/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  9.21it/s, accuracy=0.3, loss=1.31]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 18.20it/s, accuracy=0.8, loss=1.3]



Epoch 4/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  9.07it/s, accuracy=0.4, loss=1.31]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 16.83it/s, accuracy=0.8, loss=1.29]



Epoch 5/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  8.63it/s, accuracy=0.6, loss=1.27]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 14.89it/s, accuracy=0.6, loss=1.28]


Epoch 6/10



Training  : 100%|██████████| 1/1 [00:00<00:00,  8.21it/s, accuracy=0.7, loss=1.22]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 17.89it/s, accuracy=0.6, loss=1.26]



Epoch 7/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  9.20it/s, accuracy=0.7, loss=1.17]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 18.00it/s, accuracy=0.6, loss=1.25]



Epoch 8/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  9.24it/s, accuracy=0.9, loss=1.09]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 18.31it/s, accuracy=0.6, loss=1.25]



Epoch 9/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  9.21it/s, accuracy=0.8, loss=1.13]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 17.41it/s, accuracy=0.6, loss=1.25]



Epoch 10/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  8.41it/s, accuracy=0.6, loss=1.15]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 14.93it/s, accuracy=0.6, loss=1.25]
[codecarbon INFO @ 20:46:12] Energy consumed for RAM : 0.000018 kWh. RAM Power : 11.756441116333008 W
[codecarbon INFO @ 20:46:12] Energy consumed for all CPUs : 0.000066 kWh. Total CPU Power : 42.5 W
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/codecarbon/core/gpu.py", line 116, in _get_total_energy_consumption
    return pynvml.nvmlDeviceGetTotalEnergyConsumption(self.handle)
  File "/usr/local/lib/python3.10/dist-packages/pynvml/nvml.py", line 2039, in nvmlDeviceGetTotalEnergyConsumption
    _nvmlCheckReturn(ret)
  File "/usr/local/lib/python3.10/dist-packages/pynvml/nvml.py", line 765, in _nvmlCheckReturn
    raise NVMLError(ret)
pynvml.nvml.NVMLError_NotSupported: Not Supported
[codecarbon INFO @ 20:46:12] Energy consumed for all GPUs : 0.000000 kWh. Total GPU Power : 0.0 W
[codec


Emissioni CO₂ totali: 0.0000 kg

BERT with LoRA Training Time: 5.58 seconds, 0.09 minutes.


  df = pd.concat([df, pd.DataFrame.from_records([dict(total.values)])])


### Valutazione del modello
Valuto il modello calcolando la loss sul test set, l'accuracy e l'F1-score.

In [20]:
# Restore the checkpoint with the best validation accuracy. `map_location`
# keeps the load working when the device differs from the one used for saving;
# `weights_only=True` restricts unpickling to tensors and plain containers.
best_state = torch.load("ag_best_model_state.bin", map_location=device, weights_only=True)
lora_model.load_state_dict(best_state)

test_loss, test_acc, test_f1 = eval_model(lora_model, ag_test_loader, device)
print(f"LoRA Fine-Tuning - Test loss: {test_loss:.4f}, Accuracy: {test_acc:.4f}, F1 score: {test_f1:.4f}")

  lora_model.load_state_dict(torch.load("ag_best_model_state.bin"))
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 16.27it/s, accuracy=0.6, loss=1.32]

LoRA Fine-Tuning - Test loss: 1.3153, Accuracy: 0.6000, F1 score: 0.7095





In [21]:
# One result record (dict) per fine-tuned task, in insertion order.
model_performance = []


def add_task_results(task_name, training_time, emissions, test_loss, test_acc, test_f1):
    """Append the metrics of a just-trained task to ``model_performance``.

    Args:
        task_name: identifier of the task.
        training_time: training duration.
        emissions: CO2 emissions measured during training.
        test_loss: loss on the test set.
        test_acc: accuracy on the test set.
        test_f1: F1 score on the test set.
    """
    columns = ("Task", "Training Time", "CO2 Emissions", "Test Loss", "Accuracy", "F1 Score")
    values = (task_name, training_time, emissions, test_loss, test_acc, test_f1)
    model_performance.append(dict(zip(columns, values)))

In [22]:
# Record the AG News results and print the summary table
ag_results = dict(
    task_name="agnews",
    training_time=total_time,
    emissions=emissions,
    test_loss=test_loss,
    test_acc=test_acc,
    test_f1=test_f1,
)
add_task_results(**ag_results)

performance = pd.DataFrame(model_performance)
print(performance)

     Task  Training Time  CO2 Emissions  Test Loss  Accuracy  F1 Score
0  agnews       5.584955       0.000012    1.31526       0.6  0.709524


### Salvataggio dell'adapter LoRA

In [23]:
# Write the LoRA adapter to its own directory.
lora_model.save_pretrained("ag_lora_adapter")

# The classification head is stored in a separate file, moved to the CPU first.
ag_head = lora_model.base_model.model.classifier
classifier_state_dict = {
    f"classifier.{attr}": getattr(ag_head, attr).cpu()
    for attr in ("weight", "bias")
}

torch.save(classifier_state_dict, "ag_classifier_head.pth")

In [24]:
# from peft import PeftModel

# base_model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=4)
# lora_model = PeftModel.from_pretrained(base_model, "ag_lora_adapter")

# classifier_state_dict = torch.load("ag_classifier_head.pth", map_location=device, weights_only=True)

# lora_model.base_model.classifier.weight.data.copy_(classifier_state_dict["classifier.weight"])
# lora_model.base_model.classifier.bias.data.copy_(classifier_state_dict["classifier.bias"])


## SST-2

### Ottenimento dei dati e preprocessing

Carico il dataset SST-2, un dataset contenente esempi che consistono in frasi tratte da recensioni di film, le cui etichette sono 1 se la recensione è positiva, 0 altrimenti.

In [25]:
from datasets import load_dataset

# Load the `sst2` configuration of the `glue` dataset and print its split summary
sst_dataset = load_dataset('glue','sst2')
print(sst_dataset)

README.md:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/3.11M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/72.8k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 67349
    })
    validation: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 872
    })
    test: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 1821
    })
})


In [26]:
from sklearn.model_selection import train_test_split
from collections import Counter

# Number of examples kept per split for quick smoke-test runs.
# Set to None to train / evaluate on the full sub-sampled splits
# (slicing with `[:None]` keeps every element).
DEBUG_SUBSET_SIZE = 10

sst_data = sst_dataset['train'].shuffle(seed=42)

# First carve a stratified 4,000-example test set out of the training split...
sst_temp_sentences, sst_test_sentences, sst_temp_labels, sst_test_labels = train_test_split(
    sst_data['sentence'],
    sst_data['label'],
    test_size=4000,
    random_state=42,
    stratify=sst_data['label'])

# ...then draw 20,000 training and 4,000 validation examples from the remainder.
sst_train_sentences, sst_val_sentences, sst_train_labels, sst_val_labels = train_test_split(
    sst_temp_sentences,
    sst_temp_labels,
    train_size=20000,
    test_size=4000,
    random_state=42,
    stratify=sst_temp_labels)

# Optional truncation for fast iteration (no-op when DEBUG_SUBSET_SIZE is None).
sst_train_sentences = sst_train_sentences[:DEBUG_SUBSET_SIZE]
sst_val_sentences = sst_val_sentences[:DEBUG_SUBSET_SIZE]
sst_test_sentences = sst_test_sentences[:DEBUG_SUBSET_SIZE]
sst_train_labels = sst_train_labels[:DEBUG_SUBSET_SIZE]
sst_val_labels = sst_val_labels[:DEBUG_SUBSET_SIZE]
sst_test_labels = sst_test_labels[:DEBUG_SUBSET_SIZE]

print("Dimensioni dei set:")
print(f"Train: {len(sst_train_sentences)}")
print(f"Validation: {len(sst_val_sentences)}")
print(f"Test: {len(sst_test_sentences)}")

# Check the label distribution of each split
print("\nDistribuzione delle etichette:")
print(f"Train: {Counter(sst_train_labels)}")
print(f"Validation: {Counter(sst_val_labels)}")
print(f"Test: {Counter(sst_test_labels)}")

Dimensioni dei set:
Train: 10
Validation: 10
Test: 10

Distribuzione delle etichette:
Train: Counter({0: 6, 1: 4})
Validation: Counter({0: 5, 1: 5})
Test: Counter({1: 7, 0: 3})


Inizializzo il Tokenizer BERT per tokenizzare le frasi e creo i dataset personalizzati.

In [29]:
from transformers import BertTokenizer
from torch.utils.data import DataLoader

# Every sentence is padded / truncated to this many tokens.
MAX_SEQ_LEN = 128

# Pretrained tokenizer matching the uncased BERT base checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


def _make_sst_split(split_sentences, split_labels):
    """Wrap one SST-2 split in a ClassificationDataset sharing tokenizer and max length."""
    return ClassificationDataset(sentences=split_sentences,
                                 labels=split_labels,
                                 tokenizer=tokenizer,
                                 max_len=MAX_SEQ_LEN)


# Build the train / validation / test datasets.
sst_training_data = _make_sst_split(sst_train_sentences, sst_train_labels)
sst_validation_data = _make_sst_split(sst_val_sentences, sst_val_labels)
sst_test_data = _make_sst_split(sst_test_sentences, sst_test_labels)

### Addestramento del modello

In [30]:
from peft import LoraConfig, get_peft_model
from transformers import BertForSequenceClassification, AutoModelForSequenceClassification

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Pretrained BERT with a binary classification head (negative / positive review).
lora_model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

# LoRA hyper-parameters for SST-2: rank-32 adapters injected into the
# attention query / key / value projections, with bias terms left untouched.
lora_config = LoraConfig(
    target_modules=["query", "key", "value"],
    r=32,
    lora_alpha=128,
    lora_dropout=0.1,
    bias="none",
)

# Wrap the base model with the adapters and report how many parameters train.
lora_model = get_peft_model(lora_model, lora_config)
lora_model.print_trainable_parameters()

lora_model.to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 1,769,472 || all params: 111,253,250 || trainable%: 1.5905


PeftModel(
  (base_model): LoraModel(
    (model): BertForSequenceClassification(
      (bert): BertModel(
        (embeddings): BertEmbeddings(
          (word_embeddings): Embedding(30522, 768, padding_idx=0)
          (position_embeddings): Embedding(512, 768)
          (token_type_embeddings): Embedding(2, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (encoder): BertEncoder(
          (layer): ModuleList(
            (0-11): 12 x BertLayer(
              (attention): BertAttention(
                (self): BertSdpaSelfAttention(
                  (query): lora.Linear(
                    (base_layer): Linear(in_features=768, out_features=768, bias=True)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.1, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default): Linear(in_features=768

In [31]:
# Make the classification head trainable alongside the LoRA adapters and
# report the resulting flag for each of its parameters.
for name, param in lora_model.named_parameters():
    if "classifier" not in name:
        continue
    param.requires_grad = True
    print(f"{name}: requires_grad = {param.requires_grad}")


base_model.model.classifier.weight: requires_grad = True
base_model.model.classifier.bias: requires_grad = True


In [32]:
# Optimisation hyper-parameters for the SST-2 run.
learning_rate = 5e-4
EPOCHS = 10
BATCH_SIZE = 32

# Only the training loader is shuffled; validation and test keep their order.
sst_train_loader = DataLoader(sst_training_data, batch_size=BATCH_SIZE, shuffle=True)
sst_val_loader, sst_test_loader = (
    DataLoader(split, batch_size=BATCH_SIZE, shuffle=False)
    for split in (sst_validation_data, sst_test_data)
)

# The scheduler is stepped once per batch, so the schedule spans every batch of every epoch.
total_steps = EPOCHS * len(sst_train_loader)

# Optimise only the parameters that require gradients.
trainable_params = [p for p in lora_model.parameters() if p.requires_grad]
optimizer = torch.optim.AdamW(trainable_params, lr=learning_rate)

# Cosine learning-rate decay without warm-up.
scheduler = transformers.get_cosine_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps,
)

In [33]:
# Fine-tune on SST-2; the "sst" argument is the prefix of the checkpoint file,
# and EPOCHS is the same constant used to size the learning-rate schedule.
history, total_time, emissions = train_and_evaluate_model(
    lora_model,"sst", sst_train_loader, sst_val_loader, optimizer, scheduler, device, epochs=EPOCHS
) 
print(f"\nBERT with LoRA Training Time: {total_time:.2f} seconds, {total_time/60:.2f} minutes.")

[codecarbon INFO @ 20:46:24] [setup] RAM Tracking...
[codecarbon INFO @ 20:46:24] [setup] CPU Tracking...
 Linux OS detected: Please ensure RAPL files exist at \sys\class\powercap\intel-rapl to measure CPU

[codecarbon INFO @ 20:46:25] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.00GHz
[codecarbon INFO @ 20:46:25] [setup] GPU Tracking...
[codecarbon INFO @ 20:46:25] Tracking Nvidia GPU via pynvml
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/codecarbon/core/gpu.py", line 116, in _get_total_energy_consumption
    return pynvml.nvmlDeviceGetTotalEnergyConsumption(self.handle)
  File "/usr/local/lib/python3.10/dist-packages/pynvml/nvml.py", line 2039, in nvmlDeviceGetTotalEnergyConsumption
    _nvmlCheckReturn(ret)
  File "/usr/local/lib/python3.10/dist-packages/pynvml/nvml.py", line 765, in _nvmlCheckReturn
    raise NVMLError(ret)
pynvml.nvml.NVMLError_NotSupported: Not Supported
Traceback (most recent call last):
  File "/usr/loc


Epoch 1/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  8.95it/s, accuracy=0.6, loss=0.699]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 19.76it/s, accuracy=0.5, loss=0.685]



Epoch 2/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  9.66it/s, accuracy=0.5, loss=0.685]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 20.36it/s, accuracy=0.5, loss=0.697]


La loss sul validation set non è migliorata per 1 epoche.

Epoch 3/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  9.70it/s, accuracy=0.6, loss=0.665]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 20.13it/s, accuracy=0.5, loss=0.692]


La loss sul validation set non è migliorata per 2 epoche.

Epoch 4/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  9.63it/s, accuracy=0.6, loss=0.603]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 20.17it/s, accuracy=0.5, loss=0.687]
[codecarbon INFO @ 20:46:29] Energy consumed for RAM : 0.000004 kWh. RAM Power : 11.756441116333008 W
[codecarbon INFO @ 20:46:29] Energy consumed for all CPUs : 0.000016 kWh. Total CPU Power : 42.5 W
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/codecarbon/core/gpu.py", line 116, in _get_total_energy_consumption
    return pynvml.nvmlDeviceGetTotalEnergyConsumption(self.handle)
  File "/usr/local/lib/python3.10/dist-packages/pynvml/nvml.py", line 2039, in nvmlDeviceGetTotalEnergyConsumption
    _nvmlCheckReturn(ret)
  File "/usr/local/lib/python3.10/dist-packages/pynvml/nvml.py", line 765, in _nvmlCheckReturn
    raise NVMLError(ret)
pynvml.nvml.NVMLError_NotSupported: Not Supported
[codecarbon INFO @ 20:46:30] Energy consumed for all GPUs : 0.000000 kWh. Total GPU Power : 0.0 W
[cod

La loss sul validation set non è migliorata per 3 epoche.
Early stopping attivato dopo 3 epoche senza miglioramenti

Emissioni CO₂ totali: 0.0000 kg

BERT with LoRA Training Time: 1.32 seconds, 0.02 minutes.


### Valutazione del modello
Valuto il modello calcolando la loss sul test set, l'accuracy e l'F1-score.

In [34]:
# Restore the checkpoint with the best validation accuracy. `map_location`
# keeps the load working when the device differs from the one used for saving;
# `weights_only=True` restricts unpickling to tensors and plain containers.
best_state = torch.load("sst_best_model_state.bin", map_location=device, weights_only=True)
lora_model.load_state_dict(best_state)

test_loss, test_acc, test_f1 = eval_model(lora_model, sst_test_loader, device)
print(f"LoRA Fine-Tuning - Test loss: {test_loss:.4f}, Accuracy: {test_acc:.4f}, F1 score: {test_f1:.4f}")

  lora_model.load_state_dict(torch.load("sst_best_model_state.bin"))
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 20.40it/s, accuracy=0.3, loss=0.742]

LoRA Fine-Tuning - Test loss: 0.7416, Accuracy: 0.3000, F1 score: 0.1385





In [35]:
# Record the SST-2 results and print the summary table
sst_results = dict(
    task_name="sst",
    training_time=total_time,
    emissions=emissions,
    test_loss=test_loss,
    test_acc=test_acc,
    test_f1=test_f1,
)
add_task_results(**sst_results)

performance = pd.DataFrame(model_performance)
print(performance)

     Task  Training Time  CO2 Emissions  Test Loss  Accuracy  F1 Score
0  agnews       5.584955       0.000012   1.315260       0.6  0.709524
1     sst       1.320940       0.000003   0.741614       0.3  0.138462


### Salvataggio dell'adapter LoRA

In [36]:
# Write the LoRA adapter to its own directory.
lora_model.save_pretrained("sst_lora_adapter")

# The classification head is stored in a separate file, moved to the CPU first.
sst_head = lora_model.base_model.model.classifier
classifier_state_dict = {
    f"classifier.{attr}": getattr(sst_head, attr).cpu()
    for attr in ("weight", "bias")
}

torch.save(classifier_state_dict, "sst_classifier_head.pth")

In [37]:
# from peft import PeftModel

# base_model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
# lora_model = PeftModel.from_pretrained(base_model, "sst_lora_adapter")

# classifier_state_dict = torch.load("sst_classifier_head.pth", map_location=device, weights_only=True)


# lora_model.base_model.classifier.weight.data.copy_(classifier_state_dict["classifier.weight"])
# lora_model.base_model.classifier.bias.data.copy_(classifier_state_dict["classifier.bias"])


## EmoInt

In [38]:
def load_emoint_dataset(file_path):
    """Load the EmoInt file as a shuffled (sentence, label) DataFrame.

    The file is read as a header-less, tab-separated table with the columns
    ``id``, ``sentence``, ``label`` and ``intensity``. Only the sentence and the
    emotion are kept; the emotion name is mapped to an integer class id
    (anger=0, joy=1, sadness=2, fear=3) and the rows are shuffled with a fixed
    seed so the order is reproducible.

    Args:
        file_path: Path (or any source accepted by ``pandas.read_csv``) of the
            tab-separated EmoInt file.

    Returns:
        A DataFrame with columns ``sentence`` and ``label`` (int64 class id)
        and a fresh 0..n-1 index.

    Raises:
        ValueError: If a row has a missing label or a label outside the mapping
            (previously such rows silently became NaN labels).
    """
    label_map = {"anger": 0, "joy": 1, "sadness": 2, "fear": 3}

    # NOTE(review): tweets containing double quotes may be affected by the default
    # CSV quoting rules — confirm against the raw file.
    raw = pd.read_csv(file_path, sep="\t", header=None, names=["id", "sentence", "label", "intensity"])

    mapped = raw["label"].map(label_map)
    missing = mapped.isna()
    if missing.any():
        unknown = sorted(raw.loc[missing, "label"].astype(str).unique())
        raise ValueError(
            f"Unexpected label(s) in {file_path!r}: {unknown}; expected one of {sorted(label_map)}"
        )

    # Build a new frame instead of assigning into a column subset, which would be
    # a chained assignment (SettingWithCopyWarning).
    df = raw[["sentence"]].assign(label=mapped.astype("int64"))

    # Deterministic shuffle
    return df.sample(frac=1, random_state=42).reset_index(drop=True)


# Load the EmoInt file from the Kaggle input directory (hard-coded path; adjust when
# running outside Kaggle) and preview the first rows
ei_dataset = load_emoint_dataset("/kaggle/input/emoint-dataset/Emotion Intensity Dataset.txt")
print(ei_dataset.head())


                                            sentence  label
0  what does everyone have against sparkling wate...      1
1  Or when they hmu on snap, and I'm like.. which...      0
2  Can we get a shot of Lingys face at 1/4 time ?...      0
3  I stepped into the shower and my spidey senses...      3
4  @AaliyahLove69 I would be intimidated but I wo...      3


In [40]:
from sklearn.model_selection import train_test_split
from collections import Counter

# Number of examples kept from each split for quick smoke-test runs.
# Set to None to use the full stratified splits.
EI_SUBSET_SIZE = 10

# Split the data into training, validation and test sets.
# First hold out 10% as the test set ...
ei_temp_sentences, ei_test_sentences, ei_temp_labels, ei_test_labels = train_test_split(
                                                ei_dataset['sentence'],
                                                ei_dataset['label'],
                                                test_size=0.1,
                                                random_state=42,
                                                stratify=ei_dataset['label'])

# ... then take 11.11% of the remaining 90% (about 10% of the total) for validation.
ei_train_sentences, ei_val_sentences, ei_train_labels, ei_val_labels = train_test_split(
                                                ei_temp_sentences,
                                                ei_temp_labels,
                                                test_size=0.1111,
                                                random_state=42,
                                                stratify=ei_temp_labels)

# Reset the indices so the Series can be indexed positionally (0..n-1), then
# optionally truncate each split. Slicing with None keeps the whole split.
# Note that truncation does not preserve the stratified class proportions.
ei_train_sentences = ei_train_sentences.reset_index(drop=True)[:EI_SUBSET_SIZE]
ei_val_sentences = ei_val_sentences.reset_index(drop=True)[:EI_SUBSET_SIZE]
ei_test_sentences = ei_test_sentences.reset_index(drop=True)[:EI_SUBSET_SIZE]
ei_train_labels = ei_train_labels.reset_index(drop=True)[:EI_SUBSET_SIZE]
ei_val_labels = ei_val_labels.reset_index(drop=True)[:EI_SUBSET_SIZE]
ei_test_labels = ei_test_labels.reset_index(drop=True)[:EI_SUBSET_SIZE]

print("Dimensioni dei set:")
print(f"Train: {len(ei_train_sentences)}")
print(f"Validation: {len(ei_val_sentences)}")
print(f"Test: {len(ei_test_sentences)}")

# Check the label distribution of each split
print("\nDistribuzione delle etichette:")
print(f"Train: {Counter(ei_train_labels)}")
print(f"Validation: {Counter(ei_val_labels)}")
print(f"Test: {Counter(ei_test_labels)}")

Dimensioni dei set:
Train: 10
Validation: 10
Test: 10

Distribuzione delle etichette:
Train: Counter({1: 4, 3: 3, 0: 2, 2: 1})
Validation: Counter({0: 4, 1: 3, 2: 2, 3: 1})
Test: Counter({0: 3, 2: 3, 1: 3, 3: 1})


In [41]:
import matplotlib.pyplot as plt
from collections import Counter

# Class id -> emotion name (inverse of the mapping used when loading the data)
label_map = dict(enumerate(["anger", "joy", "sadness", "fear"]))

# Frequency of each class id in the training labels
train_label_counts = Counter(ei_train_labels)

# Walk the class ids once, in ascending order, to get bar names and heights
_sorted_class_ids = sorted(train_label_counts)
labels = [label_map[class_id] for class_id in _sorted_class_ids]
counts = [train_label_counts[class_id] for class_id in _sorted_class_ids]

# Bar chart of the training-set label distribution
plt.figure(figsize=(5, 4))
plt.bar(labels, counts, color='royalblue', width=0.5)
plt.title("Distribuzione delle etichette nel Training Set")
plt.xlabel("Classi")
plt.ylabel("Frequenza")
plt.xticks(labels, rotation=0, ha="center")  # horizontal, centred tick labels
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Render the figure
plt.show()


In [42]:
from transformers import BertTokenizer
from torch.utils.data import DataLoader

# Maximum sequence length passed to the dataset wrapper as max_len
MAX_SEQ_LEN = 128

# Initialise the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Build the train / validation / test datasets.
# ClassificationDataset is not defined in this section (presumably earlier in the notebook).
ei_training_data = ClassificationDataset(sentences = ei_train_sentences,
                           labels = ei_train_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

ei_validation_data = ClassificationDataset(sentences = ei_val_sentences,
                           labels = ei_val_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

ei_test_data = ClassificationDataset(sentences = ei_test_sentences,
                           labels = ei_test_labels,
                           tokenizer = tokenizer,
                           max_len = MAX_SEQ_LEN)

In [43]:
from peft import LoraConfig, get_peft_model
from transformers import BertForSequenceClassification, AutoModelForSequenceClassification

# Device: use the GPU when one is available, otherwise the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Pretrained BERT with a 4-output classification head (EmoInt has four emotion classes)
lora_model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=4)

# LoRA config: rank-32 adapters on the attention query/key/value projections.
# The classifier head is not listed here; it is unfrozen separately in the following cell.
lora_config = LoraConfig(
    r=32,
    lora_alpha=512,
    lora_dropout=0.3,
    target_modules=["query", "key", "value"],  
    bias="none",
)

# Wrap the base model with the adapters and print the trainable-parameter count
lora_model = get_peft_model(lora_model, lora_config)
lora_model.print_trainable_parameters()

# Move the model to the selected device (as the last expression, this also displays the model repr)
lora_model.to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 1,769,472 || all params: 111,254,788 || trainable%: 1.5905


PeftModel(
  (base_model): LoraModel(
    (model): BertForSequenceClassification(
      (bert): BertModel(
        (embeddings): BertEmbeddings(
          (word_embeddings): Embedding(30522, 768, padding_idx=0)
          (position_embeddings): Embedding(512, 768)
          (token_type_embeddings): Embedding(2, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (encoder): BertEncoder(
          (layer): ModuleList(
            (0-11): 12 x BertLayer(
              (attention): BertAttention(
                (self): BertSdpaSelfAttention(
                  (query): lora.Linear(
                    (base_layer): Linear(in_features=768, out_features=768, bias=True)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.3, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default): Linear(in_features=768

In [44]:
# Make every classifier-head parameter trainable and report its flag right away.
for name, param in lora_model.named_parameters():
    if "classifier" not in name:
        continue
    param.requires_grad = True
    print(f"{name}: requires_grad = {param.requires_grad}")


base_model.model.classifier.weight: requires_grad = True
base_model.model.classifier.bias: requires_grad = True


In [45]:
# Parametri principali
learning_rate = 1e-4
EPOCHS = 10
BATCH_SIZE = 32

# Creo i DataLoader
ei_train_loader = DataLoader(ei_training_data, batch_size=BATCH_SIZE, shuffle=True)
ei_val_loader = DataLoader(ei_validation_data, batch_size=BATCH_SIZE, shuffle=False)
ei_test_loader = DataLoader(ei_test_data, batch_size=BATCH_SIZE, shuffle=False)

total_steps = len(ei_train_loader) * EPOCHS

# Ottimizzatore
optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, lora_model.parameters()), lr = learning_rate)

# Scheduler
scheduler = transformers.get_cosine_schedule_with_warmup(optimizer = optimizer,
                                                       num_warmup_steps = 0.2 * total_steps,
                                                       num_training_steps = total_steps)

In [46]:
# Train on EmoInt with validation after each epoch; "ei" is the task name passed to
# train_and_evaluate_model (presumably used for the checkpoint file name — the next
# cell loads "ei_best_model_state.bin"). The call returns the history, the elapsed
# time and the emissions value.
history, total_time, emissions = train_and_evaluate_model(
    lora_model,"ei", ei_train_loader, ei_val_loader, optimizer, scheduler, device, epochs=EPOCHS
) 
print(f"\nBERT with LoRA Training Time: {total_time:.2f} seconds, {total_time/60:.2f} minutes.")

[codecarbon INFO @ 20:46:31] [setup] RAM Tracking...
[codecarbon INFO @ 20:46:31] [setup] CPU Tracking...
 Linux OS detected: Please ensure RAPL files exist at \sys\class\powercap\intel-rapl to measure CPU

[codecarbon INFO @ 20:46:32] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.00GHz
[codecarbon INFO @ 20:46:32] [setup] GPU Tracking...
[codecarbon INFO @ 20:46:32] Tracking Nvidia GPU via pynvml
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/codecarbon/core/gpu.py", line 116, in _get_total_energy_consumption
    return pynvml.nvmlDeviceGetTotalEnergyConsumption(self.handle)
  File "/usr/local/lib/python3.10/dist-packages/pynvml/nvml.py", line 2039, in nvmlDeviceGetTotalEnergyConsumption
    _nvmlCheckReturn(ret)
  File "/usr/local/lib/python3.10/dist-packages/pynvml/nvml.py", line 765, in _nvmlCheckReturn
    raise NVMLError(ret)
pynvml.nvml.NVMLError_NotSupported: Not Supported
Traceback (most recent call last):
  File "/usr/loc


Epoch 1/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  9.57it/s, accuracy=0.2, loss=1.43]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 19.66it/s, accuracy=0.1, loss=1.6]



Epoch 2/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  9.51it/s, accuracy=0.3, loss=1.39]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 19.62it/s, accuracy=0.1, loss=1.57]



Epoch 3/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  9.59it/s, accuracy=0.5, loss=1.24]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 19.41it/s, accuracy=0.2, loss=1.55]



Epoch 4/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  9.54it/s, accuracy=0.5, loss=1.26]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 19.64it/s, accuracy=0.2, loss=1.52]



Epoch 5/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  9.54it/s, accuracy=0.5, loss=1.23]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 19.71it/s, accuracy=0.2, loss=1.51]



Epoch 6/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  9.61it/s, accuracy=0.7, loss=1.17]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 19.38it/s, accuracy=0.3, loss=1.5]



Epoch 7/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  9.43it/s, accuracy=0.7, loss=1.17]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 19.15it/s, accuracy=0.3, loss=1.49]



Epoch 8/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  9.55it/s, accuracy=0.7, loss=1.15]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 19.52it/s, accuracy=0.3, loss=1.49]



Epoch 9/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  9.66it/s, accuracy=0.6, loss=1.14]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 19.33it/s, accuracy=0.3, loss=1.48]



Epoch 10/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  9.61it/s, accuracy=0.5, loss=1.15]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 19.62it/s, accuracy=0.3, loss=1.48]
[codecarbon INFO @ 20:46:40] Energy consumed for RAM : 0.000015 kWh. RAM Power : 11.756441116333008 W
[codecarbon INFO @ 20:46:40] Energy consumed for all CPUs : 0.000054 kWh. Total CPU Power : 42.5 W
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/codecarbon/core/gpu.py", line 116, in _get_total_energy_consumption
    return pynvml.nvmlDeviceGetTotalEnergyConsumption(self.handle)
  File "/usr/local/lib/python3.10/dist-packages/pynvml/nvml.py", line 2039, in nvmlDeviceGetTotalEnergyConsumption
    _nvmlCheckReturn(ret)
  File "/usr/local/lib/python3.10/dist-packages/pynvml/nvml.py", line 765, in _nvmlCheckReturn
    raise NVMLError(ret)
pynvml.nvml.NVMLError_NotSupported: Not Supported
[codecarbon INFO @ 20:46:40] Energy consumed for all GPUs : 0.000000 kWh. Total GPU Power : 0.0 W
[codec


Emissioni CO₂ totali: 0.0000 kg

BERT with LoRA Training Time: 4.55 seconds, 0.08 minutes.


In [47]:
# Restore the best EmoInt checkpoint (presumably written by train_and_evaluate_model).
# map_location keeps the load working when the checkpoint was saved on another
# device; weights_only=True restricts unpickling to tensors and plain containers
# and avoids the torch.load FutureWarning.
lora_model.load_state_dict(
    torch.load("ei_best_model_state.bin", map_location=device, weights_only=True)
)

# Evaluate on the held-out test split and report loss, accuracy and F1
test_loss, test_acc, test_f1 = eval_model(lora_model, ei_test_loader, device)
print(f"LoRA Fine-Tuning - Test loss: {test_loss:.4f}, Accuracy: {test_acc:.4f}, F1 score: {test_f1:.4f}")

  lora_model.load_state_dict(torch.load("ei_best_model_state.bin"))
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 19.48it/s, accuracy=0.2, loss=1.51]

LoRA Fine-Tuning - Test loss: 1.5141, Accuracy: 0.2000, F1 score: 0.1091





In [48]:
# Store the results for the EmoInt task ("ei"); the original comment wrongly said Sentiment140
add_task_results(
    task_name="ei", 
    training_time=total_time,
    emissions=emissions,
    test_loss=test_loss,
    test_acc=test_acc,
    test_f1=test_f1,
)

# Build the summary table from model_performance and print it
performance = pd.DataFrame(model_performance)
print(performance)

     Task  Training Time  CO2 Emissions  Test Loss  Accuracy  F1 Score
0  agnews       5.584955       0.000012   1.315260       0.6  0.709524
1     sst       1.320940       0.000003   0.741614       0.3  0.138462
2      ei       4.548648       0.000010   1.514121       0.2  0.109091


In [49]:
# Save the LoRA adapter weights and config
lora_model.save_pretrained("ei_lora_adapter")

# Save the classification head next to the adapter. detach() drops the autograd
# link (and the Parameter wrapper) so only plain weight tensors are serialized.
classifier_state_dict = {
    "classifier.weight": lora_model.base_model.model.classifier.weight.detach().cpu(),
    "classifier.bias": lora_model.base_model.model.classifier.bias.detach().cpu()
}

torch.save(classifier_state_dict, "ei_classifier_head.pth")

In [50]:
# from peft import PeftModel

# base_model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=4)
# lora_model = PeftModel.from_pretrained(base_model, "ei_lora_adapter")

# classifier_state_dict = torch.load("ei_classifier_head.pth", map_location=device, weights_only=True)


# lora_model.base_model.classifier.weight.data.copy_(classifier_state_dict["classifier.weight"])
# lora_model.base_model.classifier.bias.data.copy_(classifier_state_dict["classifier.bias"])


## MNLI

### Ottenimento del dataset e preprocessing

Carico il dataset MNLI, che contiene esempi che consistono in una coppia di frasi (premessa e ipotesi) etichettate con **Entailment**, **Contradiction**, **Neutral**.

In [51]:
from datasets import load_dataset
from sklearn.model_selection import train_test_split
from collections import Counter

# Number of examples kept from each split for quick smoke-test runs.
# Set to None to use the full 45000 / 3000 / 3000 splits.
MNLI_SUBSET_SIZE = 10

# Load the dataset
mnli_dataset = load_dataset('glue', 'mnli')
print(mnli_dataset)

# Split the data into training, validation and test sets.
# All three are carved out of the shuffled GLUE "train" split.
mnli_data = mnli_dataset['train'].shuffle(seed=42)

# Hold out 3000 stratified examples as the test set
mnli_temp_premises, mnli_test_premises, mnli_temp_hypotheses, mnli_test_hypotheses, mnli_temp_labels, mnli_test_labels = train_test_split(
                                                  mnli_data['premise'],
                                                  mnli_data['hypothesis'],
                                                  mnli_data['label'],
                                                  test_size=3000,
                                                  random_state=42,
                                                  stratify=mnli_data['label'])

# From the remainder, draw 45000 training and 3000 validation examples
mnli_train_premises, mnli_val_premises, mnli_train_hypotheses, mnli_val_hypotheses, mnli_train_labels, mnli_val_labels = train_test_split(
                                                  mnli_temp_premises,
                                                  mnli_temp_hypotheses,
                                                  mnli_temp_labels,
                                                  train_size=45000,
                                                  test_size=3000,
                                                  random_state=42,
                                                  stratify=mnli_temp_labels)

# Optionally truncate each split. Slicing with None keeps the whole split.
# Note that truncation does not preserve the stratified class proportions.
mnli_train_premises = mnli_train_premises[:MNLI_SUBSET_SIZE]
mnli_val_premises = mnli_val_premises[:MNLI_SUBSET_SIZE]
mnli_test_premises = mnli_test_premises[:MNLI_SUBSET_SIZE]
mnli_train_hypotheses = mnli_train_hypotheses[:MNLI_SUBSET_SIZE]
mnli_val_hypotheses = mnli_val_hypotheses[:MNLI_SUBSET_SIZE]
mnli_test_hypotheses = mnli_test_hypotheses[:MNLI_SUBSET_SIZE]
mnli_train_labels = mnli_train_labels[:MNLI_SUBSET_SIZE]
mnli_val_labels = mnli_val_labels[:MNLI_SUBSET_SIZE]
mnli_test_labels = mnli_test_labels[:MNLI_SUBSET_SIZE]

print("Dimensioni dei set:")
print(f"Train: {len(mnli_train_premises)}")
print(f"Validation: {len(mnli_val_premises)}")
print(f"Test: {len(mnli_test_premises)}")

# Check the label distribution of each split
print("\nDistribuzione delle etichette:")
print(f"Train: {Counter(mnli_train_labels)}")
print(f"Validation: {Counter(mnli_val_labels)}")
print(f"Test: {Counter(mnli_test_labels)}")

train-00000-of-00001.parquet:   0%|          | 0.00/52.2M [00:00<?, ?B/s]

(…)alidation_matched-00000-of-00001.parquet:   0%|          | 0.00/1.21M [00:00<?, ?B/s]

(…)dation_mismatched-00000-of-00001.parquet:   0%|          | 0.00/1.25M [00:00<?, ?B/s]

test_matched-00000-of-00001.parquet:   0%|          | 0.00/1.22M [00:00<?, ?B/s]

test_mismatched-00000-of-00001.parquet:   0%|          | 0.00/1.26M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/392702 [00:00<?, ? examples/s]

Generating validation_matched split:   0%|          | 0/9815 [00:00<?, ? examples/s]

Generating validation_mismatched split:   0%|          | 0/9832 [00:00<?, ? examples/s]

Generating test_matched split:   0%|          | 0/9796 [00:00<?, ? examples/s]

Generating test_mismatched split:   0%|          | 0/9847 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['premise', 'hypothesis', 'label', 'idx'],
        num_rows: 392702
    })
    validation_matched: Dataset({
        features: ['premise', 'hypothesis', 'label', 'idx'],
        num_rows: 9815
    })
    validation_mismatched: Dataset({
        features: ['premise', 'hypothesis', 'label', 'idx'],
        num_rows: 9832
    })
    test_matched: Dataset({
        features: ['premise', 'hypothesis', 'label', 'idx'],
        num_rows: 9796
    })
    test_mismatched: Dataset({
        features: ['premise', 'hypothesis', 'label', 'idx'],
        num_rows: 9847
    })
})
Dimensioni dei set:
Train: 10
Validation: 10
Test: 10

Distribuzione delle etichette:
Train: Counter({0: 5, 1: 3, 2: 2})
Validation: Counter({1: 6, 2: 3, 0: 1})
Test: Counter({1: 5, 2: 3, 0: 2})


Creo una classe Dataset personalizzata in cui viene effettuata la tokenizzazione delle coppie premessa-ipotesi e la conversione dei dati in tensori.

In [52]:
from torch.utils.data import Dataset

class NLIDataset(Dataset):
    """Sentence-pair classification dataset tokenized on the fly.

    Each item encodes one (premise, hypothesis) pair as a single sequence,
    truncated and padded to ``max_len`` tokens, together with its label.

    Args:
        premises: Indexable sequence of first sentences.
        hypotheses: Indexable sequence of second sentences, aligned with ``premises``.
        labels: Indexable sequence of integer class ids, aligned with ``premises``.
        tokenizer: Hugging Face tokenizer; called with ``return_tensors='pt'``.
        max_len: Length every encoded pair is truncated/padded to.

    Raises:
        ValueError: If the three sequences do not have the same length.
    """

    def __init__(self, premises, hypotheses , labels, tokenizer, max_len):
        # Fail early on misaligned inputs instead of raising an IndexError
        # (or silently ignoring extra items) while iterating.
        if not (len(premises) == len(hypotheses) == len(labels)):
            raise ValueError(
                "premises, hypotheses and labels must have the same length, got "
                f"{len(premises)}, {len(hypotheses)} and {len(labels)}"
            )
        self.premises = premises
        self.hypotheses = hypotheses
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of sentence pairs."""
        return len(self.premises)

    def __getitem__(self, index):
        """Return the encoded pair at ``index`` as a dict of 1-D tensors.

        Keys: ``input_ids``, ``attention_mask``, ``token_type_ids`` (each
        flattened from the tokenizer's batch-of-one output) and ``labels``
        (scalar ``torch.long`` tensor).
        """
        premise = self.premises[index]
        hypothesis = self.hypotheses[index]
        label = self.labels[index]

        # Call the tokenizer directly: encode_plus is deprecated in favour of
        # __call__, which takes the same arguments for a single text pair.
        encoding = self.tokenizer(
            premise,
            hypothesis,
            add_special_tokens=True,
            max_length=self.max_len,
            truncation=True,
            return_token_type_ids=True,
            padding="max_length",
            return_attention_mask=True,
            return_tensors='pt')

        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'token_type_ids': encoding["token_type_ids"].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
            }

Inizializzo il Tokenizer BERT per tokenizzare le frasi e creo i dataset personalizzati.

In [53]:
from transformers import BertTokenizer
from torch.utils.data import DataLoader

# Maximum sequence length; NLIDataset pads every pair to this length (padding="max_length")
MAX_SEQ_LEN = 512

# Initialise the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Build the train / validation / test datasets of (premise, hypothesis) pairs
mnli_training_data = NLIDataset(premises = mnli_train_premises,
                            hypotheses = mnli_train_hypotheses,
                            labels = mnli_train_labels,
                            tokenizer = tokenizer,
                            max_len = MAX_SEQ_LEN)

mnli_validation_data = NLIDataset(premises = mnli_val_premises,
                            hypotheses = mnli_val_hypotheses,
                            labels = mnli_val_labels,
                            tokenizer = tokenizer,
                            max_len = MAX_SEQ_LEN)

mnli_test_data = NLIDataset(premises = mnli_test_premises,
                            hypotheses = mnli_test_hypotheses,
                            labels = mnli_test_labels,
                            tokenizer = tokenizer,
                            max_len = MAX_SEQ_LEN)

### Addestramento del modello

In [54]:
from peft import LoraConfig, get_peft_model
from transformers import BertForSequenceClassification, AutoModelForSequenceClassification

# Device: use the GPU when one is available, otherwise the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Pretrained BERT with a 3-output classification head (entailment / contradiction / neutral)
lora_model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=3)

# LoRA config: rank-32 adapters on the attention query/key/value projections.
# The classifier head is not listed here; it is unfrozen separately in the following cell.
lora_config = LoraConfig(
    r=32,
    lora_alpha=128,
    lora_dropout=0.2,
    target_modules=["query", "key", "value"], 
    bias="none",
)

# Wrap the base model with the adapters and print the trainable-parameter count
lora_model = get_peft_model(lora_model, lora_config)
lora_model.print_trainable_parameters()

# Move the model to the selected device (as the last expression, this also displays the model repr)
lora_model.to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 1,769,472 || all params: 111,254,019 || trainable%: 1.5905


PeftModel(
  (base_model): LoraModel(
    (model): BertForSequenceClassification(
      (bert): BertModel(
        (embeddings): BertEmbeddings(
          (word_embeddings): Embedding(30522, 768, padding_idx=0)
          (position_embeddings): Embedding(512, 768)
          (token_type_embeddings): Embedding(2, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (encoder): BertEncoder(
          (layer): ModuleList(
            (0-11): 12 x BertLayer(
              (attention): BertAttention(
                (self): BertSdpaSelfAttention(
                  (query): lora.Linear(
                    (base_layer): Linear(in_features=768, out_features=768, bias=True)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.2, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default): Linear(in_features=768

In [55]:
# Make every classifier-head parameter trainable and report its flag right away.
for name, param in lora_model.named_parameters():
    if "classifier" not in name:
        continue
    param.requires_grad = True
    print(f"{name}: requires_grad = {param.requires_grad}")


base_model.model.classifier.weight: requires_grad = True
base_model.model.classifier.bias: requires_grad = True


In [56]:
# Main hyperparameters
learning_rate = 2e-4
EPOCHS = 10
BATCH_SIZE = 32

# Create the DataLoaders (only the training data is shuffled)
mnli_train_loader = DataLoader(mnli_training_data, batch_size=BATCH_SIZE, shuffle=True)
mnli_val_loader = DataLoader(mnli_validation_data, batch_size=BATCH_SIZE, shuffle=False)
mnli_test_loader = DataLoader(mnli_test_data, batch_size=BATCH_SIZE, shuffle=False)

# Number of batches per epoch times the number of epochs
total_steps = len(mnli_train_loader) * EPOCHS

# Optimizer: only parameters with requires_grad=True are passed to AdamW
optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, lora_model.parameters()), lr = learning_rate)


# Scheduler: cosine schedule over total_steps with no warmup steps
scheduler = transformers.get_cosine_schedule_with_warmup(optimizer = optimizer,
                                                       num_warmup_steps = 0,
                                                       num_training_steps = total_steps)

In [57]:
# Train on MNLI with validation after each epoch; "mnli" is the task name passed to
# train_and_evaluate_model (presumably used for the checkpoint file name — a later
# cell loads "mnli_best_model_state.bin"). The call returns the history, the elapsed
# time and the emissions value.
history, total_time, emissions = train_and_evaluate_model(
    lora_model,"mnli", mnli_train_loader, mnli_val_loader, optimizer, scheduler, device, epochs=EPOCHS
) 
print(f"\nBERT with LoRA Training Time: {total_time:.2f} seconds, {total_time/60:.2f} minutes.")

[codecarbon INFO @ 20:46:55] [setup] RAM Tracking...
[codecarbon INFO @ 20:46:55] [setup] CPU Tracking...
 Linux OS detected: Please ensure RAPL files exist at \sys\class\powercap\intel-rapl to measure CPU

[codecarbon INFO @ 20:46:56] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.00GHz
[codecarbon INFO @ 20:46:56] [setup] GPU Tracking...
[codecarbon INFO @ 20:46:56] Tracking Nvidia GPU via pynvml
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/codecarbon/core/gpu.py", line 116, in _get_total_energy_consumption
    return pynvml.nvmlDeviceGetTotalEnergyConsumption(self.handle)
  File "/usr/local/lib/python3.10/dist-packages/pynvml/nvml.py", line 2039, in nvmlDeviceGetTotalEnergyConsumption
    _nvmlCheckReturn(ret)
  File "/usr/local/lib/python3.10/dist-packages/pynvml/nvml.py", line 765, in _nvmlCheckReturn
    raise NVMLError(ret)
pynvml.nvml.NVMLError_NotSupported: Not Supported
Traceback (most recent call last):
  File "/usr/loc


Epoch 1/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  2.12it/s, accuracy=0.2, loss=1.18]
Evaluating: 100%|██████████| 1/1 [00:00<00:00,  5.53it/s, accuracy=0.6, loss=1.09]



Epoch 2/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  2.12it/s, accuracy=0.6, loss=0.986]
Evaluating: 100%|██████████| 1/1 [00:00<00:00,  5.56it/s, accuracy=0.2, loss=1.16]


La loss sul validation set non è migliorata per 1 epoche.

Epoch 3/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  2.12it/s, accuracy=0.4, loss=1.09]
Evaluating: 100%|██████████| 1/1 [00:00<00:00,  5.56it/s, accuracy=0.1, loss=1.23]


La loss sul validation set non è migliorata per 2 epoche.

Epoch 4/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  2.12it/s, accuracy=0.5, loss=0.972]
Evaluating: 100%|██████████| 1/1 [00:00<00:00,  5.52it/s, accuracy=0.1, loss=1.28]
[codecarbon INFO @ 20:47:02] Energy consumed for RAM : 0.000011 kWh. RAM Power : 11.756441116333008 W
[codecarbon INFO @ 20:47:02] Energy consumed for all CPUs : 0.000039 kWh. Total CPU Power : 42.5 W
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/codecarbon/core/gpu.py", line 116, in _get_total_energy_consumption
    return pynvml.nvmlDeviceGetTotalEnergyConsumption(self.handle)
  File "/usr/local/lib/python3.10/dist-packages/pynvml/nvml.py", line 2039, in nvmlDeviceGetTotalEnergyConsumption
    _nvmlCheckReturn(ret)
  File "/usr/local/lib/python3.10/dist-packages/pynvml/nvml.py", line 765, in _nvmlCheckReturn
    raise NVMLError(ret)
pynvml.nvml.NVMLError_NotSupported: Not Supported
[codecarbon INFO @ 20:47:02] Energy consumed for all GPUs : 0.000000 kWh. Total GPU Power : 0.0 W
[code

La loss sul validation set non è migliorata per 3 epoche.
Early stopping attivato dopo 3 epoche senza miglioramenti

Emissioni CO₂ totali: 0.0000 kg

BERT with LoRA Training Time: 3.33 seconds, 0.06 minutes.


### Valutazione del modello
Valuto il modello calcolando la loss sul test set, l'accuracy e l'F1-score.

In [58]:
# Restore the best MNLI checkpoint (presumably written by train_and_evaluate_model).
# map_location keeps the load working when the checkpoint was saved on another
# device; weights_only=True restricts unpickling to tensors and plain containers
# and avoids the torch.load FutureWarning.
lora_model.load_state_dict(
    torch.load("mnli_best_model_state.bin", map_location=device, weights_only=True)
)

# Evaluate on the held-out test split and report loss, accuracy and F1
test_loss, test_acc, test_f1 = eval_model(lora_model, mnli_test_loader, device)
print(f"LoRA Fine-Tuning - Test loss: {test_loss:.4f}, Accuracy: {test_acc:.4f}, F1 score: {test_f1:.4f}")

  lora_model.load_state_dict(torch.load("mnli_best_model_state.bin"))
Evaluating: 100%|██████████| 1/1 [00:00<00:00,  5.47it/s, accuracy=0.5, loss=1.11]

LoRA Fine-Tuning - Test loss: 1.1082, Accuracy: 0.5000, F1 score: 0.3333





In [59]:
# Store the results for the MNLI task ("mnli"); the original comment wrongly said Sentiment140
add_task_results(
    task_name="mnli", 
    training_time=total_time,
    emissions=emissions,
    test_loss=test_loss,
    test_acc=test_acc,
    test_f1=test_f1,
)

# Build the summary table from model_performance and print it
performance = pd.DataFrame(model_performance)
print(performance)

     Task  Training Time  CO2 Emissions  Test Loss  Accuracy  F1 Score
0  agnews       5.584955       0.000012   1.315260       0.6  0.709524
1     sst       1.320940       0.000003   0.741614       0.3  0.138462
2      ei       4.548648       0.000010   1.514121       0.2  0.109091
3    mnli       3.331646       0.000007   1.108231       0.5  0.333333


### Salvataggio dell'adapter LoRA

In [60]:
# Save the LoRA adapter weights and config
lora_model.save_pretrained("mnli_lora_adapter")

# Save the classification head next to the adapter. detach() drops the autograd
# link (and the Parameter wrapper) so only plain weight tensors are serialized.
classifier_state_dict = {
    "classifier.weight": lora_model.base_model.model.classifier.weight.detach().cpu(),
    "classifier.bias": lora_model.base_model.model.classifier.bias.detach().cpu()
}

torch.save(classifier_state_dict, "mnli_classifier_head.pth")

In [61]:
# from peft import PeftModel

# base_model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=3)
# lora_model = PeftModel.from_pretrained(base_model, "mnli_lora_adapter")

# classifier_state_dict = torch.load("mnli_classifier_head.pth", map_location=device, weights_only=True)


# lora_model.base_model.classifier.weight.data.copy_(classifier_state_dict["classifier.weight"])
# lora_model.base_model.classifier.bias.data.copy_(classifier_state_dict["classifier.bias"])


## PAWS

In [62]:
from datasets import load_dataset

# Load the PAWS dataset ("labeled_final" configuration) and print its splits
paws_dataset = load_dataset("google-research-datasets/paws", "labeled_final")
print(paws_dataset)

README.md:   0%|          | 0.00/9.79k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/8.43M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/1.24M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/1.23M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/49401 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/8000 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/8000 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'sentence1', 'sentence2', 'label'],
        num_rows: 49401
    })
    test: Dataset({
        features: ['id', 'sentence1', 'sentence2', 'label'],
        num_rows: 8000
    })
    validation: Dataset({
        features: ['id', 'sentence1', 'sentence2', 'label'],
        num_rows: 8000
    })
})


In [63]:
from sklearn.model_selection import train_test_split
from collections import Counter

# Number of examples kept from each split for quick smoke-test runs.
# Set to None to use the full official splits.
PAWS_SUBSET_SIZE = 10

# PAWS ships with its own train / validation / test splits, so no re-splitting is needed
paws_train_set = paws_dataset["train"]
paws_val_set = paws_dataset["validation"]
paws_test_set = paws_dataset["test"]

# Pull out the two sentences and the label of every example
paws_train_sentences1, paws_train_sentences2, paws_train_labels = paws_train_set['sentence1'], paws_train_set['sentence2'], paws_train_set['label']
paws_val_sentences1, paws_val_sentences2, paws_val_labels = paws_val_set['sentence1'], paws_val_set['sentence2'], paws_val_set['label']
paws_test_sentences1, paws_test_sentences2, paws_test_labels = paws_test_set['sentence1'], paws_test_set['sentence2'], paws_test_set['label']

# Optionally truncate each split. Slicing with None keeps the whole split.
paws_train_sentences1 = paws_train_sentences1[:PAWS_SUBSET_SIZE]
paws_val_sentences1 = paws_val_sentences1[:PAWS_SUBSET_SIZE]
paws_test_sentences1 = paws_test_sentences1[:PAWS_SUBSET_SIZE]
paws_train_sentences2 = paws_train_sentences2[:PAWS_SUBSET_SIZE]
paws_val_sentences2 = paws_val_sentences2[:PAWS_SUBSET_SIZE]
paws_test_sentences2 = paws_test_sentences2[:PAWS_SUBSET_SIZE]
paws_train_labels = paws_train_labels[:PAWS_SUBSET_SIZE]
paws_val_labels = paws_val_labels[:PAWS_SUBSET_SIZE]
paws_test_labels = paws_test_labels[:PAWS_SUBSET_SIZE]

print("Dimensioni dei set:")
print(f"Train: {len(paws_train_sentences1)}")
print(f"Validation: {len(paws_val_sentences1)}")
print(f"Test: {len(paws_test_sentences1)}")

# Check the label distribution of each split
print("\nDistribuzione delle etichette:")
print(f"Train: {Counter(paws_train_labels)}")
print(f"Validation: {Counter(paws_val_labels)}")
print(f"Test: {Counter(paws_test_labels)}")

Dimensioni dei set:
Train: 10
Validation: 10
Test: 10

Distribuzione delle etichette:
Train: Counter({0: 5, 1: 5})
Validation: Counter({1: 8, 0: 2})
Test: Counter({0: 6, 1: 4})


In [64]:
from transformers import BertTokenizer
from torch.utils.data import DataLoader

# Maximum sequence length; NLIDataset pads every pair to this length (padding="max_length")
MAX_SEQ_LEN = 256 

# Initialise the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Build the train / validation / test datasets. NLIDataset is reused for the
# paraphrase pairs: sentence1 is passed as the premise, sentence2 as the hypothesis.
paws_training_data = NLIDataset(premises = paws_train_sentences1,
                            hypotheses = paws_train_sentences2,
                            labels = paws_train_labels,
                            tokenizer = tokenizer,
                            max_len = MAX_SEQ_LEN)

paws_validation_data = NLIDataset(premises = paws_val_sentences1,
                            hypotheses = paws_val_sentences2,
                            labels = paws_val_labels,
                            tokenizer = tokenizer,
                            max_len = MAX_SEQ_LEN)

paws_test_data = NLIDataset(premises = paws_test_sentences1,
                            hypotheses = paws_test_sentences2,
                            labels = paws_test_labels,
                            tokenizer = tokenizer,
                            max_len = MAX_SEQ_LEN)

### Addestramento del modello

In [65]:
from peft import LoraConfig, get_peft_model
from transformers import BertForSequenceClassification, AutoModelForSequenceClassification

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# LoRA hyperparameters: rank-16 adapters (alpha 128, dropout 0.3) injected into
# the modules named "query", "key" and "value"; bias terms are left untouched.
lora_config = LoraConfig(
    r=16,
    lora_alpha=128,
    lora_dropout=0.3,
    target_modules=["query", "key", "value"],
    bias="none",
)

# Load pretrained BERT with a 2-label classification head and wrap it with LoRA
lora_model = get_peft_model(
    BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2),
    lora_config,
)
lora_model.print_trainable_parameters()

# Last expression of the cell: moves the model to `device` and displays its repr
lora_model.to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 884,736 || all params: 110,368,514 || trainable%: 0.8016


PeftModel(
  (base_model): LoraModel(
    (model): BertForSequenceClassification(
      (bert): BertModel(
        (embeddings): BertEmbeddings(
          (word_embeddings): Embedding(30522, 768, padding_idx=0)
          (position_embeddings): Embedding(512, 768)
          (token_type_embeddings): Embedding(2, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (encoder): BertEncoder(
          (layer): ModuleList(
            (0-11): 12 x BertLayer(
              (attention): BertAttention(
                (self): BertSdpaSelfAttention(
                  (query): lora.Linear(
                    (base_layer): Linear(in_features=768, out_features=768, bias=True)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.3, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default): Linear(in_features=768

In [66]:
# Collect every parameter whose name contains "classifier"
classifier_params = [
    (param_name, tensor)
    for param_name, tensor in lora_model.named_parameters()
    if "classifier" in param_name
]

# Make the classification head trainable alongside the LoRA weights
for _, tensor in classifier_params:
    tensor.requires_grad = True

# Report the resulting flag for each classifier parameter
for param_name, tensor in classifier_params:
    print(f"{param_name}: requires_grad = {tensor.requires_grad}")


base_model.model.classifier.weight: requires_grad = True
base_model.model.classifier.bias: requires_grad = True


In [67]:
from torch.utils.data import DataLoader

# Main hyperparameters
learning_rate = 5e-5
EPOCHS = 10
BATCH_SIZE = 32

# Data loaders: only the training split is shuffled
paws_train_loader = DataLoader(paws_training_data, batch_size=BATCH_SIZE, shuffle=True)
paws_val_loader = DataLoader(paws_validation_data, batch_size=BATCH_SIZE, shuffle=False)
paws_test_loader = DataLoader(paws_test_data, batch_size=BATCH_SIZE, shuffle=False)

# Total optimizer steps = batches per epoch * number of epochs
total_steps = EPOCHS * len(paws_train_loader)

# Optimizer: AdamW restricted to the parameters that are still trainable
trainable_params = [p for p in lora_model.parameters() if p.requires_grad]
optimizer = torch.optim.AdamW(trainable_params, lr=learning_rate)

# Scheduler: cosine schedule over `total_steps`, with no warmup steps
scheduler = transformers.get_cosine_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps,
)

In [68]:
# Run training with per-epoch validation on PAWS.
# NOTE(review): "paws" is presumably the task tag the helper uses to name the
# saved checkpoint — confirm against train_and_evaluate_model.
history, total_time, emissions = train_and_evaluate_model(
    lora_model,
    "paws",
    paws_train_loader,
    paws_val_loader,
    optimizer,
    scheduler,
    device,
    epochs=EPOCHS,
)
print(f"\nBERT with LoRA Training Time: {total_time:.2f} seconds, {total_time/60:.2f} minutes.")

[codecarbon INFO @ 20:47:07] [setup] RAM Tracking...
[codecarbon INFO @ 20:47:07] [setup] CPU Tracking...
 Linux OS detected: Please ensure RAPL files exist at \sys\class\powercap\intel-rapl to measure CPU

[codecarbon INFO @ 20:47:08] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.00GHz
[codecarbon INFO @ 20:47:08] [setup] GPU Tracking...
[codecarbon INFO @ 20:47:08] Tracking Nvidia GPU via pynvml
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/codecarbon/core/gpu.py", line 116, in _get_total_energy_consumption
    return pynvml.nvmlDeviceGetTotalEnergyConsumption(self.handle)
  File "/usr/local/lib/python3.10/dist-packages/pynvml/nvml.py", line 2039, in nvmlDeviceGetTotalEnergyConsumption
    _nvmlCheckReturn(ret)
  File "/usr/local/lib/python3.10/dist-packages/pynvml/nvml.py", line 765, in _nvmlCheckReturn
    raise NVMLError(ret)
pynvml.nvml.NVMLError_NotSupported: Not Supported
Traceback (most recent call last):
  File "/usr/loc


Epoch 1/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  4.28it/s, accuracy=0.6, loss=0.681]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 10.56it/s, accuracy=0.3, loss=0.698]



Epoch 2/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  4.52it/s, accuracy=0.6, loss=0.666]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 10.58it/s, accuracy=0.5, loss=0.692]



Epoch 3/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  4.41it/s, accuracy=0.7, loss=0.628]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 10.66it/s, accuracy=0.4, loss=0.693]


La loss sul validation set non è migliorata per 1 epoche.

Epoch 4/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  4.57it/s, accuracy=0.4, loss=0.739]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 10.64it/s, accuracy=0.4, loss=0.693]


La loss sul validation set non è migliorata per 2 epoche.

Epoch 5/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  4.59it/s, accuracy=0.6, loss=0.686]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 10.42it/s, accuracy=0.5, loss=0.692]



Epoch 6/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  4.60it/s, accuracy=0.5, loss=0.665]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 10.59it/s, accuracy=0.5, loss=0.691]



Epoch 7/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  4.59it/s, accuracy=0.4, loss=0.712]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 10.69it/s, accuracy=0.5, loss=0.691]


La loss sul validation set non è migliorata per 1 epoche.

Epoch 8/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  4.59it/s, accuracy=0.6, loss=0.672]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 10.47it/s, accuracy=0.6, loss=0.692]


La loss sul validation set non è migliorata per 2 epoche.

Epoch 9/10


Training  : 100%|██████████| 1/1 [00:00<00:00,  4.60it/s, accuracy=0.5, loss=0.736]
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 10.67it/s, accuracy=0.5, loss=0.692]
[codecarbon INFO @ 20:47:17] Energy consumed for RAM : 0.000019 kWh. RAM Power : 11.756441116333008 W
[codecarbon INFO @ 20:47:17] Energy consumed for all CPUs : 0.000070 kWh. Total CPU Power : 42.5 W
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/codecarbon/core/gpu.py", line 116, in _get_total_energy_consumption
    return pynvml.nvmlDeviceGetTotalEnergyConsumption(self.handle)
  File "/usr/local/lib/python3.10/dist-packages/pynvml/nvml.py", line 2039, in nvmlDeviceGetTotalEnergyConsumption
    _nvmlCheckReturn(ret)
  File "/usr/local/lib/python3.10/dist-packages/pynvml/nvml.py", line 765, in _nvmlCheckReturn
    raise NVMLError(ret)
pynvml.nvml.NVMLError_NotSupported: Not Supported
[codecarbon INFO @ 20:47:17] Energy consumed for all GPUs : 0.000000 kWh. Total GPU Power : 0.0 W
[cod

La loss sul validation set non è migliorata per 3 epoche.
Early stopping attivato dopo 3 epoche senza miglioramenti

Emissioni CO₂ totali: 0.0000 kg

BERT with LoRA Training Time: 5.91 seconds, 0.10 minutes.


In [69]:
# Reload the checkpoint saved during training before scoring the test split.
# - weights_only=True restricts unpickling to tensors and plain containers, so a
#   tampered checkpoint file cannot execute arbitrary code on load.
#   NOTE(review): assumes the file holds a plain state_dict — confirm against
#   train_and_evaluate_model.
# - map_location=device lets a checkpoint written on a GPU load on a CPU-only
#   machine as well.
best_state_dict = torch.load(
    "paws_best_model_state.bin",
    map_location=device,
    weights_only=True,
)
lora_model.load_state_dict(best_state_dict)

# Evaluate on the held-out PAWS test loader and report loss, accuracy and F1
test_loss, test_acc, test_f1 = eval_model(lora_model, paws_test_loader, device)
print(f"LoRA Fine-Tuning - Test loss: {test_loss:.4f}, Accuracy: {test_acc:.4f}, F1 score: {test_f1:.4f}")

  lora_model.load_state_dict(torch.load("paws_best_model_state.bin"))
Evaluating: 100%|██████████| 1/1 [00:00<00:00, 10.23it/s, accuracy=0.3, loss=0.698]

LoRA Fine-Tuning - Test loss: 0.6976, Accuracy: 0.3000, F1 score: 0.3071





In [70]:
# Record the PAWS results (timing, emissions and test metrics)
paws_results = dict(
    task_name="paws",
    training_time=total_time,
    emissions=emissions,
    test_loss=test_loss,
    test_acc=test_acc,
    test_f1=test_f1,
)
add_task_results(**paws_results)

# Tabulate everything accumulated in `model_performance` so far
performance = pd.DataFrame(model_performance)
print(performance)

           Task  Training Time  CO2 Emissions  Test Loss  Accuracy  F1 Score
0        agnews       5.584955       0.000012   1.315260       0.6  0.709524
1           sst       1.320940       0.000003   0.741614       0.3  0.138462
2            ei       4.548648       0.000010   1.514121       0.2  0.109091
3          mnli       3.331646       0.000007   1.108231       0.5  0.333333
4  sentiment140       5.909914       0.000012   0.697641       0.3  0.307071


### Salvataggio dell'adapter LoRA

In [71]:
# Persist the LoRA adapter weights to the "paws_lora_adapter" directory
lora_model.save_pretrained("paws_lora_adapter")

# The classification head is saved separately in its own file.
# Its parameters were switched to requires_grad=True earlier in the notebook,
# so detach them first: the file then holds plain CPU tensors rather than
# trainable / autograd-attached ones.
classifier_head = lora_model.base_model.model.classifier
classifier_state_dict = {
    "classifier.weight": classifier_head.weight.detach().cpu(),
    "classifier.bias": classifier_head.bias.detach().cpu(),
}

torch.save(classifier_state_dict, "paws_classifier_head.pth")

In [72]:
# Reference snippet (kept commented out): how to rebuild the PAWS model from the
# artifacts saved above. The base model must use num_labels=2 and the adapter
# directory must be "paws_lora_adapter" to match what this notebook trained and saved.

# from peft import PeftModel

# base_model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
# lora_model = PeftModel.from_pretrained(base_model, "paws_lora_adapter")

# classifier_state_dict = torch.load("paws_classifier_head.pth", map_location=device, weights_only=True)


# lora_model.base_model.model.classifier.weight.data.copy_(classifier_state_dict["classifier.weight"])
# lora_model.base_model.model.classifier.bias.data.copy_(classifier_state_dict["classifier.bias"])
