# Unlearning DualTeacher


## 1. Setup e Import

In [1]:
!pip install rouge-score
import torch
import pandas as pd
import numpy as np
import json
import os
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig, get_peft_model, TaskType, PeftModel
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
from rouge_score import rouge_scorer

# Configurazioni
MODEL_PATH = "/kaggle/input/olmo-model/semeval25-unlearning-1B-model"
DATA_PATH = "/kaggle/input/olmo-model/semeval25-unlearning-data"

print(f"GPUs disponibili: {torch.cuda.device_count()}")
for i in range(torch.cuda.device_count()):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=b7686c2acd68e692f06be82969fee75fa84358da33f591aed52b2901ed8b07ec
  Stored in directory: /root/.cache/pip/wheels/1e/19/43/8a442dc83660ca25e163e1bd1f89919284ab0d0c1475475148
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


2025-08-13 08:57:29.114998: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755075449.500673      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755075449.609497      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


GPUs disponibili: 2
GPU 0: Tesla T4
GPU 1: Tesla T4


## 2. Caricamento Dati e Modelli

In [2]:
# Caricamento dataset
retain_train_df = pd.read_parquet(f"{DATA_PATH}/data/retain_train-00000-of-00001.parquet", engine='pyarrow')
retain_validation_df = pd.read_parquet(f"{DATA_PATH}/data/retain_validation-00000-of-00001.parquet", engine='pyarrow')
forget_train_df = pd.read_parquet(f"{DATA_PATH}/data/forget_train-00000-of-00001.parquet", engine='pyarrow')
forget_validation_df = pd.read_parquet(f"{DATA_PATH}/data/forget_validation-00000-of-00001.parquet", engine='pyarrow')

# Salvataggio in formato JSONL
!mkdir -p train validation
retain_train_df.to_json('train/retain.jsonl', orient='records', lines=True)
forget_train_df.to_json('train/forget.jsonl', orient='records', lines=True)
retain_validation_df.to_json('validation/retain.jsonl', orient='records', lines=True)
forget_validation_df.to_json('validation/forget.jsonl', orient='records', lines=True)

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained("allenai/OLMo-1B-0724-hf")
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("Dataset salvati e tokenizer caricato")

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Dataset salvati e tokenizer caricato


## 3. Configurazione Modelli

In [None]:
# Configurazione LoRA
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
)

# Modello studente (GPU 0)
base_model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, local_files_only=True)
student_model = get_peft_model(base_model, lora_config)
student_model = student_model.to("cuda:0")
student_model.print_trainable_parameters()

# Insegnante competente (GPU 1)
good_teacher = AutoModelForCausalLM.from_pretrained(MODEL_PATH, local_files_only=True)
good_teacher = good_teacher.to("cuda:1")
for param in good_teacher.parameters():
    param.requires_grad = False
good_teacher.eval()

print("Modelli configurati su GPU separate")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 4,194,304 || all params: 1,283,981,312 || trainable%: 0.3267


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Modelli configurati su GPU separate


## 4. Bad Teacher

In [None]:
class badTeacher(nn.Module):
    """Insegnante che genera distribuzioni casuali per l'unlearning"""
    
    def __init__(self, vocab_size, device="cuda:1"):
        super().__init__()
        self.vocab_size = vocab_size
        self.device = device
    
    def forward(self, input_ids, attention_mask=None, **kwargs):
        batch_size, seq_len = input_ids.shape
        # Distribuzione uniforme (massima entropia = massima confusione)
        logits = torch.zeros(batch_size, seq_len, self.vocab_size, device=self.device)
        # Piccolo rumore per evitare degenerazione
        logits += torch.randn_like(logits) * 0.01
        
        from types import SimpleNamespace
        return SimpleNamespace(logits=logits)

# Creare insegnante incompetente
vocab_size = student_model.config.vocab_size
bad_teacher = badTeacher(vocab_size, device="cuda:1")
bad_teacher.eval()

print(f"Insegnante incompetente creato con vocab_size={vocab_size}")

Insegnante incompetente creato con vocab_size=50304


## 5. Dataset e DataLoader

In [5]:
class UnlearningDataset(Dataset):
    def __init__(self, jsonl_path, tokenizer, max_length=256):
        self.data = []
        self.tokenizer = tokenizer
        self.max_length = max_length
        
        with open(jsonl_path, 'r', encoding='utf-8') as f:
            for line in f:
                item = json.loads(line.strip())
                self.data.append(item)
        
        print(f"Caricati {len(self.data)} esempi da {jsonl_path}")
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        item = self.data[idx]
        
        # Estrarre testo
        if 'text' in item:
            text = item['text']
        else:
            text = str(list(item.values())[0])
        
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'labels': encoding['input_ids'].squeeze()
        }

# Creare dataset e dataloader
retain_dataset = UnlearningDataset('train/retain.jsonl', tokenizer)
forget_dataset = UnlearningDataset('train/forget.jsonl', tokenizer)

batch_size = 2
retain_loader = DataLoader(retain_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
forget_loader = DataLoader(forget_dataset, batch_size=batch_size, shuffle=True, num_workers=0)

print(f"DataLoader creati: retain={len(retain_loader)} batch, forget={len(forget_loader)} batch")

Caricati 1136 esempi da train/retain.jsonl
Caricati 1112 esempi da train/forget.jsonl
DataLoader creati: retain=568 batch, forget=556 batch


In [None]:
# Creare dataset e dataloader per validation dataset
val_retain_dataset = UnlearningDataset('validation/retain.jsonl', tokenizer)
val_forget_dataset = UnlearningDataset('validation/forget.jsonl', tokenizer)

batch_size = 2
val_retain_loader = DataLoader(val_retain_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
val_forget_loader = DataLoader(val_forget_dataset, batch_size=batch_size, shuffle=True, num_workers=0)

print(f"DataLoader val creati: retain={len(val_retain_loader)} batch, forget={len(val_forget_loader)} batch")

Caricati 278 esempi da validation/retain.jsonl
Caricati 254 esempi da validation/forget.jsonl
DataLoader val creati: retain=139 batch, forget=127 batch


## 6. KL divergence con batch combinato

In [None]:
def create_combined_batch(retain_batch, forget_batch):
    """Combina batch retain e forget con etichette split"""
    
    # Retain (split = 0)
    retain_data = {
        'input_ids': retain_batch['input_ids'],
        'attention_mask': retain_batch['attention_mask'],
        'labels': retain_batch['labels'],
        'split': torch.zeros(retain_batch['input_ids'].size(0))
    }
    
    # Forget (split = 1)
    forget_data = {
        'input_ids': forget_batch['input_ids'],
        'attention_mask': forget_batch['attention_mask'],
        'labels': forget_batch['labels'],
        'split': torch.ones(forget_batch['input_ids'].size(0))
    }
    
    # Combinare
    combined_batch = {
        'input_ids': torch.cat([retain_data['input_ids'], forget_data['input_ids']], dim=0),
        'attention_mask': torch.cat([retain_data['attention_mask'], forget_data['attention_mask']], dim=0),
        'labels': torch.cat([retain_data['labels'], forget_data['labels']], dim=0),
        'split': torch.cat([retain_data['split'], forget_data['split']], dim=0)
    }
    
    return combined_batch

def compute_balanced_kl_loss(student_model, good_teacher, bad_teacher, batch):
      """Loss bilanciato: out_teacher = (1-split)*good + split*bad"""

      input_ids = batch['input_ids'].to('cuda:0')
      attention_mask = batch['attention_mask'].to('cuda:0')
      split = batch['split'].to('cuda:0')

      # Forward pass studente
      student_outputs = student_model(input_ids=input_ids, attention_mask=attention_mask)     
      student_logits = student_outputs.logits

      # Forward pass insegnanti
      with torch.no_grad():
          input_ids_gpu1 = input_ids.to('cuda:1')
          attention_mask_gpu1 = attention_mask.to('cuda:1')

          good_outputs = good_teacher(input_ids=input_ids_gpu1, attention_mask=attention_mask_gpu1)
          good_logits = good_outputs.logits.to('cuda:0')

          bad_outputs = bad_teacher(input_ids=input_ids_gpu1,attention_mask=attention_mask_gpu1)
          bad_logits = bad_outputs.logits.to('cuda:0')

      # CORREZIONE: Broadcasting corretto per split
      split_expanded = split.unsqueeze(-1).unsqueeze(-1)
      combined_teacher_logits = (1 - split_expanded) * good_logits + split_expanded * bad_logits

      # KL divergence
      student_probs = F.log_softmax(student_logits, dim=-1)
      teacher_probs = F.softmax(combined_teacher_logits, dim=-1)
      kl_loss = F.kl_div(student_probs, teacher_probs, reduction='batchmean')

      return kl_loss




## 7. Training Loop Bilanciato

In [None]:
def train_balanced_unlearning_with_validation(
    student_model, good_teacher, bad_teacher,
    retain_loader, forget_loader, val_retain_loader, val_forget_loader,
    optimizer, tokenizer, num_epochs=3, device="cuda:0"
):
    """
    Loop di training unificato:
    - Batch bilanciati retain/forget
    - Loss smussata (probabilità combinate)
    - Validazione ad ogni epoca
    - Salvataggio modello e tokenizer
    """
    
    
    student_model.to(device)
    student_model.train()
    
    good_teacher.eval()
    bad_teacher.eval()
    
    print("🚀 TRAINING BILANCIATO con VALIDAZIONE")
    print("=" * 70)
    
    for epoch in range(num_epochs):
        print(f"\n📅 EPOCA {epoch + 1}/{num_epochs}")
        
        epoch_train_losses = []
        
        # Iteratori per retain e forget
        retain_iter = iter(retain_loader)
        forget_iter = iter(forget_loader)
        max_steps = max(len(retain_loader), len(forget_loader))
        
        # --- Training ---
        with tqdm(total=max_steps, desc=f"Epoca {epoch+1} (Train)") as pbar:
            for _ in range(max_steps):
                try:
                    retain_batch = next(retain_iter)
                except StopIteration:
                    retain_iter = iter(retain_loader)
                    retain_batch = next(retain_iter)
                
                try:
                    forget_batch = next(forget_iter)
                except StopIteration:
                    forget_iter = iter(forget_loader)
                    forget_batch = next(forget_iter)
                
                # Batch combinato
                combined_batch = create_combined_batch(retain_batch, forget_batch)
                
                optimizer.zero_grad()
                loss = compute_balanced_kl_loss(
                    student_model, good_teacher, bad_teacher, combined_batch
                )
                loss.backward()
                optimizer.step()
                
                # print({"Train Loss": loss.item()})
                epoch_train_losses.append(loss.item())
                pbar.update(1)
        
        avg_train_loss = np.mean(epoch_train_losses)
        
        # --- Validazione ---
        val_losses = []
        val_retain_iter = iter(val_retain_loader)
        val_forget_iter = iter(val_forget_loader)
        max_val_steps = max(len(val_retain_loader), len(val_forget_loader))
        
        student_model.eval()
        with torch.no_grad():
            with tqdm(total=max_val_steps, desc=f"Epoca {epoch+1} (Val)") as pbar:
                for _ in range(max_val_steps):
                    try:
                        retain_batch = next(val_retain_iter)
                    except StopIteration:
                        val_retain_iter = iter(val_retain_loader)
                        retain_batch = next(val_retain_iter)
                    
                    try:
                        forget_batch = next(val_forget_iter)
                    except StopIteration:
                        val_forget_iter = iter(val_forget_loader)
                        forget_batch = next(val_forget_iter)
                    
                    combined_batch = create_combined_batch(retain_batch, forget_batch)
                    val_loss = compute_balanced_kl_loss(
                        student_model, good_teacher, bad_teacher, combined_batch
                    )
                    # print({"Val Loss": val_loss.item()})
                    val_losses.append(val_loss.item())
                    pbar.update(1)
        
        avg_val_loss = np.mean(val_losses)
        
        print(f"📊 Epoca {epoch+1} - Train Loss medio: {avg_train_loss:.4f} | Val Loss medio: {avg_val_loss:.4f}")
        
        # --- Salvataggio ---
        save_path = f"studentmodel_epoch_{epoch+1}"
        student_model.save_pretrained(save_path)
        tokenizer.save_pretrained(save_path)
        
        student_model.train()
    
    print("\n✅ TRAINING COMPLETATO!")


## 8. Configurazione Training

In [9]:
# Parametri training
num_epochs = 4
learning_rate = 1e-4

# Optimizer
optimizer = torch.optim.AdamW(student_model.parameters(), lr=learning_rate, weight_decay=0.01)

# Salvare stato iniziale per task vector
initial_state_dict = {}
for name, param in student_model.named_parameters():
    if param.requires_grad:
        initial_state_dict[name] = param.data.clone()

print(f"Configurazione completata:")
print(f"- Epoche: {num_epochs}")
print(f"- Learning rate: {learning_rate}")
print(f"- Parametri LoRA salvati: {len(initial_state_dict)}")

Configurazione completata:
- Epoche: 4
- Learning rate: 0.0001
- Parametri LoRA salvati: 128


## 9. Esecuzione Training

In [None]:
# Eseguire training bilanciato
training_metrics = train_balanced_unlearning_with_validation(
    student_model, good_teacher, bad_teacher,
    retain_loader, forget_loader, val_retain_loader, val_forget_loader,
    optimizer, tokenizer, num_epochs)

🚀 TRAINING BILANCIATO con VALIDAZIONE

📅 EPOCA 1/4


Epoca 1 (Train): 100%|██████████| 568/568 [19:54<00:00,  2.10s/it]
Epoca 1 (Val): 100%|██████████| 139/139 [03:11<00:00,  1.38s/it]


📊 Epoca 1 - Train Loss medio: 225.9901 | Val Loss medio: 351.0510

📅 EPOCA 2/4


Epoca 2 (Train): 100%|██████████| 568/568 [20:04<00:00,  2.12s/it]
Epoca 2 (Val): 100%|██████████| 139/139 [03:11<00:00,  1.38s/it]


📊 Epoca 2 - Train Loss medio: 118.3526 | Val Loss medio: 488.7881

📅 EPOCA 3/4


Epoca 3 (Train): 100%|██████████| 568/568 [20:04<00:00,  2.12s/it]
Epoca 3 (Val): 100%|██████████| 139/139 [03:11<00:00,  1.38s/it]


📊 Epoca 3 - Train Loss medio: 82.7263 | Val Loss medio: 551.1994

📅 EPOCA 4/4


Epoca 4 (Train): 100%|██████████| 568/568 [20:04<00:00,  2.12s/it]
Epoca 4 (Val): 100%|██████████| 139/139 [03:11<00:00,  1.38s/it]

📊 Epoca 4 - Train Loss medio: 58.3913 | Val Loss medio: 531.3211

✅ TRAINING COMPLETATO!





## 10. Salvataggio Risultati

In [11]:
# Creare directory
os.makedirs('balanced_results', exist_ok=True)

# Salvare modello
student_model.save_pretrained('balanced_results/balanced_model')

# Calcolare task vector
task_vector = {}
for name, param in student_model.named_parameters():
    if param.requires_grad and name in initial_state_dict:
        task_vector[name] = param.data - initial_state_dict[name]

# Salvare task vector e metriche
torch.save(task_vector, 'balanced_results/task_vector.pt')
torch.save(training_metrics, 'balanced_results/training_metrics.pt')

print("✅ Risultati salvati in balanced_results/")
print("- balanced_model/: Modello con unlearning bilanciato")
print("- task_vector.pt: Task vector per applicazioni future")
print("- training_metrics.pt: Metriche di training")

✅ Risultati salvati in balanced_results/
- balanced_model/: Modello con unlearning bilanciato
- task_vector.pt: Task vector per applicazioni future
- training_metrics.pt: Metriche di training


## 11. Valutazione Veloce

In [None]:
def quick_evaluation(model, tokenizer, retain_path, forget_path, max_examples=30):
    """Valutazione veloce per confronto"""
    
    def eval_perplexity(path, label):
        perplexities = []
        with open(path, "r") as f:
            for i, line in enumerate(f):
                if i >= max_examples:
                    break
                try:
                    item = json.loads(line)
                    text = str(list(item.values())[0])
                    
                    encoding = tokenizer(text, return_tensors="pt", max_length=256, truncation=True)
                    encoding = {k: v.to(model.device) for k, v in encoding.items()}
                    
                    with torch.no_grad():
                        outputs = model(**encoding, labels=encoding['input_ids'])
                        if outputs.loss is not None:
                            perplexities.append(torch.exp(outputs.loss).item())
                except:
                    continue
        
        avg_ppl = np.mean(perplexities) if perplexities else float('inf')
        print(f"  {label}: {avg_ppl:.2f} (su {len(perplexities)} esempi)")
        return avg_ppl
    
    print(f"📊 Valutazione veloce (max {max_examples} esempi):")
    retain_ppl = eval_perplexity(retain_path, "Retain")
    forget_ppl = eval_perplexity(forget_path, "Forget")
    
    ratio = forget_ppl / retain_ppl if retain_ppl != float('inf') else float('inf')
    print(f"  Unlearning Ratio: {ratio:.2f} (>1 = buono)")
    
    return {"retain_ppl": retain_ppl, "forget_ppl": forget_ppl, "ratio": ratio}

# Valutare modello finale
student_model.eval()
results = quick_evaluation(student_model, tokenizer, 
                          "validation/retain.jsonl", "validation/forget.jsonl")

print("\n RISULTATO: Retain basso + Forget alto = Unlearning efficace!")

In [None]:
def evaluate_model_final(model, tokenizer, max_examples=100):
    """Valutazione completa del modello"""
    
    print("VALUTAZIONE FINALE")
    print("=" * 40)
    
    from rouge_score import rouge_scorer
    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
    
    def eval_dataset(path, name):
        perplexities, rouge_scores = [], []
        
        with open(path, "r") as f:
            for i, line in enumerate(f):
                if i >= max_examples: 
                    break
                try:
                    item = json.loads(line)
                    keys = list(item.keys())
                    question = str(item[keys[0]])
                    expected = str(item[keys[1]]) if len(keys) > 1 else ""
                    
                    # Perplexity
                    full_text = question + expected
                    enc = tokenizer(full_text, return_tensors="pt", max_length=256, truncation=True)
                    enc = {k: v.to(model.device) for k, v in enc.items()}
                    
                    with torch.no_grad():
                        outputs = model(**enc, labels=enc['input_ids'])
                        if outputs.loss is not None:
                            perplexities.append(torch.exp(outputs.loss).item())
                    
                    # Generazione per ROUGE
                    input_enc = tokenizer(question, return_tensors="pt", max_length=256, truncation=True)
                    input_enc = {k: v.to(model.device) for k, v in input_enc.items()}
                    
                    with torch.no_grad():
                        gen_outputs = model.generate(
                            input_enc['input_ids'],
                            attention_mask=input_enc['attention_mask'],
                            max_new_tokens=64,
                            do_sample=False,
                            pad_token_id=tokenizer.eos_token_id
                        )
                        
                        new_tokens = gen_outputs[:, input_enc['input_ids'].shape[-1]:]
                        generated_text = tokenizer.decode(new_tokens[0], skip_special_tokens=True)
                    
                    # ROUGE score
                    rouge_result = scorer.score(expected, generated_text)
                    rouge_scores.append(rouge_result['rougeL'].fmeasure)
                    
                    if i % 25 == 0:
                        print(f"  Processati {i+1}/{max_examples} esempi...")
                            
                except Exception as e:
                    continue
        
        avg_ppl = np.mean(perplexities) if perplexities else float('inf')
        avg_rouge = np.mean(rouge_scores) if rouge_scores else 0.0
        
        print(f"\n📈 {name}:")
        print(f"  Perplexity: {avg_ppl:.2f} (su {len(perplexities)} esempi)")
        print(f"  ROUGE-L: {avg_rouge:.3f} (su {len(rouge_scores)} esempi)")
        
        return avg_ppl, avg_rouge
    
    retain_ppl, retain_rouge = eval_dataset("validation/retain.jsonl", "RETAIN")
    forget_ppl, forget_rouge = eval_dataset("validation/forget.jsonl", "FORGET")
    
    # Analisi finale
    print("\n" + "=" * 40)
    print(" ANALISI UNLEARNING")
    print("=" * 40)
    
    ratio = forget_ppl / retain_ppl if retain_ppl > 0 else 0
    print(f"\n📊 Unlearning Ratio: {ratio:.3f}")
    
    if ratio > 1.2:
        print("✅ OTTIMO: Unlearning efficace")
    elif ratio > 1.0:
        print("⚠️ BUONO: Unlearning parziale")
    else:
        print("❌ Insufficiente unlearning")
    
    print(f"\n Qualità Generazione:")
    print(f"  Retain ROUGE-L: {retain_rouge:.3f}")
    print(f"  Forget ROUGE-L: {forget_rouge:.3f}")
    
    if retain_rouge > forget_rouge:
        print("✅ CORRETTO: Mantiene qualità su retain, degrada su forget")
    else:
        print("⚠️ ATTENZIONE: Qualità simile su entrambi i set")
    
    return {
        "retain_ppl": retain_ppl, 
        "forget_ppl": forget_ppl, 
        "ratio": ratio,
        "retain_rouge": retain_rouge,
        "forget_rouge": forget_rouge
    }

# Eseguire valutazione
student_model.eval()
torch.cuda.empty_cache()

final_results = evaluate_model_final(student_model, tokenizer, max_examples=100)

# Salvare risultati valutazione
torch.save(final_results, 'balanced_results/evaluation_results.pt')
print("\n Risultati valutazione salvati in: balanced_results/evaluation_results.pt")

📊 VALUTAZIONE FINALE
  Processati 1/100 esempi...
  Processati 26/100 esempi...
  Processati 51/100 esempi...
  Processati 76/100 esempi...

📈 RETAIN:
  Perplexity: 116.42 (su 100 esempi)
  ROUGE-L: 0.004 (su 100 esempi)
  Processati 1/100 esempi...
  Processati 26/100 esempi...
  Processati 51/100 esempi...
  Processati 76/100 esempi...

📈 FORGET:
  Perplexity: 167.07 (su 100 esempi)
  ROUGE-L: 0.002 (su 100 esempi)

🎯 ANALISI UNLEARNING

📊 Unlearning Ratio: 1.435
✅ OTTIMO: Unlearning efficace

📝 Qualità Generazione:
  Retain ROUGE-L: 0.004
  Forget ROUGE-L: 0.002
✅ CORRETTO: Mantiene qualità su retain, degrada su forget

💾 Risultati valutazione salvati in: balanced_results/evaluation_results.pt


In [None]:
import gc
import torch

# Mostra memoria prima
!nvidia-smi

# Elimina tutte le variabili definite (tranne quelle di sistema)
for name in dir():
    if not name.startswith('_'):
        del globals()[name]

import gc
import torch
# Garbage collector per pulire RAM Python
gc.collect()

# Libera la cache PyTorch
torch.cuda.empty_cache()
torch.cuda.ipc_collect()

# Mostra memoria dopo
!nvidia-smi
