In [33]:
import json
import os
import numpy as np
import matplotlib.pyplot as plt
import gc
import seaborn as sns
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix
import traceback
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import random

# ==================================================================
# REPRODUCIBILITY SETTINGS
# ==================================================================
SEED = 42

def set_seed(seed=SEED):
    """Set all seeds for reproducibility"""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    os.environ['PYTHONHASHSEED'] = str(seed)

# Set seeds at import time
set_seed(SEED)

## Cella 2: Configurazione Percorsi e Layer Config

In [34]:
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd())))
CACHE_DIR_NAME = "activation_cache"

LAYER_CONFIG = {
    "Qwen2.5-7B": 
    {
        "attn": [15, 16, 18],
        "mlp": [16, 18, 20],
        "hidden": [18, 19, 20]
    },    
    "Falcon3-7B-Base": 
    {
        "attn": [2, 7, 12],
        "mlp": [10, 11, 12],
        "hidden": [2, 3, 19]
    }
}

## Cella 3: Configurazione Encoder-Head (Teacher e Student)

In [35]:
# ==================================================================
# ENCODER-HEAD CONFIGURATION (Frozen Head Approach)
# ==================================================================
ENCODER_CONFIG = {
    "latent_dim": 256,
    "hidden_dim": 512,
    "dropout": 0.3,
    "learning_rate": 1e-3,
    "weight_decay": 1e-2,
    "batch_size": 64,
    "max_epochs": 100,
    "early_stopping_patience": 15,
    "early_stopping_min_delta": 1e-4,
    "gradient_clip_max_norm": 1.0,
    "optimizer": "AdamW",
    "scheduler": "CosineAnnealingLR",
    "loss_function": "BCEWithLogitsLoss",
    "use_class_weights": True
}

HEAD_CONFIG = {
    "latent_dim": 256,
    "hidden_dim": 128,
    "dropout": 0.3,
    "learning_rate": 1e-3,
    "weight_decay": 1e-2,
    "batch_size": 64,
    "max_epochs": 100,
    "early_stopping_patience": 15,
    "early_stopping_min_delta": 1e-4,
    "gradient_clip_max_norm": 1.0,
    "optimizer": "AdamW",
    "scheduler": "CosineAnnealingLR",
    "loss_function": "BCEWithLogitsLoss",
    "use_class_weights": True
}

## Cella 4: Definizione Classi Dataset e Modelli

In [36]:
# ------------------------------------------------------------------
# 1. Dataset classe per Training
# ------------------------------------------------------------------
class SimpleDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X).float()
        self.y = torch.from_numpy(y).float()  # BCE expects float
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# ------------------------------------------------------------------
# 2. ENCODER: Maps Input Dimension -> Latent Dimension
# ------------------------------------------------------------------
class Encoder(nn.Module):
    def __init__(self, input_dim: int, latent_dim: int, hidden_dim: int = 1024, dropout: float = 0.3):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LayerNorm(hidden_dim // 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim // 2, latent_dim),
            nn.LayerNorm(latent_dim)  # Normalize latent space for stability
        )
        
    def forward(self, x):
        return self.net(x)

# ------------------------------------------------------------------
# 3. CLASSIFICATION HEAD: Maps Latent Dimension -> Probability
# ------------------------------------------------------------------
class ClassificationHead(nn.Module):
    def __init__(self, latent_dim: int, hidden_dim: int = 128, dropout: float = 0.3):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, 1)  # Binary output
        )
        
    def forward(self, x):
        return self.net(x).squeeze(-1)

    def predict(self, x):
        with torch.no_grad():
            logits = self.forward(x)
            return (torch.sigmoid(logits) > 0.5).long()

## Cella 5: Funzioni Utilità per Caricamento e Seeding

In [37]:
def stats_per_json(model_name, dataset_name):
    """Load statistics from hallucination labels JSON"""
    file_path = os.path.join(PROJECT_ROOT, CACHE_DIR_NAME, model_name, dataset_name, 
                             "generations", "hallucination_labels.json")
    with open(file_path, 'r') as file:
        data = json.load(file)
    total = len(data)
    hallucinations = sum(1 for item in data if item['is_hallucination'])
    hallucinated_items = [item['instance_id'] for item in data if item['is_hallucination']]
    return {
        'total': total,
        'hallucinations': hallucinations,
        'hallucinated_items': hallucinated_items,
        'model_name': model_name
    }

def load_and_split_layers(model_name, dataset_name, layer_indices, type_layer, stats, 
                          train_indices, test_indices):
    """Load activation layers and split into train/test"""
    print(f" Loading {model_name} [{type_layer}]: layers {layer_indices}...")
    total_samples = stats['total']
    hallucinated_set = set(stats['hallucinated_items'])
    
    # Create labels
    y_full = np.zeros(total_samples, dtype=np.int8)
    y_full[list(hallucinated_set)] = 1
    y_train = y_full[train_indices]
    y_test = y_full[test_indices]

    # Load and concatenate features
    all_features = []
    for layer_idx in layer_indices:
        file_path = os.path.join(PROJECT_ROOT, CACHE_DIR_NAME, model_name, dataset_name,
                                 "activation_" + type_layer, f"layer{layer_idx}_activations.pt")
        if not os.path.exists(file_path):
            continue
        
        acts = torch.load(file_path, map_location='cpu')
        if acts.shape[0] > total_samples:
            acts = acts[:total_samples]
        
        X_layer = acts.float().numpy() if isinstance(acts, torch.Tensor) else acts.astype(np.float32)
        if X_layer.ndim > 2:
            X_layer = X_layer.reshape(X_layer.shape[0], -1)
        
        all_features.append(X_layer)
        del acts
        gc.collect()

    if not all_features:
        raise ValueError(f"No layers found for {model_name}")
    
    X_full = np.concatenate(all_features, axis=1)
    X_train = X_full[train_indices]
    X_test = X_full[test_indices]
    
    return X_train, X_test, y_train, y_test

def get_generator(seed=SEED):
    """Create reproducible generator for DataLoader"""
    g = torch.Generator()
    g.manual_seed(seed)
    return g

## Cella 6: Funzione Training Teacher (Encoder + Head Jointly)

In [38]:
def train_teacher_pipeline(X_train, y_train, X_val, y_val, input_dim, device, 
                          model_name, encoder_config=ENCODER_CONFIG, head_config=HEAD_CONFIG):
    """Train encoder + head jointly for teacher model"""
    print(f"   [Teacher] Training full pipeline for {model_name}...")
    
    # Initialize modules
    encoder = Encoder(input_dim, encoder_config['latent_dim'], 
                     encoder_config['hidden_dim'], encoder_config['dropout']).to(device)
    head = ClassificationHead(encoder_config['latent_dim'], 
                             head_config['hidden_dim'], head_config['dropout']).to(device)
    
    # Combine parameters for optimizer
    params = list(encoder.parameters()) + list(head.parameters())
    optimizer = optim.AdamW(params, lr=encoder_config['learning_rate'], 
                           weight_decay=encoder_config['weight_decay'])
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=encoder_config['max_epochs'])
    
    # Class weights for imbalance
    n_pos = y_train.sum()
    n_neg = len(y_train) - n_pos
    pos_weight = torch.tensor([n_neg / n_pos]).to(device)
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    
    train_loader = DataLoader(SimpleDataset(X_train, y_train), 
                             batch_size=encoder_config['batch_size'], 
                             shuffle=True, generator=get_generator())
    val_loader = DataLoader(SimpleDataset(X_val, y_val), 
                           batch_size=encoder_config['batch_size'], shuffle=False)
    
    best_acc = 0.0
    patience_counter = 0
    best_states = None
    
    for epoch in range(encoder_config['max_epochs']):
        encoder.train()
        head.train()
        train_loss = 0.0
        
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            
            latents = encoder(X_batch)
            logits = head(latents)
            
            loss = criterion(logits, y_batch)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(list(encoder.parameters()) + list(head.parameters()),
                                          max_norm=encoder_config['gradient_clip_max_norm'])
            optimizer.step()
            train_loss += loss.item()
        
        # Validation
        encoder.eval()
        head.eval()
        all_preds, all_labels = [], []
        
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch = X_batch.to(device)
                latents = encoder(X_batch)
                preds = head.predict(latents)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(y_batch.numpy())
        
        acc = accuracy_score(all_labels, all_preds)
        scheduler.step()
        
        if acc > best_acc:
            best_acc = acc
            patience_counter = 0
            best_states = {
                'encoder': encoder.state_dict().copy(),
                'head': head.state_dict().copy()
            }
        else:
            patience_counter += 1
            if patience_counter >= encoder_config['early_stopping_patience']:
                print(f"     Early stopping at epoch {epoch+1}. Best Acc: {best_acc:.4f}")
                break
    
    encoder.load_state_dict(best_states['encoder'])
    head.load_state_dict(best_states['head'])
    
    return encoder, head, best_acc

## Cella 7: Funzione Training Student (Adapter con Head Frozen)

In [39]:
def train_student_adapter(X_train, y_train, X_val, y_val, input_dim, frozen_head, device, 
                         student_name, encoder_config=ENCODER_CONFIG):
    """Train new encoder with frozen head from teacher"""
    print(f"   [Student] Training Adapter Encoder for {student_name} (Head Frozen)...")
    
    # Freeze the Head
    frozen_head.eval()
    for param in frozen_head.parameters():
        param.requires_grad = False
    
    # New Encoder for Student
    encoder = Encoder(input_dim, encoder_config['latent_dim'], 
                     encoder_config['hidden_dim'], encoder_config['dropout']).to(device)
    
    # Optimize ONLY the encoder
    optimizer = optim.AdamW(encoder.parameters(), lr=encoder_config['learning_rate'], 
                           weight_decay=encoder_config['weight_decay'])
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=encoder_config['max_epochs'])
    
    n_pos = y_train.sum()
    n_neg = len(y_train) - n_pos
    pos_weight = torch.tensor([n_neg / n_pos]).to(device)
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    
    train_loader = DataLoader(SimpleDataset(X_train, y_train), 
                             batch_size=encoder_config['batch_size'], 
                             shuffle=True, generator=get_generator())
    val_loader = DataLoader(SimpleDataset(X_val, y_val), 
                           batch_size=encoder_config['batch_size'], shuffle=False)
    
    best_acc = 0.0
    patience_counter = 0
    best_state = None
    
    for epoch in range(encoder_config['max_epochs']):
        encoder.train()
        train_loss = 0.0
        
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            
            # Forward pass: Student Input -> Student Encoder -> Frozen Head -> Loss
            latents = encoder(X_batch)
            logits = frozen_head(latents)  # Head is fixed
            
            loss = criterion(logits, y_batch)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(encoder.parameters(),
                                          max_norm=encoder_config['gradient_clip_max_norm'])
            optimizer.step()
            train_loss += loss.item()
        
        # Validation
        encoder.eval()
        all_preds, all_labels = [], []
        
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch = X_batch.to(device)
                latents = encoder(X_batch)
                preds = frozen_head.predict(latents)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(y_batch.numpy())
        
        acc = accuracy_score(all_labels, all_preds)
        scheduler.step()
        
        if acc > best_acc:
            best_acc = acc
            patience_counter = 0
            best_state = encoder.state_dict().copy()
        else:
            patience_counter += 1
            if patience_counter >= encoder_config['early_stopping_patience']:
                print(f"     Early stopping at epoch {epoch+1}. Best Acc: {best_acc:.4f}")
                break
    
    # FIX: Load best state AFTER the training loop ends
    if best_state is not None:
        encoder.load_state_dict(best_state)
    
    return encoder, best_acc

## Cella 8: Funzione Salvataggio Confusion Matrix

In [40]:
def plot_confusion_matrix(y_true, y_pred, title, filename):
    """Plot and save confusion matrix"""
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title(title)
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    os.makedirs("confusion_matrices_frozen_head", exist_ok=True)
    plt.savefig(os.path.join("confusion_matrices_frozen_head", filename))
    plt.close()

## Cella 9: Main Execution - Setup Iniziale

In [41]:
# Load statistics
qwen_stats = stats_per_json("Qwen2.5-7B", "belief_bank")
falcon_stats = stats_per_json("Falcon3-7B-Base", "belief_bank")

# Prepare indices
n_samples = qwen_stats['total']
rng = np.random.RandomState(SEED)
shuffled_indices = rng.permutation(n_samples)
split = int(0.7 * n_samples)
train_indices, test_indices = shuffled_indices[:split], shuffled_indices[split:]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

results_log = []

# Define scenarios
scenarios = [
    {"teacher": "Qwen2.5-7B", "student": "Falcon3-7B-Base"},
    {"teacher": "Falcon3-7B-Base", "student": "Qwen2.5-7B"}
]

print(f"\nConfiguration:")
print(f"  LATENT_DIM: {ENCODER_CONFIG['latent_dim']}")
print(f"  HIDDEN_DIM: {ENCODER_CONFIG['hidden_dim']}")
print(f"  MAX_EPOCHS: {ENCODER_CONFIG['max_epochs']}")
print(f"  BATCH_SIZE: {ENCODER_CONFIG['batch_size']}")

Using device: cuda

Configuration:
  LATENT_DIM: 256
  HIDDEN_DIM: 512
  MAX_EPOCHS: 100
  BATCH_SIZE: 64


## Cella 10: Main Execution - Loop per Layer Types

In [42]:
for layer_type in ['attn', 'mlp', 'hidden']:
    print(f"\n{'='*60}")
    print(f"PROCESSING LAYER TYPE: {layer_type.upper()}")
    print(f"{'='*60}")
    
    # Load and Preprocess Data for BOTH models
    print("Loading data for Qwen...")
    X_qwen_tr, X_qwen_te, y_qwen_tr, y_qwen_te = load_and_split_layers(
        "Qwen2.5-7B", "belief_bank", LAYER_CONFIG["Qwen2.5-7B"][layer_type], 
        layer_type, qwen_stats, train_indices, test_indices)

    print("Loading data for Falcon...")
    X_falcon_tr, X_falcon_te, y_falcon_tr, y_falcon_te = load_and_split_layers(
        "Falcon3-7B-Base", "belief_bank", LAYER_CONFIG["Falcon3-7B-Base"][layer_type], 
        layer_type, falcon_stats, train_indices, test_indices)

    # Scaling (Independent for each model)
    s_qwen = StandardScaler()
    X_qwen_tr = s_qwen.fit_transform(X_qwen_tr)
    X_qwen_te = s_qwen.transform(X_qwen_te)

    s_falcon = StandardScaler()
    X_falcon_tr = s_falcon.fit_transform(X_falcon_tr)
    X_falcon_te = s_falcon.transform(X_falcon_te)

    # Pack data
    data_map = {
        "Qwen2.5-7B": {"X_tr": X_qwen_tr, "y_tr": y_qwen_tr, "X_te": X_qwen_te, "y_te": y_qwen_te},
        "Falcon3-7B-Base": {"X_tr": X_falcon_tr, "y_tr": y_falcon_tr, "X_te": X_falcon_te, "y_te": y_falcon_te}
    }

    # Run Both Scenarios
    for sc in scenarios:
        t_name = sc['teacher']
        s_name = sc['student']
        print(f"\n--- SCENARIO: Teacher={t_name} -> Student={s_name} ---")
        
        teacher_data = data_map[t_name]
        student_data = data_map[s_name]
        
        # Split Train into Train/Val for early stopping
        n_tr = len(teacher_data["X_tr"])
        idx = np.arange(n_tr)
        np.random.seed(SEED)  
        np.random.shuffle(idx)
        v_size = int(0.15 * n_tr)
        tr_idx, val_idx = idx[v_size:], idx[:v_size]
        
        # --- PHASE 1: Train Teacher ---
        enc_teacher, head_shared, best_acc_t = train_teacher_pipeline(
            teacher_data["X_tr"][tr_idx], teacher_data["y_tr"][tr_idx],
            teacher_data["X_tr"][val_idx], teacher_data["y_tr"][val_idx],
            input_dim=teacher_data["X_tr"].shape[1],
            device=device, model_name=t_name,
            encoder_config=ENCODER_CONFIG, head_config=HEAD_CONFIG
        )
        
        # Evaluate Teacher on Test
        enc_teacher.eval()
        head_shared.eval()
        with torch.no_grad():
            z_t = enc_teacher(torch.from_numpy(teacher_data["X_te"]).float().to(device))
            preds_t = head_shared.predict(z_t).cpu().numpy()
        
        t_f1 = f1_score(teacher_data["y_te"], preds_t)
        t_acc = accuracy_score(teacher_data["y_te"], preds_t)
        print(f"   [Result] Teacher ({t_name}) Test F1: {t_f1:.4f} | Acc: {t_acc:.4f}")
        plot_confusion_matrix(teacher_data["y_te"], preds_t, 
                             f"Teacher {t_name} ({layer_type})", 
                             f"cm_{layer_type}_teacher_{t_name}.png")

        # --- PHASE 2: Train Student with Frozen Head ---
        enc_student, best_acc_s = train_student_adapter(
            student_data["X_tr"][tr_idx], student_data["y_tr"][tr_idx],
            student_data["X_tr"][val_idx], student_data["y_tr"][val_idx],
            input_dim=student_data["X_tr"].shape[1],
            frozen_head=head_shared,
            device=device, student_name=s_name,
            encoder_config=ENCODER_CONFIG
        )
        
        # Evaluate Student on Test
        enc_student.eval()
        with torch.no_grad():
            z_s = enc_student(torch.from_numpy(student_data["X_te"]).float().to(device))
            preds_s = head_shared.predict(z_s).cpu().numpy()
        
        s_f1 = f1_score(student_data["y_te"], preds_s)
        s_acc = accuracy_score(student_data["y_te"], preds_s)
        
        print(f"   [Result] Student ({s_name}) Adapter Test F1: {s_f1:.4f} | Acc: {s_acc:.4f}")
        plot_confusion_matrix(student_data["y_te"], preds_s, 
                             f"Student {s_name} Adapter ({layer_type})", 
                             f"cm_{layer_type}_{s_name}_adapter.png")
        
        # --- PHASE 3: Save Models ---
        print("   Saving models...")
        model_save_dir = os.path.join("models_frozen_head", layer_type)
        os.makedirs(model_save_dir, exist_ok=True)
        
        # Save Teacher Encoder
        teacher_encoder_filename = os.path.join(model_save_dir, f"frozen_head_encoder_{t_name}.pt")
        torch.save({
            'model_state_dict': enc_teacher.state_dict(),
            'encoder_config': ENCODER_CONFIG,
            'input_dim': teacher_data["X_tr"].shape[1],
            'latent_dim': ENCODER_CONFIG['latent_dim'],
            'best_val_acc': best_acc_t,
            'model_name': t_name,
            'layer_type': layer_type,
            'scenario': f"{t_name}_teacher"
        }, teacher_encoder_filename)
        print(f"     ✓ Teacher Encoder saved: {teacher_encoder_filename}")
        
        # Save Shared Head
        head_filename = os.path.join(model_save_dir, f"frozen_head_shared_head_{t_name}.pt")
        torch.save({
            'model_state_dict': head_shared.state_dict(),
            'head_config': HEAD_CONFIG,
            'latent_dim': ENCODER_CONFIG['latent_dim'],
            'best_val_acc': best_acc_t,
            'teacher_model': t_name,
            'layer_type': layer_type,
            'scenario': f"{t_name}_head"
        }, head_filename)
        print(f"     ✓ Shared Head saved: {head_filename}")
        
        # Save Student Encoder
        student_encoder_filename = os.path.join(model_save_dir, f"frozen_head_encoder_{s_name}_adapter.pt")
        torch.save({
            'model_state_dict': enc_student.state_dict(),
            'encoder_config': ENCODER_CONFIG,
            'input_dim': student_data["X_tr"].shape[1],
            'latent_dim': ENCODER_CONFIG['latent_dim'],
            'best_val_acc': best_acc_s,
            'model_name': s_name,
            'layer_type': layer_type,
            'scenario': f"{s_name}_student_adapter"
        }, student_encoder_filename)
        print(f"     ✓ Student Encoder saved: {student_encoder_filename}")
        
        # Log results with model paths
        results_log.append({
            "layer": layer_type,
            "teacher": t_name,
            "student": s_name,
            "teacher_acc": t_acc,
            "teacher_f1": t_f1,
            "student_acc": s_acc,
            "student_f1": s_f1,
            "gap_acc": t_acc - s_acc,
            "teacher_encoder_path": teacher_encoder_filename,
            "shared_head_path": head_filename,
            "student_encoder_path": student_encoder_filename,
            "encoder_config": ENCODER_CONFIG,
            "head_config": HEAD_CONFIG
        })

    # Cleanup memory
    del X_qwen_tr, X_falcon_tr, data_map
    gc.collect()
    torch.cuda.empty_cache()


PROCESSING LAYER TYPE: ATTN
Loading data for Qwen...
 Loading Qwen2.5-7B [attn]: layers [15, 16, 18]...
Loading data for Falcon...
 Loading Falcon3-7B-Base [attn]: layers [2, 7, 12]...
Loading data for Falcon...
 Loading Falcon3-7B-Base [attn]: layers [2, 7, 12]...

--- SCENARIO: Teacher=Qwen2.5-7B -> Student=Falcon3-7B-Base ---
   [Teacher] Training full pipeline for Qwen2.5-7B...

--- SCENARIO: Teacher=Qwen2.5-7B -> Student=Falcon3-7B-Base ---
   [Teacher] Training full pipeline for Qwen2.5-7B...
     Early stopping at epoch 72. Best Acc: 0.9958
   [Result] Teacher (Qwen2.5-7B) Test F1: 0.9923 | Acc: 0.9910
     Early stopping at epoch 72. Best Acc: 0.9958
   [Result] Teacher (Qwen2.5-7B) Test F1: 0.9923 | Acc: 0.9910
   [Student] Training Adapter Encoder for Falcon3-7B-Base (Head Frozen)...
   [Student] Training Adapter Encoder for Falcon3-7B-Base (Head Frozen)...
     Early stopping at epoch 61. Best Acc: 0.9309
   [Result] Student (Falcon3-7B-Base) Adapter Test F1: 0.9386 | Acc: 

## Cella 11: Salvataggio Risultati e Summary

In [43]:
# Save final metrics with detailed information
os.makedirs("results_metrics", exist_ok=True)
metrics_file = "results_metrics/frozen_head_results_detailed.json"

detailed_results = []

for r in results_log:
    result_entry = {
        "layer_type": r['layer'],
        "teacher_model": r['teacher'],
        "student_model": r['student'],
        
        # ==================== ENCODER CONFIG ====================
        "encoder_config": {
            "latent_dim": r['encoder_config']['latent_dim'],
            "hidden_dim": r['encoder_config']['hidden_dim'],
            "dropout": r['encoder_config']['dropout'],
            "learning_rate": r['encoder_config']['learning_rate'],
            "weight_decay": r['encoder_config']['weight_decay'],
            "batch_size": r['encoder_config']['batch_size'],
            "max_epochs": r['encoder_config']['max_epochs'],
            "early_stopping_patience": r['encoder_config']['early_stopping_patience'],
            "early_stopping_min_delta": r['encoder_config']['early_stopping_min_delta'],
            "gradient_clip_max_norm": r['encoder_config']['gradient_clip_max_norm'],
            "optimizer": r['encoder_config']['optimizer'],
            "scheduler": r['encoder_config']['scheduler'],
            "loss_function": r['encoder_config']['loss_function'],
            "use_class_weights": r['encoder_config']['use_class_weights']
        },
        
        # ==================== HEAD CONFIG ====================
        "head_config": {
            "latent_dim": r['head_config']['latent_dim'],
            "hidden_dim": r['head_config']['hidden_dim'],
            "dropout": r['head_config']['dropout'],
            "learning_rate": r['head_config']['learning_rate'],
            "weight_decay": r['head_config']['weight_decay'],
            "batch_size": r['head_config']['batch_size'],
            "max_epochs": r['head_config']['max_epochs'],
            "early_stopping_patience": r['head_config']['early_stopping_patience'],
            "early_stopping_min_delta": r['head_config']['early_stopping_min_delta'],
            "gradient_clip_max_norm": r['head_config']['gradient_clip_max_norm'],
            "optimizer": r['head_config']['optimizer'],
            "scheduler": r['head_config']['scheduler'],
            "loss_function": r['head_config']['loss_function'],
            "use_class_weights": r['head_config']['use_class_weights']
        },
        
        # ==================== TEACHER ENCODER ====================
        "teacher_encoder": {
            "architecture": {
                "input_dim": None,  # Will be filled from saved model if needed
                "latent_dim": r['encoder_config']['latent_dim'],
                "hidden_dim": r['encoder_config']['hidden_dim'],
                "dropout": r['encoder_config']['dropout']
            },
            "training_results": {
                "best_val_acc": round(r['teacher_acc'], 4),  # Approximation
                "model_saved_path": r['teacher_encoder_path']
            }
        },
        
        # ==================== SHARED HEAD ====================
        "shared_head": {
            "architecture": {
                "latent_dim": r['encoder_config']['latent_dim'],
                "hidden_dim": r['head_config']['hidden_dim'],
                "dropout": r['head_config']['dropout']
            },
            "training_results": {
                "best_val_acc": round(r['teacher_acc'], 4),
                "model_saved_path": r['shared_head_path']
            }
        },
        
        # ==================== STUDENT ENCODER ====================
        "student_encoder": {
            "architecture": {
                "input_dim": None,  # Will be filled from saved model if needed
                "latent_dim": r['encoder_config']['latent_dim'],
                "hidden_dim": r['encoder_config']['hidden_dim'],
                "dropout": r['encoder_config']['dropout']
            },
            "training_results": {
                "best_val_acc": round(r['student_acc'], 4),
                "model_saved_path": r['student_encoder_path']
            }
        },
        
        # ==================== PERFORMANCE METRICS ====================
        "teacher_performance": {
            "accuracy": round(r['teacher_acc'], 4),
            "f1_score": round(r['teacher_f1'], 4)
        },
        "student_adapter_performance": {
            "accuracy": round(r['student_acc'], 4),
            "f1_score": round(r['student_f1'], 4)
        },
        "transfer_gap": {
            "accuracy_gap": round(r['gap_acc'], 4)
        }
    }
    
    detailed_results.append(result_entry)

with open(metrics_file, "w") as f:
    json.dump(detailed_results, f, indent=4)

print("\nDONE! Summary:")
for r in results_log:
    print(f"[{r['layer']}] {r['teacher']}->{r['student']} | T_Acc: {r['teacher_acc']:.3f} | S_Acc: {r['student_acc']:.3f} | Gap: {r['gap_acc']:.3f}")

print(f"\n✓ Detailed results saved to: {metrics_file}")
print("✓ Models saved in: models_frozen_head/")


DONE! Summary:
[attn] Qwen2.5-7B->Falcon3-7B-Base | T_Acc: 0.991 | S_Acc: 0.926 | Gap: 0.065
[attn] Falcon3-7B-Base->Qwen2.5-7B | T_Acc: 0.927 | S_Acc: 0.992 | Gap: -0.065
[mlp] Qwen2.5-7B->Falcon3-7B-Base | T_Acc: 0.987 | S_Acc: 0.914 | Gap: 0.073
[mlp] Falcon3-7B-Base->Qwen2.5-7B | T_Acc: 0.909 | S_Acc: 0.989 | Gap: -0.080
[hidden] Qwen2.5-7B->Falcon3-7B-Base | T_Acc: 0.986 | S_Acc: 0.907 | Gap: 0.080
[hidden] Falcon3-7B-Base->Qwen2.5-7B | T_Acc: 0.906 | S_Acc: 0.991 | Gap: -0.085

✓ Detailed results saved to: results_metrics/frozen_head_results_detailed.json
✓ Models saved in: models_frozen_head/
