In [None]:
import torch
import torch.nn.functional as F
from transformers import LlamaTokenizer, LlamaForCausalLM
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict
import os
from typing import Dict, List, Tuple, Optional
import json
import gc

In [None]:
# Checkpoint identifiers passed to `from_pretrained` for the two models under comparison.
MODEL_1_PATH = "meta-llama/Llama-2-7b-chat-hf"
MODEL_2_PATH = "meta-llama/Llama-2-7b-hf"

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')

print(DEVICE)

device(type='cuda')

In [None]:
print(f"Using device: {DEVICE}")

# %%
# A single tokenizer is loaded, from the first checkpoint.
tokenizer = LlamaTokenizer.from_pretrained(MODEL_1_PATH)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Both checkpoints are loaded with identical options: half precision, automatic placement.
_load_options = dict(torch_dtype=torch.float16, device_map="auto")
model_1 = LlamaForCausalLM.from_pretrained(MODEL_1_PATH, **_load_options)
model_2 = LlamaForCausalLM.from_pretrained(MODEL_2_PATH, **_load_options)

print("Models loaded successfully")

Loading checkpoint shards: 100%|██████████| 2/2 [00:13<00:00,  6.58s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:49<00:00, 24.82s/it]


In [None]:
def _place_on_device(model, device):
    """Move ``model`` to ``device`` unless it was already dispatched via ``device_map``.

    Models loaded with ``device_map=...`` carry an ``hf_device_map`` attribute and
    have already been placed by accelerate. Calling ``.to()`` on such a model emits a
    warning and raises when some modules were offloaded, so those models are
    returned unchanged.

    Args:
        model: The loaded model.
        device: Target ``torch.device`` for models that still need an explicit move.

    Returns:
        The model (the same object that ``.to()`` would have returned).
    """
    if getattr(model, "hf_device_map", None):
        return model
    return model.to(device)


model_1 = _place_on_device(model_1, DEVICE)
model_2 = _place_on_device(model_2, DEVICE)

In [None]:
# Mutable module-level registries used by the activation-capture hooks.
activations_model_1: Dict[str, dict] = dict()  # hook name -> captured input/output for "Model_1"
activations_model_2: Dict[str, dict] = dict()  # hook name -> captured input/output for any other model name
shared_weights: Dict[str, dict] = dict()  # hook name -> weight/bias, stored from model_1 only
current_hooks: list = list()  # handles of the hooks that are currently registered
hook_errors: List[str] = list()  # messages recorded when registering or running a hook fails


In [None]:
def clear_activations():
    """Empty the captured-activation and shared-weight registries and free memory.

    The three module-level dictionaries are cleared in place (their identity is
    preserved), the CUDA allocator cache is released when CUDA is available, and a
    garbage-collection pass is run afterwards.
    """
    for registry in (activations_model_1, activations_model_2, shared_weights):
        registry.clear()

    cuda_present = torch.cuda.is_available()
    if cuda_present:
        torch.cuda.empty_cache()

    gc.collect()

def remove_all_hooks():
    """Remove every hook handle tracked in ``current_hooks`` and empty the list.

    Removal is best-effort: a handle whose ``remove()`` raises an ordinary
    exception is reported and skipped so the remaining handles are still removed.
    Only ``Exception`` subclasses are swallowed; ``KeyboardInterrupt`` and
    ``SystemExit`` propagate, and in that case the list is left untouched so the
    call can be repeated.
    """
    for handle in current_hooks:
        try:
            handle.remove()
        except Exception as exc:
            # Keep going, but do not hide the failure from the reader.
            print(f"WARNING: failed to remove hook {handle!r}: {exc}")
    current_hooks.clear()

def get_activation_hook(name, model_name):
    """Build a forward hook that records a module's input and output under ``name``.

    Records for ``model_name == "Model_1"`` go to ``activations_model_1`` and, the
    first time a name is seen, the module's weight and bias are copied into
    ``shared_weights``. Records for any other model name go to
    ``activations_model_2``. All stored tensors are detached CPU copies.

    If anything fails inside the hook, the error is appended to ``hook_errors``,
    printed as a warning, and a placeholder record with ``None`` values is stored
    so that the key still exists.

    Args:
        name: Key under which this module's data is stored.
        model_name: Label of the model the hook is attached to.

    Returns:
        A callable with the ``(module, input, output)`` forward-hook signature.
    """
    from_model_1 = model_name == "Model_1"

    def _cpu_copy(tensor):
        # Detached CPU copy, or None when there is nothing to copy.
        return None if tensor is None else tensor.detach().cpu()

    def hook(module, input, output):
        try:
            # A tuple output contributes only its first element.
            if isinstance(output, tuple):
                captured_out = output[0] if output else None
            else:
                captured_out = output

            # Only the first positional input is kept.
            captured_in = input[0] if isinstance(input, tuple) and input else None

            record = {
                'output': _cpu_copy(captured_out),
                'input': _cpu_copy(captured_in),
            }

            if from_model_1:
                # Weights are taken from Model_1 only, once per hook name.
                if name not in shared_weights:
                    shared_weights[name] = {
                        'weight': _cpu_copy(getattr(module, 'weight', None)),
                        'bias': _cpu_copy(getattr(module, 'bias', None)),
                    }
                activations_model_1[name] = record
            else:
                activations_model_2[name] = record

        except Exception as e:
            error_msg = f"Hook error in {name} ({model_name}): {str(e)}"
            hook_errors.append(error_msg)
            print(f"WARNING: {error_msg}")

            # Placeholder entries keep downstream lookups from hitting missing keys.
            placeholder = {'output': None, 'input': None}
            if from_model_1:
                activations_model_1[name] = placeholder
                shared_weights.setdefault(name, {'weight': None, 'bias': None})
            else:
                activations_model_2[name] = placeholder

    return hook

In [None]:
def register_llama_hooks(model, model_name, layer_range=None, max_layers=None):
    """Attach activation-capturing forward hooks to a model's decoder layers.

    For every selected layer, hooks are registered on the four attention
    projections, the three MLP projections and the two layer norms; afterwards the
    final norm and ``lm_head`` are hooked as well. ``hook_errors`` is cleared at
    the start of every call, and the created handles are appended to
    ``current_hooks``.

    Args:
        model: Model exposing ``model.layers``, ``model.norm`` and ``lm_head``.
        model_name: Label forwarded to ``get_activation_hook``.
        layer_range: Iterable of layer indices to hook. When ``None``, the first
            ``max_layers`` layers (or all layers) are used. Indices at or beyond
            the number of layers are skipped.
        max_layers: Upper bound on the number of layers; only applied when
            ``layer_range`` is ``None``.

    Returns:
        List of the hook handles created by this call.
    """
    hook_errors.clear()

    decoder_layers = model.model.layers
    layer_limit = len(decoder_layers)
    if max_layers is not None:
        layer_limit = min(layer_limit, max_layers)
    if layer_range is None:
        layer_range = range(layer_limit)

    print(f"Registering hooks for {model_name}: {len(layer_range)} layers")

    handles = []
    tally = {'ok': 0, 'failed': 0}

    def _attach(module, hook_name):
        # Register one hook; a failure is recorded instead of aborting the run.
        try:
            handle = module.register_forward_hook(
                get_activation_hook(hook_name, model_name)
            )
            handles.append(handle)
            tally['ok'] += 1
        except Exception as e:
            hook_errors.append(f"Failed to register {hook_name}: {str(e)}")
            tally['failed'] += 1

    for i in layer_range:
        if i >= len(decoder_layers):
            continue

        block = decoder_layers[i]
        prefix = f"layer_{i}"

        # All submodules are resolved before any hook of this layer is registered.
        targets = [
            (block.self_attn.q_proj, "attention_q"),
            (block.self_attn.k_proj, "attention_k"),
            (block.self_attn.v_proj, "attention_v"),
            (block.self_attn.o_proj, "attention_output"),
            (block.mlp.gate_proj, "mlp_gate"),
            (block.mlp.up_proj, "mlp_up"),
            (block.mlp.down_proj, "mlp_down"),
            (block.input_layernorm, "input_norm"),
            (block.post_attention_layernorm, "post_attn_norm"),
        ]
        for module, suffix in targets:
            _attach(module, f"{prefix}_{suffix}")

    # The final norm and the output head are counted as a pair.
    try:
        norm_handle = model.model.norm.register_forward_hook(
            get_activation_hook("final_norm", model_name)
        )
        handles.append(norm_handle)
        head_handle = model.lm_head.register_forward_hook(
            get_activation_hook("lm_head", model_name)
        )
        handles.append(head_handle)
        tally['ok'] += 2
    except Exception as e:
        hook_errors.append(f"Failed to register final components: {str(e)}")
        tally['failed'] += 2

    current_hooks.extend(handles)

    print(f"Hook registration complete for {model_name}:")
    print(f"  ✓ Successful: {tally['ok']}")
    print(f"  ✗ Failed: {tally['failed']}")

    return handles

def select_neurons_per_token_position(activations1, activations2, shared_weights, mode='min', seed=42):
    """
    Pick one neuron per token position in every layer captured for both models.

    Only 3-dimensional outputs are processed, and only batch element 0 is read.
    In 'random' mode a neuron index is drawn uniformly; in every other mode the
    neuron with the smallest absolute difference between the two models' outputs
    at that token is chosen.

    Args:
        activations1: Mapping of layer name -> {'output': tensor, ...} for model 1
        activations2: Same mapping for model 2
        shared_weights: Mapping of layer name -> weights; looked up but not used
            for the selection itself
        mode: 'random' for random selection; any other value selects the minimum
            difference (the given value is stored as 'selection_mode')
        seed: Seed applied to the global NumPy and PyTorch generators

    Returns:
        Dict of layer name -> per-token selections plus shape metadata.
    """
    np.random.seed(seed)
    torch.manual_seed(seed)
    selected_neurons = {}

    print(f"Selecting neurons per token position from {len(activations1)} layers...")
    print(f"Selection mode: {mode}")

    pick_randomly = mode == 'random'

    for layer_name, layer_data in activations1.items():
        if not isinstance(layer_data, dict):
            continue

        out_1 = layer_data.get('output')
        out_2 = activations2.get(layer_name, {}).get('output')
        shared_weights.get(layer_name, {})  # lookup kept for parity; result unused

        if out_1 is None or out_2 is None:
            print(f"Skipping {layer_name}: Missing activation data")
            continue

        try:
            if len(out_1.shape) != 3:
                # Anything that is not [batch, seq_len, hidden_size] is ignored.
                continue

            _, seq_len, hidden_size = out_1.shape
            if hidden_size == 0:
                continue

            token_selections = {}
            for token_pos in range(seq_len):
                row_1 = out_1[0, token_pos, :]
                row_2 = out_2[0, token_pos, :]

                if pick_randomly:
                    neuron_idx = torch.randint(0, hidden_size, (1,)).item()
                    diff_value = torch.abs(row_1[neuron_idx] - row_2[neuron_idx]).item()
                else:
                    gap = torch.abs(row_1 - row_2)
                    neuron_idx = torch.argmin(gap).item()
                    diff_value = gap[neuron_idx].item()

                value_1 = row_1[neuron_idx].item()
                value_2 = row_2[neuron_idx].item()
                token_selections[token_pos] = {
                    'neuron_index': neuron_idx,
                    'difference': diff_value,
                    'activation1_value': value_1,
                    'activation2_value': value_2,
                    'abs_activation1': abs(value_1),
                    'abs_activation2': abs(value_2),
                    'selection_mode': mode
                }

            selected_neurons[layer_name] = {
                'per_token_selections': token_selections,
                'sequence_length': seq_len,
                'hidden_size': hidden_size,
                'activation_shape': list(out_1.shape),
                'layer_type': get_component_type(layer_name)
            }

        except Exception as e:
            print(f"Error selecting neurons for {layer_name}: {e}")
            continue

    print(f"Successfully selected neurons from {len(selected_neurons)} layers")
    return selected_neurons

def get_component_type(layer_name):
    """Classify a hook name by the first known keyword it contains.

    Keywords are tested in a fixed priority order, so a name containing several
    of them is classified by the earliest match. Names matching none of the
    keywords are reported as 'other'.
    """
    keyword_to_type = (
        ('attention', 'attention'),
        ('mlp', 'mlp'),
        ('norm', 'normalization'),
        ('lm_head', 'output'),
        ('embed', 'embedding'),
    )
    for keyword, component in keyword_to_type:
        if keyword in layer_name:
            return component
    return 'other'

In [None]:
# Fallback epsilon for the RMS normalisation when the weight record carries no
# 'eps' entry. NOTE(review): assumed to match the checkpoints' `rms_norm_eps`
# (1e-5 in the Llama-2 configs) -- confirm against `model.config.rms_norm_eps`.
_DEFAULT_RMS_NORM_EPS = 1e-5


def _rms_normalize(vector, eps):
    """Scale a 1-D float vector by the reciprocal of its root-mean-square."""
    return vector * torch.rsqrt(vector.pow(2).mean() + eps)


def calculate_single_token_neuron(layer_name, neuron_idx, token_pos,
                                 layer_1_data, layer_2_data, shared_weight_data):
    """Recompute one neuron's output at one token position for both models.

    Both recomputations use the SAME weights (``shared_weight_data``, i.e. the
    weights captured from model 1) but each model's own captured input. The
    results are compared with the outputs the hooks actually recorded.

    * Layers whose name contains ``'norm'`` are treated as RMS norms:
      ``weight[i] * x[i] / sqrt(mean(x**2) + eps)`` (plus bias if one exists).
      The captured input is the norm module's raw input, so the normalisation is
      applied here. ``eps`` is read from ``shared_weight_data['eps']`` when
      present, otherwise ``_DEFAULT_RMS_NORM_EPS`` is used.
    * All other layers are treated as linear projections: ``x @ weight[i] + bias[i]``.
      No activation function is applied, because the hooks record the projection
      module's own output, which is what the result is compared against.

    Arithmetic is done in float32 regardless of the captured dtype.

    Args:
        layer_name: Hook name; selects the norm or the linear formula.
        neuron_idx: Index of the output neuron to recompute.
        token_pos: Token position within the sequence (batch element 0 is used).
        layer_1_data: ``{'input': tensor, 'output': tensor}`` captured from model 1.
        layer_2_data: Same structure captured from model 2.
        shared_weight_data: ``{'weight': tensor, 'bias': tensor or None}``.

    Returns:
        Dict with calculated and actual values, their differences and the
        calculation errors, or ``{'error': message}`` when inputs are missing,
        malformed or out of range.
    """
    input_tensor_1 = layer_1_data.get('input')
    input_tensor_2 = layer_2_data.get('input')

    # Inputs must be [batch, seq_len, hidden]; anything else cannot be indexed below.
    if input_tensor_1 is None or input_tensor_1.dim() != 3 or token_pos >= input_tensor_1.shape[1]:
        return {'error': 'Missing or invalid input data from model 1'}
    if input_tensor_2 is None or input_tensor_2.dim() != 3 or token_pos >= input_tensor_2.shape[1]:
        return {'error': 'Missing or invalid input data from model 2'}

    # Weights come from model_1 ONLY (shared weights).
    w1 = shared_weight_data.get('weight')
    b1 = shared_weight_data.get('bias')

    if w1 is None:
        return {'error': 'Missing weight data'}

    try:
        # float32 avoids half-precision accumulation error (and missing fp16 CPU kernels).
        token_input_1 = input_tensor_1[0, token_pos, :].float()
        token_input_2 = input_tensor_2[0, token_pos, :].float()

        if 'norm' in layer_name:
            if (neuron_idx >= w1.shape[0]
                    or neuron_idx >= token_input_1.shape[0]
                    or neuron_idx >= token_input_2.shape[0]):
                return {'error': 'Index out of bounds for layer norm'}

            eps = shared_weight_data.get('eps', _DEFAULT_RMS_NORM_EPS)
            scale = w1[neuron_idx].float().item()
            calc_1 = scale * _rms_normalize(token_input_1, eps)[neuron_idx].item()
            calc_2 = scale * _rms_normalize(token_input_2, eps)[neuron_idx].item()
        else:
            if neuron_idx >= w1.shape[0]:
                return {'error': 'Neuron index out of bounds'}

            # Only the selected row is converted, not the whole weight matrix.
            weight_row = w1[neuron_idx, :].float()
            calc_1 = torch.matmul(token_input_1, weight_row).item()
            calc_2 = torch.matmul(token_input_2, weight_row).item()

        if b1 is not None and neuron_idx < b1.shape[0]:
            bias_value = b1[neuron_idx].float().item()
            calc_1 += bias_value
            calc_2 += bias_value

        # Values the hooks actually recorded for this token and neuron.
        actual_1 = layer_1_data.get('output')
        actual_2 = layer_2_data.get('output')

        actual_1_val = None
        actual_2_val = None

        if actual_1 is not None and token_pos < actual_1.shape[1] and neuron_idx < actual_1.shape[2]:
            actual_1_val = actual_1[0, token_pos, neuron_idx].item()
        if actual_2 is not None and token_pos < actual_2.shape[1] and neuron_idx < actual_2.shape[2]:
            actual_2_val = actual_2[0, token_pos, neuron_idx].item()

        calc_error_1 = abs(calc_1 - actual_1_val) if actual_1_val is not None else None
        calc_error_2 = abs(calc_2 - actual_2_val) if actual_2_val is not None else None

        both_actual = actual_1_val is not None and actual_2_val is not None

        return {
            'token_position': token_pos,
            'neuron_index': neuron_idx,
            'model_1_calculated': calc_1,
            'model_2_calculated': calc_2,
            'calculation_difference': calc_1 - calc_2,
            'model_1_actual': actual_1_val,
            'model_2_actual': actual_2_val,
            'actual_difference': (actual_1_val - actual_2_val) if both_actual else None,
            'calculation_error_1': calc_error_1,
            'calculation_error_2': calc_error_2,
            'layer_type': get_component_type(layer_name)
        }

    except Exception as e:
        return {'error': f'Calculation failed: {str(e)}'}

def compare_neuron_calculations_per_token(model_1_activations, model_2_activations, 
                                        selected_neurons, shared_weights):
    """Run the per-token neuron recomputation for every selected layer.

    For each layer in ``selected_neurons`` that has per-token selections, every
    (token position, neuron) pair is passed to ``calculate_single_token_neuron``
    together with both models' captured data and the shared weights. Successful
    analyses are annotated with the selection details, and summary statistics are
    computed over them.

    Args:
        model_1_activations: Layer name -> captured data for model 1.
        model_2_activations: Layer name -> captured data for model 2.
        selected_neurons: Layer name -> selection info containing
            'per_token_selections', 'sequence_length' and 'hidden_size'.
        shared_weights: Layer name -> weight record used for both models.

    Returns:
        Dict of layer name -> {'layer_type', 'sequence_length', 'hidden_size',
        'token_analyses', 'summary_stats'}. 'summary_stats' stays empty when no
        token analysis succeeded.
    """
    comparison_results = {}

    print(f"Comparing calculations for {len(selected_neurons)} layers...")

    for layer_name, neuron_info in selected_neurons.items():
        if 'per_token_selections' not in neuron_info:
            continue

        layer_report = {
            'layer_type': neuron_info.get('layer_type', get_component_type(layer_name)),
            'sequence_length': neuron_info['sequence_length'],
            'hidden_size': neuron_info['hidden_size'],
            'token_analyses': {},
            'summary_stats': {}
        }

        # Captured activations per model, plus the weights shared by both.
        acts_1 = model_1_activations.get(layer_name, {})
        acts_2 = model_2_activations.get(layer_name, {})
        layer_weights = shared_weights.get(layer_name, {})

        if not (isinstance(acts_1, dict) and isinstance(acts_2, dict)):
            print(f"Skipping {layer_name}: Invalid layer data")
            continue

        selections = neuron_info['per_token_selections']
        successful = []

        for token_pos, choice in selections.items():
            analysis = calculate_single_token_neuron(
                layer_name, choice['neuron_index'], token_pos,
                acts_1, acts_2, layer_weights
            )

            if 'error' not in analysis:
                # Attach how the neuron was picked to the analysis itself.
                analysis.update({
                    'selected_activation1': choice['activation1_value'],
                    'selected_activation2': choice['activation2_value'],
                    'selection_difference': choice['difference'],
                    'selection_mode': choice.get('selection_mode', 'unknown')
                })
                successful.append(analysis)

            layer_report['token_analyses'][token_pos] = analysis

        if successful:
            calc_diffs = [a['calculation_difference'] for a in successful]
            actual_diffs = [a['actual_difference'] for a in successful
                            if a['actual_difference'] is not None]
            calc_errors_1 = [a['calculation_error_1'] for a in successful
                             if a['calculation_error_1'] is not None]
            calc_errors_2 = [a['calculation_error_2'] for a in successful
                             if a['calculation_error_2'] is not None]

            layer_report['summary_stats'] = {
                'valid_analyses': len(successful),
                'total_tokens': len(selections),
                'mean_calc_difference': np.mean(calc_diffs) if calc_diffs else None,
                'std_calc_difference': np.std(calc_diffs) if calc_diffs else None,
                'max_abs_calc_difference': max(map(abs, calc_diffs)) if calc_diffs else None,
                'mean_actual_difference': np.mean(actual_diffs) if actual_diffs else None,
                'mean_calc_error_1': np.mean(calc_errors_1) if calc_errors_1 else None,
                'mean_calc_error_2': np.mean(calc_errors_2) if calc_errors_2 else None,
                'unique_neurons_selected': len({c['neuron_index'] for c in selections.values()})
            }

        comparison_results[layer_name] = layer_report

    print(f"Completed comparisons for {len(comparison_results)} layers")
    return comparison_results

# Fixed column schema of the per-token CSV. Every row (successful or error) is
# written with exactly these columns in this order, so rows appended by later
# calls always line up with the header written by the first call.
_PER_TOKEN_CSV_COLUMNS = [
    'input_text',
    'layer_name',
    'layer_type',
    'token_position',
    'neuron_index',
    'error',
    'model_1_calculated',
    'model_2_calculated',
    'calculation_difference',
    'abs_calculation_difference',
    'model_1_actual',
    'model_2_actual',
    'actual_difference',
    'abs_actual_difference',
    'calculation_error_1',
    'calculation_error_2',
    'selected_activation1',
    'selected_activation2',
    'selection_difference',
    'selection_mode',
]


def save_detailed_results_per_token(comparison_results, filename="per_token_neuron_analysis.csv"):
    """Flatten per-token analyses into rows and append them to a CSV file.

    One row is produced per (layer, token position). Rows for failed analyses
    carry the error message and leave the numeric columns empty. The header is
    written only when the file does not exist yet or is empty; otherwise rows are
    appended without a header.

    Args:
        comparison_results: Result dict with optional 'input_text' and
            'layer_comparisons' (layer name -> {'token_analyses': {...}, ...}).
        filename: Path of the CSV file to create or append to.

    Returns:
        The DataFrame that was written, with the columns listed in
        ``_PER_TOKEN_CSV_COLUMNS``.
    """
    input_text = comparison_results.get('input_text', 'Unknown')
    rows = []

    for layer_name, layer_data in comparison_results.get('layer_comparisons', {}).items():
        if 'token_analyses' not in layer_data:
            continue

        for token_pos, token_analysis in layer_data['token_analyses'].items():
            # Start from an all-None row so both row kinds share one schema.
            row = dict.fromkeys(_PER_TOKEN_CSV_COLUMNS)
            row['input_text'] = input_text[:100]
            row['layer_name'] = layer_name

            if 'error' in token_analysis:
                # Error rows are saved too, with only the identifying fields filled in.
                row['layer_type'] = layer_data.get('layer_type', 'unknown')
                row['token_position'] = token_pos
                row['error'] = token_analysis['error']
            else:
                actual_difference = token_analysis['actual_difference']
                row.update({
                    'layer_type': token_analysis.get('layer_type', 'unknown'),
                    'token_position': token_analysis['token_position'],
                    'neuron_index': token_analysis['neuron_index'],
                    'model_1_calculated': token_analysis['model_1_calculated'],
                    'model_2_calculated': token_analysis['model_2_calculated'],
                    'calculation_difference': token_analysis['calculation_difference'],
                    'abs_calculation_difference': abs(token_analysis['calculation_difference']),
                    'model_1_actual': token_analysis['model_1_actual'],
                    'model_2_actual': token_analysis['model_2_actual'],
                    'actual_difference': actual_difference,
                    'abs_actual_difference': abs(actual_difference) if actual_difference is not None else None,
                    'calculation_error_1': token_analysis['calculation_error_1'],
                    'calculation_error_2': token_analysis['calculation_error_2'],
                    'selected_activation1': token_analysis.get('selected_activation1'),
                    'selected_activation2': token_analysis.get('selected_activation2'),
                    'selection_difference': token_analysis.get('selection_difference'),
                    'selection_mode': token_analysis.get('selection_mode'),
                })

            rows.append(row)

    # Passing the column list keeps the schema even when there are no rows.
    df = pd.DataFrame(rows, columns=_PER_TOKEN_CSV_COLUMNS)

    write_header = not os.path.exists(filename) or os.path.getsize(filename) == 0
    df.to_csv(filename, mode='a', header=write_header, index=False)

    print(f"Saved {len(rows)} rows to {filename}")
    return df

def run_comparison_per_token(text_input, mode='min', seed=42, max_layers=None):
    """
    Run both models on one text and compare selected neurons using model 1 weights.

    Steps: clear previous state, tokenize, register hooks on both models, run a
    forward pass through each, select one neuron per token position per layer,
    and recompute/compare those neurons. Hooks and captured activations are
    always released before returning, whether or not an error occurred.

    Tensors placed in the returned dict (tokenized inputs and both logits) are
    moved to the CPU, so that callers collecting results for many texts do not
    keep GPU memory allocated.

    Args:
        text_input: Input text to process
        mode: 'min' (minimum difference) or 'random' (random neuron selection)
        seed: Random seed
        max_layers: Maximum number of layers to process (None for all)

    Returns:
        Dict with the comparison results and bookkeeping counters. When an
        exception occurs after tokenization, the dict instead contains an
        'error' message and empty 'layer_comparisons' / 'selected_neurons'.
    """
    print(f"\n{'='*60}")
    print(f"Processing: {text_input[:50]}...")
    print(f"Selection mode: {mode}")
    print(f"{'='*60}")

    # Clear previous data and free memory
    clear_activations()
    remove_all_hooks()

    # Tokenize input
    inputs = tokenizer(
        text_input,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512
    )
    inputs = {k: v.to(DEVICE) for k, v in inputs.items()}

    print(f"Input tokens: {inputs['input_ids'].shape[1]}")

    try:
        # Register hooks
        print("\n1. Registering hooks...")
        hooks_1 = register_llama_hooks(model_1, "Model_1", max_layers=max_layers)
        hooks_2 = register_llama_hooks(model_2, "Model_2", max_layers=max_layers)

        if len(hooks_1) == 0 or len(hooks_2) == 0:
            raise Exception("Failed to register hooks")

        # Run models; the allocator cache is released around each forward pass.
        print("\n2. Running models...")
        with torch.no_grad():
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            outputs_1 = model_1(**inputs)

            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            outputs_2 = model_2(**inputs)

            if torch.cuda.is_available():
                torch.cuda.empty_cache()

        print(f"\n3. Activation capture results:")
        print(f"   Model 1: {len(activations_model_1)} layers captured")
        print(f"   Model 2: {len(activations_model_2)} layers captured")

        # Select neurons per token position
        print("\n4. Selecting neurons per token position...")
        selected_neurons = select_neurons_per_token_position(
            activations_model_1, activations_model_2, shared_weights,
            mode=mode, seed=seed
        )

        # Compare activations using shared weights from model_1
        print("\n5. Comparing activations...")
        comparison_results = compare_neuron_calculations_per_token(
            activations_model_1,
            activations_model_2,
            selected_neurons,
            shared_weights
        )

        print(f"\n6. Results summary:")
        print(f"   Layers with comparisons: {len(comparison_results)}")

        # Calculate overall statistics
        total_valid = sum(r['summary_stats'].get('valid_analyses', 0) for r in comparison_results.values())
        total_tokens = sum(r['summary_stats'].get('total_tokens', 0) for r in comparison_results.values())

        print(f"   Total valid analyses: {total_valid}")
        print(f"   Total token positions: {total_tokens}")

        # The counters are read here, before the `finally` block clears the registries.
        return {
            'input_text': text_input,
            # Returned tensors are moved off the GPU so stored results do not pin device memory.
            'tokenized_input': {k: v.detach().cpu() for k, v in inputs.items()},
            'model_1_output': outputs_1.logits.detach().cpu(),
            'model_2_output': outputs_2.logits.detach().cpu(),
            'layer_comparisons': comparison_results,
            'selected_neurons': selected_neurons,
            'hook_errors': hook_errors.copy(),
            'layers_captured_1': len(activations_model_1),
            'layers_captured_2': len(activations_model_2),
            'shared_weights_captured': len(shared_weights),
            'selection_mode': mode,
            'total_valid_analyses': total_valid,
            'total_token_positions': total_tokens
        }

    except Exception as e:
        print(f"\nERROR in run_comparison_per_token: {e}")
        import traceback
        traceback.print_exc()

        return {
            'input_text': text_input,
            'error': str(e),
            'layer_comparisons': {},
            'selected_neurons': {},
            'hook_errors': hook_errors.copy(),
            'layers_captured_1': len(activations_model_1),
            'layers_captured_2': len(activations_model_2),
            'shared_weights_captured': len(shared_weights),
            'selection_mode': mode
        }

    finally:
        # Always detach the hooks and drop captured tensors, even on failure.
        remove_all_hooks()
        clear_activations()
In [None]:
# Input sentences for the comparison run: 25 short, single-sentence English
# prompts. Each one is processed independently by the driver loop.
TEST_TEXTS = [
    "The quick brown fox jumps over the lazy dog.",
    "Artificial intelligence is transforming the world of technology.",
    "In a hole in the ground there lived a hobbit.",
    "To be or not to be, that is the question Shakespeare posed.",
    "Machine learning models require large datasets for training.",
    "The mitochondria is the powerhouse of the cell in biology.",
    "Climate change is causing unprecedented shifts in global weather patterns.",
    "Mozart composed his first symphony at the age of eight years old.",
    "The stock market experienced significant volatility during the pandemic crisis.",
    "Quantum physics reveals the strange behavior of particles at subatomic levels.",
    "Professional chefs recommend using fresh herbs to enhance flavor profiles.",
    "Ancient Egyptian pyramids were built using sophisticated engineering techniques.",
    "Regular exercise and proper nutrition are essential for maintaining good health.",
    "The International Space Station orbits Earth approximately every ninety minutes.",
    "Cryptocurrency markets operate twenty-four hours a day across global exchanges.",
    "Vincent van Gogh painted Starry Night while staying at an asylum.",
    "Professional athletes must maintain strict training regimens throughout their careers.",
    "The Amazon rainforest produces twenty percent of the world's oxygen supply.",
    "Modern architecture emphasizes clean lines and functional design principles.",
    "Forensic scientists use DNA analysis to solve complex criminal investigations.",
    "Traditional Japanese tea ceremonies follow centuries-old ritualistic practices.",
    "Marine biologists study coral reef ecosystems threatened by ocean acidification.",
    "The Renaissance period marked a cultural rebirth in European art and science.",
    "Cybersecurity experts work tirelessly to protect digital infrastructure from threats.",
    "Sustainable agriculture practices help preserve soil quality for future generations."
]


In [None]:
# Neuron-selection strategy used for the whole run.
PREFERRED_MODE = 'min'  # Options: 'min' or 'random'

_BANNER = '=' * 60

print(f"\n{_BANNER}")
print(f"Running full analysis with mode: {PREFERRED_MODE}")
print(f"{_BANNER}")

all_results = []

# Every text's rows are appended to the same CSV file, named after the mode.
_results_csv = f"all_texts_per_token_{PREFERRED_MODE}.csv"

for i, text in enumerate(TEST_TEXTS):
    print(f"\n=== Processing text {i+1}/{len(TEST_TEXTS)} ===")

    try:
        # Each text gets its own seed; all layers are used.
        result = run_comparison_per_token(
            text,
            mode=PREFERRED_MODE,
            seed=42+i,
            max_layers=None
        )
        all_results.append(result)

        # Save detailed results
        save_detailed_results_per_token(result, filename=_results_csv)

        print(f"✓ Completed text {i+1}")

    except Exception as e:
        # A failure on one text is reported and the loop moves on to the next.
        print(f"✗ Error processing text {i+1}: {e}")

print(f"\n{_BANNER}")

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.



=== Processing text 1/5 ===
Processing: The quick brown fox jumps over the lazy dog....
Input tokens: 13
Registering hooks...
Running models...
Captured 290 activations from Model 1
Captured 290 activations from Model 2
Selecting random neurons...
Selected neurons from 290 layers
Comparing activations...
Visualization saved to neuron_comparison_text_0.png
Completed text 1

=== Processing text 2/5 ===
Processing: Artificial intelligence is transforming the world ...
Input tokens: 13
Registering hooks...
Running models...
Captured 290 activations from Model 1
Captured 290 activations from Model 2
Selecting random neurons...
Selected neurons from 290 layers
Comparing activations...
Visualization saved to neuron_comparison_text_1.png
Completed text 2

=== Processing text 3/5 ===
Processing: In a hole in the ground there lived a hobbit....
Input tokens: 14
Registering hooks...
Running models...
Captured 290 activations from Model 1
Captured 290 activations from Model 2
Selecting random neu