In [8]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import torch.nn as nn
import torch.nn.functional as F
from functools import partial
from typing import Optional, Dict, List, Tuple
import os
import csv
import torch
from tqdm import tqdm
import gc


In [2]:
# Single device used by the rest of the notebook: the GPU when one is visible, else the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Compare the device *type*: a torch.device object is not equal to the plain
# string "cuda", so `DEVICE == "cuda"` would silently skip this branch.
if DEVICE.type == "cuda":
    # Start from a clean allocator state so the numbers printed below are meaningful.
    torch.cuda.empty_cache()
    torch.cuda.synchronize()

    # Print CUDA details
    print(f"CUDA Device: {torch.cuda.get_device_name()}")
    print(f"CUDA Memory Allocated: {torch.cuda.memory_allocated()/1024**2:.2f}MB")
    print(f"CUDA Memory Reserved: {torch.cuda.memory_reserved()/1024**2:.2f}MB")

In [None]:
def load_model(model_name):
    """
    Load a causal language model and its tokenizer from the same checkpoint.

    Args:
        model_name: Checkpoint identifier passed to both `from_pretrained` calls.

    Returns:
        Tuple `(model, tokenizer)`. The model is loaded with float16 weights and
        with `trust_remote_code=True`, i.e. the checkpoint's own modelling code
        is executed.
    """
    # Flash attention (`use_flash_attention_2=True`) is intentionally left off.
    model_kwargs = {
        "torch_dtype": torch.float16,
        "trust_remote_code": True,
    }
    loaded_model = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs)
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
    return loaded_model, loaded_tokenizer

# Load the DeepSeek MoE base checkpoint and its tokenizer via load_model.
model, tokenizer = load_model("deepseek-ai/deepseek-moe-16b-base")
# The notebook only runs inference, so put the module in evaluation mode.
model.eval()
# Move the weights to DEVICE (being the last expression, the result is also echoed by the cell).
model.to(DEVICE)

In [10]:
import torch
import torch.nn as nn
from typing import Dict, List, Tuple, Optional
from collections import defaultdict

class DeepseekMoEHook:
    """
    Hook into DeepSeek MoE models to capture router logits, hidden states, and expert outputs
    for the last token after processing the whole sequence.

    Collected containers (all keyed by layer index, filled by `forward`):
        router_logits: `[batch, num_experts]` logits recomputed from the gate weight.
        layer_hidden_states: `[batch, hidden]` output of every decoder layer.
        residual_streams: `[batch, hidden]` input handed to the MoE block.
        top1_expert_hidden_states / topk_expert_hidden_states: dicts with
            'expert_idx', 'expert_weight', 'token_idx', 'final_hidden_state',
            'expert_hidden_states' and (top-k only) 'combined_topk_hidden_state'.
        expert_outputs: `{layer: {expert: {batch_idx: [hidden] tensor}}}` holding the
            raw (unweighted) expert output for the last token only.

    NOTE(review): `residual_streams` holds the *output* of `post_attention_layernorm`
    (the MoE input). The un-normalised tensor a decoder layer adds back after the MLP
    is presumably the layer norm's *input*; confirm against the model code before
    reading "expert output + residual" as a true residual-stream state.
    """

    def __init__(self, model, k_experts: int = 6):
        """
        Initialize the hook collector.

        Args:
            model: DeepSeek model to hook into (must expose `model.model.layers`).
            k_experts: Number of top experts to track (default: 6). Stored for
                reference only; the hooks record however many experts the router returns.
        """
        # Created first so remove_hooks()/__del__ stay safe even if the setup below raises.
        self.hooks = []
        self.model = model
        # Follow the model rather than a notebook-level global, so the hook keeps
        # working when the model lives on a different device.
        self.device = self._infer_device(model)
        self.k_experts = k_experts

        # Containers for collected data
        self.router_logits = {}
        self.layer_hidden_states = {}
        self.top1_expert_hidden_states = {}
        self.topk_expert_hidden_states = {}
        self.residual_streams = {}

        # Last-token expert outputs, matched to the routing data after the forward pass
        self.expert_outputs = defaultdict(dict)

        # Set up hooks
        self._register_hooks()

    @staticmethod
    def _infer_device(model):
        """Return the device of the model's first parameter, or CPU if it has none."""
        try:
            return next(model.parameters()).device
        except (StopIteration, AttributeError):
            return torch.device("cpu")

    @staticmethod
    def _last_token(tensor):
        """Slice the last sequence position: `[b, s, f] -> [b, f]`, `[s, f] -> [1, f]`."""
        if tensor.dim() == 3:
            return tensor[:, -1, :]
        return tensor[-1:, :]

    @staticmethod
    def _last_token_routing(routing, hidden_states):
        """
        Slice the router output (`topk_idx` / `topk_weight`) at the last position of
        every sequence, returning `[batch, top_k]`.

        Handles `[batch, seq, k]` directly and un-flattens `[batch*seq, k]` using the
        shape of the gate input, so batches larger than one keep one row per sequence.
        """
        if routing.dim() == 3:
            return routing[:, -1, :]
        if hidden_states.dim() == 3:
            batch, seq_len = hidden_states.size(0), hidden_states.size(1)
            if routing.size(0) == batch * seq_len:
                return routing.reshape(batch, seq_len, -1)[:, -1, :]
        return routing[-1:, :]

    @staticmethod
    def _last_token_expert_rows(expert_in, expert_out, last_token_inputs):
        """
        Pick, per batch item, the expert output row that belongs to the last token.

        Routed experts only see the tokens dispatched to them, in an order decided by
        the MoE implementation, so the row is located by matching the expert's input
        rows against the last token's MoE input instead of relying on position.
        Rows are cloned because the caller may keep modifying the expert output
        in place after the hook has run.

        Returns:
            Dict `{batch_idx: [hidden] tensor}`; batch items without a match are absent.
        """
        rows = {}
        if expert_out.dim() == 3:
            # Called on the full [batch, seq, hidden] tensor (e.g. shared experts).
            for batch_idx in range(expert_out.size(0)):
                rows[batch_idx] = expert_out[batch_idx, -1, :].detach().clone()
            return rows

        if (last_token_inputs is None
                or expert_out.dim() != 2
                or expert_in.dim() != 2
                or expert_in.size(-1) != last_token_inputs.size(-1)):
            return rows

        for batch_idx in range(last_token_inputs.size(0)):
            hits = (expert_in == last_token_inputs[batch_idx]).all(dim=-1).nonzero(as_tuple=False)
            if hits.numel() > 0:
                rows[batch_idx] = expert_out[hits[-1, 0]].detach().clone()
        return rows

    def _register_hooks(self):
        """Register all hooks on the model."""

        # Hook to capture router logits and selected experts
        def hook_router_logits(layer_idx):
            def hook_fn(module, inputs, outputs):
                # Gate input: the hidden states the router scores
                hidden_states = inputs[0]

                # Recompute the raw logits from the gate weight
                if hidden_states.is_cuda:
                    # Use half precision for GPU to save memory
                    router_logits = torch.matmul(hidden_states.half(), module.weight.T.half())
                else:
                    router_logits = torch.matmul(hidden_states, module.weight.T)
                self.router_logits[layer_idx] = self._last_token(router_logits).detach().clone()

                # Get expert indices and weights from outputs
                topk_idx, topk_weight, _ = outputs

                if topk_idx.dim() in (2, 3):
                    # Clone so later edits never touch the model's own router tensors.
                    last_idx = self._last_token_routing(topk_idx, hidden_states).detach().clone()
                    last_weight = self._last_token_routing(topk_weight, hidden_states).detach().clone()
                    # The sequence axis is the second-to-last one for both 2-D and 3-D inputs.
                    token_idx = hidden_states.size(-2) - 1

                    # The router's top-k is not assumed to be sorted, so the top-1
                    # expert is the one with the largest weight, not slot 0.
                    best_slot = last_weight.argmax(dim=-1, keepdim=True)
                    self.top1_expert_hidden_states[layer_idx] = {
                        'expert_idx': last_idx.gather(-1, best_slot).squeeze(-1),
                        'expert_weight': last_weight.gather(-1, best_slot).squeeze(-1),
                        'token_idx': token_idx
                    }
                    self.topk_expert_hidden_states[layer_idx] = {
                        'expert_idx': last_idx,
                        'expert_weight': last_weight,
                        'token_idx': token_idx
                    }

                # Store the MoE input for the last token (works for 2-D and 3-D inputs)
                self.residual_streams[layer_idx] = self._last_token(hidden_states).detach().clone()
            return hook_fn

        # Hook to capture expert outputs
        def hook_expert_output(layer_idx, expert_idx):
            def hook_fn(module, inputs, outputs):
                # Keep only the last-token row(s); see _last_token_expert_rows.
                self.expert_outputs[layer_idx][expert_idx] = self._last_token_expert_rows(
                    inputs[0], outputs, self.residual_streams.get(layer_idx))
            return hook_fn

        # Hook to capture layer hidden states (final combined output)
        def hook_layer_output(layer_idx):
            def hook_fn(module, inputs, outputs):
                hidden_states = outputs[0] if isinstance(outputs, tuple) else outputs
                self.layer_hidden_states[layer_idx] = self._last_token(hidden_states).detach().clone()
            return hook_fn

        # Hook to capture the MoE input (output of the post-attention layer norm)
        def hook_post_attn_ln(layer_idx):
            def hook_fn(module, inputs, outputs):
                self.residual_streams[layer_idx] = self._last_token(outputs).detach().clone()
            return hook_fn

        # Hook to capture final hidden states after MoE processing
        def hook_moe_output(layer_idx):
            def hook_fn(module, inputs, outputs):
                combined_output = self._last_token(outputs).detach().clone()

                # Only layers whose router hook produced routing data get the entry
                if layer_idx in self.top1_expert_hidden_states:
                    self.top1_expert_hidden_states[layer_idx]['final_hidden_state'] = combined_output
                    self.topk_expert_hidden_states[layer_idx]['final_hidden_state'] = combined_output
            return hook_fn

        # Register hooks for each layer
        for layer_idx, layer in enumerate(self.model.model.layers):
            # Register hook to capture final output of each layer
            self.hooks.append(layer.register_forward_hook(hook_layer_output(layer_idx)))

            # If it's an MoE layer, register additional hooks
            if hasattr(layer.mlp, 'experts') and layer.mlp.__class__.__name__ == 'DeepseekMoE':
                # Hook for post attention layer norm (MoE input)
                self.hooks.append(layer.post_attention_layernorm.register_forward_hook(
                    hook_post_attn_ln(layer_idx)))

                # Hook for router
                self.hooks.append(layer.mlp.gate.register_forward_hook(hook_router_logits(layer_idx)))

                # Hook for each expert
                for expert_idx, expert in enumerate(layer.mlp.experts):
                    self.hooks.append(expert.register_forward_hook(hook_expert_output(layer_idx, expert_idx)))

                # Hook for shared expert if it exists
                if hasattr(layer.mlp, 'shared_experts'):
                    self.hooks.append(layer.mlp.shared_experts.register_forward_hook(
                        hook_expert_output(layer_idx, 'shared')))

                # Hook for final MoE output
                self.hooks.append(layer.mlp.register_forward_hook(hook_moe_output(layer_idx)))

    def _expert_state(self, layer_idx, expert_idx, batch_idx):
        """
        Return `expert output + MoE input` for the last token of one batch item,
        or None when that expert produced no row for it.
        """
        row = self.expert_outputs[layer_idx].get(expert_idx, {}).get(batch_idx)
        if row is None:
            return None
        state = row.to(self.device)
        residual = self.residual_streams.get(layer_idx)
        if residual is not None:
            state = state + residual[batch_idx].to(self.device)
        return state

    def _match_expert_outputs(self):
        """
        Attach last-token expert hidden states to the routing dictionaries.

        For every MoE layer and batch item this fills
        `top1[...]['expert_hidden_states'][batch]` (highest-weight expert),
        `topk[...]['expert_hidden_states'][batch]` (`{slot: state}` for each routed
        expert) and `topk[...]['combined_topk_hidden_state'][batch]`, the
        weight-averaged top-k state without any shared-expert contribution.
        """
        for layer_idx, top1 in self.top1_expert_hidden_states.items():
            if layer_idx not in self.expert_outputs:
                continue

            topk = self.topk_expert_hidden_states[layer_idx]
            top1_states = top1.setdefault('expert_hidden_states', {})
            topk_states = topk.setdefault('expert_hidden_states', {})

            for batch_idx in range(topk['expert_idx'].size(0)):
                expert_indices = topk['expert_idx'][batch_idx]
                expert_weights = topk['expert_weight'][batch_idx]

                # Highest-weight expert (already selected by the router hook)
                best_state = self._expert_state(layer_idx, int(top1['expert_idx'][batch_idx]), batch_idx)
                if best_state is not None:
                    top1_states[batch_idx] = best_state

                # One state per routed expert, keyed by its slot in the top-k list
                expert_states = {}
                for k_idx, expert_idx in enumerate(expert_indices.tolist()):
                    state = self._expert_state(layer_idx, expert_idx, batch_idx)
                    if state is not None:
                        expert_states[k_idx] = state
                topk_states[batch_idx] = expert_states

                if not expert_states:
                    continue

                # Weighted combination, accumulated in float32 to avoid half-precision error
                weights = torch.stack(
                    [expert_weights[k_idx].float() for k_idx in expert_states]).to(self.device)
                states = torch.stack([state.float() for state in expert_states.values()])
                combined_topk = (weights.unsqueeze(-1) * states).sum(dim=0)

                # Normalize if weights don't sum to 1.0
                total_weight = weights.sum()
                if total_weight > 0 and abs(total_weight - 1.0) > 1e-5:
                    combined_topk = combined_topk / total_weight

                topk.setdefault('combined_topk_hidden_state', {})[batch_idx] = combined_topk

    def forward(self, input_ids):
        """
        Run a forward pass through the model and collect all hook data.

        Args:
            input_ids: Token IDs to process

        Returns:
            Dict containing all collected data. The values are the hook's own
            containers, so they are cleared by the next call to `forward`.
        """
        # Clear previous data
        self.router_logits.clear()
        self.layer_hidden_states.clear()
        self.top1_expert_hidden_states.clear()
        self.topk_expert_hidden_states.clear()
        self.expert_outputs.clear()
        self.residual_streams.clear()

        # Ensure input is on the correct device
        input_ids = input_ids.to(self.device)

        # Only the hooks' side effects matter; the model output itself is discarded.
        with torch.no_grad():
            if self.device.type == 'cuda':
                # Use mixed precision on GPU (torch.autocast supersedes torch.cuda.amp.autocast)
                with torch.autocast(device_type='cuda'):
                    self.model(input_ids)
                # Clear cache immediately after forward pass
                torch.cuda.empty_cache()
            else:
                self.model(input_ids)

        # Match expert outputs to get hidden states
        self._match_expert_outputs()

        # Return processed results
        return {
            'layer_hidden_states': self.layer_hidden_states,
            'router_logits': self.router_logits,
            'top1_expert': self.top1_expert_hidden_states,
            'topk_expert': self.topk_expert_hidden_states,
            'residual_streams': self.residual_streams
        }

    def remove_hooks(self):
        """Remove all hooks from the model. Safe to call more than once."""
        for hook in getattr(self, 'hooks', []):
            hook.remove()
        self.hooks = []

        # Clear CUDA cache if using GPU
        device = getattr(self, 'device', None)
        if device is not None and device.type == 'cuda':
            torch.cuda.empty_cache()

    def __del__(self):
        """Cleanup when object is deleted."""
        try:
            self.remove_hooks()
        except Exception:
            # Finalizers may run while the interpreter (and torch) is being torn
            # down; there is nothing useful left to do with an error here.
            pass

def get_last_token_moe_data(model, tokenizer, prompt, k_experts=6):
    """
    Helper function to get MoE data for the last token of a sequence.

    A temporary DeepseekMoEHook is attached for exactly one forward pass and is
    always detached again, including when tokenisation or the forward pass fails.

    Args:
        model: DeepSeek model
        tokenizer: DeepSeek tokenizer
        prompt: Input text prompt
        k_experts: Number of top experts to track (default: 6)

    Returns:
        Dict containing all collected MoE data plus 'prompt', 'input_ids',
        'last_token_id' and 'last_token'.

    Raises:
        ValueError: If the tokenizer produces no tokens for `prompt`.
    """
    # Create hook
    hook = DeepseekMoEHook(model, k_experts=k_experts)

    try:
        # Tokenise inside the try block: the hooks are already registered, so any
        # failure from here on must still reach the cleanup in `finally`.
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(hook.device)
        if input_ids.size(-1) == 0:
            raise ValueError("Prompt produced no tokens; there is no last token to analyse.")

        # Get data
        moe_data = hook.forward(input_ids)

        # Add prompt information
        last_token_id = input_ids[0, -1].item()
        moe_data['prompt'] = prompt
        moe_data['input_ids'] = input_ids
        moe_data['last_token_id'] = last_token_id
        moe_data['last_token'] = tokenizer.decode([last_token_id])

        return moe_data
    finally:
        # Always remove hooks
        hook.remove_hooks()

In [11]:
# Function to process a file of prompts and calculate cosine similarities for all layers
def process_prompts_file(file_path, model, tokenizer, output_dir="cosine-sim-csv", device=DEVICE):
    """
    Compute per-layer cosine similarities for every prompt in a text file.

    Each non-empty line of `file_path` is treated as one prompt. For every MoE
    layer reported by `get_last_token_moe_data`, three similarities are computed
    for the last token and written to
    `<output_dir>/<domain>_prompt_<n>_cos-sim.csv`:

        final_vs_top1: layer output vs. top-1 expert state
        final_vs_topk: layer output vs. weighted top-k expert state
        top1_vs_topk:  top-1 expert state vs. weighted top-k expert state

    Args:
        file_path: Text file with one prompt per line; its base name (up to the
            first dot) becomes the `<domain>` part of the output file names.
        model: Model passed through to `get_last_token_moe_data`.
        tokenizer: Tokenizer passed through to `get_last_token_moe_data`.
        output_dir: Directory for the CSV files (created if missing).
        device: Device used to decide whether the CUDA cache is flushed between
            prompts; `None` falls back to the global DEVICE.
    """
    # Check if device is specified, otherwise use global DEVICE if available
    if device is None:
        try:
            device = DEVICE
        except NameError:
            device = "cuda" if torch.cuda.is_available() else "cpu"
    # Normalise strings such as "cuda" so `.type` can be checked reliably below.
    device = torch.device(device)

    print(f"Using device: {device}")

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Extract domain name from file path
    domain_name = os.path.basename(file_path).split('.')[0]

    # Read prompts from file
    with open(file_path, 'r', encoding='utf-8') as f:
        prompts = [line.strip() for line in f if line.strip()]

    fieldnames = ['layer', 'final_vs_top1', 'final_vs_topk', 'top1_vs_topk']

    def cosine(vec_a, vec_b):
        # Flatten to 1-D and compute in float32; cosine_similarity normalises internally.
        return F.cosine_similarity(
            vec_a.flatten().float().unsqueeze(0),
            vec_b.flatten().float().unsqueeze(0)
        ).item()

    # Process each prompt
    for prompt_idx, prompt in enumerate(tqdm(prompts, desc="Processing prompts")):
        # Create a unique CSV file for each prompt with domain name
        csv_filename = f"{domain_name}_prompt_{prompt_idx+1}_cos-sim.csv"
        csv_path = os.path.join(output_dir, csv_filename)

        # Get MoE data for the prompt
        moe_data = get_last_token_moe_data(model, tokenizer, prompt)

        # Calculate cosine similarities for each MoE layer the hooks reported,
        # instead of assuming a fixed number of layers.
        results = []
        for layer_idx in sorted(moe_data['top1_expert']):
            try:
                final_hidden_state = moe_data['layer_hidden_states'][layer_idx]
                top1_expert_hidden_state = moe_data['top1_expert'][layer_idx]['expert_hidden_states'][0]
                combined_topk_hidden_state = moe_data['topk_expert'][layer_idx]['combined_topk_hidden_state'][0]
            except (KeyError, IndexError) as e:
                print(f"Error processing layer {layer_idx} for prompt {prompt_idx+1}: {e}")
                continue

            # Store results
            results.append({
                'layer': layer_idx,
                'final_vs_top1': cosine(final_hidden_state, top1_expert_hidden_state),
                'final_vs_topk': cosine(final_hidden_state, combined_topk_hidden_state),
                'top1_vs_topk': cosine(top1_expert_hidden_state, combined_topk_hidden_state)
            })

        # Write header and rows in one go so a failed prompt never leaves a header-only file
        with open(csv_path, 'w', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(results)

        print(f"Results for prompt {prompt_idx+1} saved to {csv_path}")

        # Release this prompt's tensors before asking CUDA to return cached blocks
        del moe_data
        model.zero_grad(set_to_none=True)
        gc.collect()  # Force garbage collection
        if device.type == "cuda":
            torch.cuda.empty_cache()

    print(f"Processing complete. Results saved to {output_dir}")


In [None]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_cosine_similarities(csv_file, prompt_num=1):
    """
    Draw the three per-layer cosine-similarity curves stored in one CSV file.

    Args:
        csv_file (str): CSV with the columns 'layer', 'final_vs_top1',
            'final_vs_topk' and 'top1_vs_topk'
        prompt_num (int): Prompt number shown in the figure title

    Returns:
        plotly.graph_objects.Figure: Figure with one line per similarity column
    """
    similarity_df = pd.read_csv(csv_file)
    figure = make_subplots()

    # (column in the CSV, legend label), in the order the traces are added
    curves = (
        ('final_vs_top1', 'Final vs Top1'),
        ('final_vs_topk', 'Final vs TopK'),
        ('top1_vs_topk', 'Top1 vs TopK'),
    )
    for column, label in curves:
        trace = go.Scatter(
            x=similarity_df['layer'],
            y=similarity_df[column],
            mode='lines+markers',
            name=label
        )
        figure.add_trace(trace)

    layout_options = dict(
        title=f'Cosine Similarities Across Layers for Prompt {prompt_num}',
        xaxis_title='Layer',
        yaxis_title='Cosine Similarity',
        legend_title='Metrics',
        hovermode='x unified',
        template='plotly_white'
    )
    figure.update_layout(**layout_options)

    # One tick per layer number present in the file
    figure.update_xaxes(tickmode='array', tickvals=similarity_df['layer'])

    return figure

# Example usage: plot one per-prompt file written by process_prompts_file.
# Those files are named "<domain>_prompt_<n>_cos-sim.csv", so point at a name
# that can actually exist and skip the plot if it has not been generated yet.
csv_file = 'cosine-sim-csv/aime-math_chunked_prompt_1_cos-sim.csv'
if os.path.exists(csv_file):
    fig = plot_cosine_similarities(csv_file)
    fig.show()
else:
    fig = None
    print(f"{csv_file} not found - run process_prompts_file first.")


In [12]:
import os
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from glob import glob

def average_cosine_similarities(base_name, output_dir="cosine-sim-csv-avg", input_dir="cosine-sim-csv"):
    """
    Averages the cosine similarities across all files with the same base name pattern.

    The per-layer mean of the 'top1_vs_topk' column is taken over every file
    matching `<input_dir>/<base_name>_chunked_prompt_*_cos-sim.csv`. Rows are
    grouped by their 'layer' value, so files that are missing some layers are
    still averaged correctly, and NaN entries are ignored by the mean.

    Args:
        base_name (str): The base name pattern to match files (e.g., 'aime-math')
        output_dir (str): Directory to save the averaged results
        input_dir (str): Directory holding the per-prompt CSV files

    Returns:
        pd.DataFrame: Columns 'layer' and 'avg_top1_vs_topk', sorted by layer;
        None when no file matches the pattern.
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Find all matching CSV files
    pattern = f"{input_dir}/{base_name}_chunked_prompt_*_cos-sim.csv"
    csv_files = sorted(glob(pattern))

    if not csv_files:
        print(f"No files found matching pattern: {pattern}")
        return None

    print(f"Found {len(csv_files)} files matching pattern: {pattern}")

    # Stack the rows of every prompt, then average per layer
    per_prompt = pd.concat(
        [pd.read_csv(path, usecols=['layer', 'top1_vs_topk']) for path in csv_files],
        ignore_index=True
    )
    result_df = (
        per_prompt
        .groupby('layer', sort=True)['top1_vs_topk']
        .mean()
        .rename('avg_top1_vs_topk')
        .reset_index()
    )

    # Save the averaged data to a CSV file
    output_file = f"{output_dir}/{base_name}_averaged_top1_vs_topk.csv"
    result_df.to_csv(output_file, index=False)
    print(f"Averaged data saved to {output_file}")

    return result_df

def plot_averaged_similarities(base_name, output_dir="cosine-sim-csv-avg"):
    """
    Plots the averaged cosine similarities.

    The averages are (re)computed through `average_cosine_similarities`. If that
    call hands back no DataFrame, the averaged CSV in `output_dir` is read
    instead, so an existing averaged file can still be plotted.

    Args:
        base_name (str): The base name pattern used to generate the averaged data
        output_dir (str): Directory where the averaged data is saved

    Returns:
        plotly.graph_objects.Figure: The plotly figure object, or None when no
        averaged data is available.
    """
    # Get the averaged data
    df = average_cosine_similarities(base_name, output_dir)

    if df is None:
        # Fall back to the file average_cosine_similarities writes to disk.
        averaged_csv = f"{output_dir}/{base_name}_averaged_top1_vs_topk.csv"
        if not os.path.exists(averaged_csv):
            return None
        df = pd.read_csv(averaged_csv)

    # Create figure
    fig = go.Figure()

    fig.add_trace(
        go.Scatter(
            x=df['layer'],
            y=df['avg_top1_vs_topk'],
            mode='lines+markers',
            name='Avg Top1 vs TopK'
        )
    )

    # Update layout
    fig.update_layout(
        title=f'Average Top1 vs TopK Cosine Similarities Across Layers for {base_name}',
        xaxis_title='Layer',
        yaxis_title='Average Cosine Similarity',
        hovermode='x unified',
        template='plotly_white'
    )

    # Ensure x-axis shows all layer numbers
    fig.update_xaxes(tickmode='array', tickvals=df['layer'])

    return fig


In [None]:
def plot_combined_similarities(base_names, csv_dir="cosine-sim-csv-avg"):
    """
    Plot combined graph of top1_vs_topk similarities for multiple datasets

    One line is drawn per dataset whose averaged CSV
    (`<csv_dir>/<base_name>_averaged_top1_vs_topk.csv`) exists; missing files are
    reported and skipped.

    Args:
        base_names: List of dataset base names
        csv_dir: Directory containing the averaged CSV files

    Returns:
        Plotly figure object (empty if none of the files exist)
    """
    import pandas as pd
    import plotly.graph_objects as go
    import os

    fig = go.Figure()
    # Union of the layer numbers seen in any dataset, used for the x-axis ticks
    layer_ticks = set()

    for base_name in base_names:
        # Construct the file path
        file_path = os.path.join(csv_dir, f"{base_name}_averaged_top1_vs_topk.csv")

        # Check if file exists
        if not os.path.exists(file_path):
            print(f"Warning: File {file_path} not found. Skipping.")
            continue

        # Read the CSV file
        df = pd.read_csv(file_path)
        layer_ticks.update(df['layer'].tolist())

        # Add trace for this dataset
        fig.add_trace(
            go.Scatter(
                x=df['layer'],
                y=df['avg_top1_vs_topk'],
                mode='lines+markers',
                name=base_name
            )
        )

    # Update layout
    fig.update_layout(
        title='Comparison of Top1 vs TopK Cosine Similarities Across Datasets',
        xaxis_title='Layer',
        yaxis_title='Average Cosine Similarity',
        hovermode='x unified',
        template='plotly_white',
        legend_title="Dataset"
    )

    # Show a tick for every layer present in at least one dataset, rather than
    # only the layers of whichever file happened to be loaded last.
    if layer_ticks:
        fig.update_xaxes(tickmode='array', tickvals=sorted(layer_ticks))

    return fig




In [None]:
# Prompt files to process, one prompt per line; each yields one CSV per prompt.
file_path = ['data-ext/aime-math_chunked.txt',
             'data-ext/gsm8k_chunked.txt',
             'data-ext/github_chunked.txt',
             'data-ext/chinese_chunked.txt',
             'data-ext/arxiv_title_abstract_chunked.txt',
             'data-ext/english_chunked.txt',
             'data-ext/french-qa_chunked.txt']
for file in file_path:
    process_prompts_file(file, model, tokenizer, output_dir="cosine-sim-csv")
    print(f"Processed {file}")
    # Drop unreachable Python objects first so their GPU memory can be released ...
    gc.collect()  # Force garbage collection
    # ... then flush the CUDA cache. Check the device type: a torch.device never
    # equals the string "cuda", so `DEVICE == "cuda"` would skip this.
    if DEVICE.type == "cuda":
        torch.cuda.empty_cache()



In [None]:
# Dataset prefixes. average_cosine_similarities expands each one into the glob
# "cosine-sim-csv/<name>_chunked_prompt_*_cos-sim.csv".
base_name = ['aime-math',
            'gsm8k',
            'github',
            'chinese',
            'arxiv_title_abstract',
            'english',
            'french-qa']

# Write one averaged CSV per dataset; the return value is not used here.
for base in base_name:
    average_cosine_similarities(base)

In [None]:
# Plot combined graph for all datasets
# Reads the averaged CSVs from plot_combined_similarities' default csv_dir
# ("cosine-sim-csv-avg"); datasets whose file is missing are skipped with a warning.
combined_fig = plot_combined_similarities(base_name)
combined_fig.show()