# More systematic analysis: how to calculate "effective depth"

Here we analyze and plot effective-depth results across different models. Following "Do Language Models Use Their Depth Efficiently?" by Csordás et al. (2025), we use the following metrics:
1. Relative norm contribution & cosine similarity
2. The effects of the layer on future computations
3. Logit lens overlap
4. Residual erasure experiment
5. Integrated gradients

In [23]:
import torch
import pandas as pd
from typing import Dict, List

In [24]:
# Names of the models to evaluate, grouped by parameter count.
# Each name is used verbatim as a directory component when locating saved
# results (outputs/{dataset}/{model}/...), so it must match the on-disk name.
models = [
    # 1.5B models
    "DeepSeek-R1-Distill-Qwen-1.5B",
    "Qwen2.5-1.5B-Instruct",
    "Qwen2.5-Math-1.5B",
    # 7B models
    "DeepSeek-R1-Distill-Qwen-7B",
    "Qwen2.5-7B-Instruct",
    "Qwen2.5-Math-7B",
    # 14B models
    "DeepSeek-R1-Distill-Qwen-14B",
    "Qwen2.5-14B-Instruct",
    "Qwen2.5-14B",
    # 32B models
    "DeepSeek-R1-Distill-Qwen-32B",
    "Qwen2.5-32B-Instruct",
    "Qwen2.5-32B"
]
    

In [25]:
def calc_ed_cosine_similarity(att_cos, mlp_cos, layer_cos):
    '''
    Calculate the effective depth through cosine similarity metric.

    The three inputs are stacked and averaged element-wise.  The effective
    depth is the last index ``l`` at which that mean is negative at ``l``
    and non-negative at ``l + 1``.

    Args:
        att_cos: Tensor indexed by layer along dim 0 (assumed 1-D, one
            value per layer -- TODO confirm against the producer).
        mlp_cos: Tensor with the same shape as ``att_cos``.
        layer_cos: Tensor with the same shape as ``att_cos``.

    Returns:
        int: Index of the last negative-to-non-negative crossing.

    Raises:
        ValueError: If the averaged curve never crosses from negative to
            non-negative (previously an opaque ``IndexError``).
    '''
    mean_effect = torch.mean(torch.stack([att_cos, mlp_cos, layer_cos]), dim=0)
    # Walk backwards so the first crossing found is the last one in layer
    # order; there is no need to collect the earlier crossings.
    for l in reversed(range(mean_effect.shape[0] - 1)):
        if mean_effect[l] < 0 and mean_effect[l + 1] >= 0:
            return l
    raise ValueError(
        "mean cosine similarity never crosses from negative to non-negative; "
        "effective depth is undefined"
    )

def calc_ed_logitlens_kl(res_kl_divs, threshold=0.1):
    '''
    Calculate the effective depth through logit lens KL divergence metric.

    Returns the first index whose KL value is strictly below ``threshold``
    times the maximum of ``res_kl_divs``.  The final entry is never tested,
    although it does take part in the maximum.

    Args:
        res_kl_divs: Tensor of KL values indexed along dim 0 (assumed 1-D,
            one value per layer -- TODO confirm against the producer).
        threshold: Fraction of the maximum KL value used as the cutoff.

    Returns:
        int: First qualifying index, or -1 when no tested entry qualifies.
    '''
    n_layers = res_kl_divs.shape[0]
    if n_layers < 2:
        # Nothing to scan; returning here also avoids max() on an empty tensor.
        return -1
    # The cutoff does not depend on the loop index, so compute it once.
    cutoff = threshold * res_kl_divs.max()
    for l in range(n_layers - 1):
        if res_kl_divs[l] < cutoff:
            return l
    return -1

def calc_ed_logitlens_overlap(res_overlap, threshold=0.2):
    '''
    Calculate the effective depth through logit lens overlap metric.

    Returns the first index whose overlap value is strictly greater than
    ``threshold``.  The final entry is never tested.

    Args:
        res_overlap: Tensor of overlap values indexed along dim 0 (assumed
            1-D, one value per layer -- TODO confirm against the producer).
        threshold: Overlap value an entry must exceed to qualify.

    Returns:
        int: First qualifying index, or -1 when no tested entry qualifies.
    '''
    last_checked = res_overlap.shape[0] - 1
    return next(
        (idx for idx in range(last_checked) if res_overlap[idx] > threshold),
        -1,
    )

In [None]:
import torch
import pandas as pd
from typing import Dict, List

def _logitlens_path(dataset: str, model: str) -> str:
    '''Return the path of the saved logit-lens results for one (dataset, model) pair.'''
    # The aime24 file name carries "5exps"; every other dataset uses "10exps".
    n_exps = 5 if dataset == "aime24" else 10
    return f"outputs/{dataset}/{model}/logitlens_{n_exps}exps.pt"


def _metric_entry(prefix: str, ed=None, n_layers=None) -> Dict:
    '''Build the ``{prefix}_ed`` / ``_ratio`` / ``_layers`` record (all None when ``ed`` is None).'''
    ratio = None if ed is None else (ed + 1) / n_layers
    return {
        f"{prefix}_ed": ed,
        f"{prefix}_ratio": ratio,
        f"{prefix}_layers": n_layers,
    }


def _format_ed(entry: Dict, prefix: str) -> str:
    '''Render one metric as ``ED(ratio)`` for the console table, or "N/A" when missing.'''
    ed = entry[f"{prefix}_ed"]
    if ed is None:
        return "N/A"
    ratio = entry[f"{prefix}_ratio"]
    return f"{ed:2d}({ratio:.2f})"


def calculate_all_metrics(models: List[str]) -> Dict[str, Dict]:
    '''
    Compute the three effective-depth metrics for every model and dataset.

    For each model and each of the datasets "hellaswag", "gsm8k" and
    "aime24", the saved results under ``outputs/{dataset}/{model}/`` are
    loaded and the cosine, logit-lens KL and logit-lens overlap effective
    depths are computed.  One row per (model, dataset) is printed to stdout.
    A failure in one metric is reported on stdout and recorded as None; it
    does not stop the remaining metrics, datasets or models.

    Args:
        models: Model names, used verbatim as directory names.

    Returns:
        Mapping ``model -> {"{dataset}_{metric}_{suffix}": value}`` where
        metric is "cosine", "kl" or "overlap" and suffix is "ed" (effective
        depth index), "ratio" (``(ed + 1) / layers``) or "layers".  Values
        are None for any metric that could not be computed.
    '''
    results = {}
    datasets = ["hellaswag", "gsm8k", "aime24"]

    print(f"{'Model':30s} @ {'Dataset':10s}: {'ED (cos)':8s} {'ED (KL)':8s} {'ED (Overlap)':8s}")
    print("-" * 80)

    for model in models:
        results[model] = {}
        for dataset in datasets:
            try:
                model_results = {}

                # NOTE(review): torch.load unpickles its input; only point
                # this at result files you produced yourself.
                try:
                    cos_data = torch.load(f"outputs/{dataset}/{model}/relative_contribution.pt")
                    att_cos = cos_data["att_cos"]
                    ed_cos = calc_ed_cosine_similarity(
                        att_cos, cos_data["mlp_cos"], cos_data["layer_cos"]
                    )
                    model_results.update(_metric_entry("cosine", ed_cos, att_cos.shape[0]))
                except Exception as e:
                    print(f"  Cosine error for {model} @ {dataset}: {e}")
                    model_results.update(_metric_entry("cosine"))

                # Both logit-lens metrics read the same file, so load it once.
                # A load failure is kept and re-raised inside each metric's
                # own try block so that both metrics still report it.
                logit_data = None
                load_error = None
                try:
                    logit_data = torch.load(_logitlens_path(dataset, model))
                except Exception as e:
                    load_error = e

                try:
                    if load_error is not None:
                        raise load_error
                    res_kl_divs = logit_data["res_kl_divs"]
                    ed_kl = calc_ed_logitlens_kl(res_kl_divs, threshold=0.5)
                    model_results.update(_metric_entry("kl", ed_kl, res_kl_divs.shape[0]))
                except Exception as e:
                    print(f"  LogitLens KL error for {model} @ {dataset}: {e}")
                    model_results.update(_metric_entry("kl"))

                try:
                    if load_error is not None:
                        raise load_error
                    res_overlaps = logit_data["res_overlaps"]
                    ed_overlap = calc_ed_logitlens_overlap(res_overlaps, threshold=0.3)
                    model_results.update(
                        _metric_entry("overlap", ed_overlap, res_overlaps.shape[0])
                    )
                except Exception as e:
                    print(f"  LogitLens Overlap error for {model} @ {dataset}: {e}")
                    model_results.update(_metric_entry("overlap"))

                for key, value in model_results.items():
                    results[model][f"{dataset}_{key}"] = value

                cos_str = _format_ed(model_results, "cosine")
                kl_str = _format_ed(model_results, "kl")
                overlap_str = _format_ed(model_results, "overlap")

                print(f"{model:30s} @ {dataset:10s}: {cos_str:8s} {kl_str:8s} {overlap_str:8s}")

            except Exception as e:
                print(f"Major error processing {model} @ {dataset}: {e}")
                # Fill this dataset's entries with empty values.
                for metric in ['cosine', 'kl', 'overlap']:
                    for suffix in ['_ed', '_ratio', '_layers']:
                        results[model][f"{dataset}_{metric}{suffix}"] = None

    return results

def create_excel_table(results: Dict[str, Dict], output_file: str = "effective_depth_results.xlsx"):
    '''
    Write the effective-depth results to an Excel workbook and return the table.

    The table has one row per model and a three-level column index of
    (metric, dataset, "ED" | "RATIO").  RATIO cells holding an int or float
    are rounded to two decimals.  The workbook gets the table on the sheet
    "Effective Depth Results" and a short legend on the sheet "Description".

    Args:
        results: Mapping ``model -> {"{dataset}_{metric}_{ed|ratio}": value}``;
            missing keys are written as empty cells.
        output_file: Destination path of the workbook.

    Returns:
        The ``pandas.DataFrame`` that was written to the first sheet.
    '''
    datasets = ["hellaswag", "gsm8k", "aime24"]
    metrics = ['cosine', 'kl', 'overlap']
    value_types = ['ed', 'ratio']

    # Column order: metric outermost, then dataset, then ED / RATIO.
    header = [
        (metric, dataset, value_type.upper())
        for metric in metrics
        for dataset in datasets
        for value_type in value_types
    ]
    table = pd.DataFrame(
        index=list(results.keys()),
        columns=pd.MultiIndex.from_tuples(header),
    )

    for model, per_model in results.items():
        for metric, dataset, label in header:
            lookup = f"{dataset}_{metric}_{label.lower()}"
            table.loc[model, (metric, dataset, label)] = per_model.get(lookup, None)

    def _round_ratio(cell):
        # Leave None / non-numeric cells untouched.
        if isinstance(cell, (int, float)):
            return round(cell, 2)
        return cell

    for column in table.columns:
        if column[2] != 'RATIO':
            continue
        table[column] = table[column].apply(_round_ratio)

    legend = pd.DataFrame({
        'Metric': ['cosine', 'kl', 'overlap'],
        'Description': [
            'Cosine Similarity based effective depth',
            'LogitLens KL divergence based effective depth',
            'LogitLens Overlap based effective depth'
        ],
        'Format': ['ED(Ratio)', 'ED(Ratio)', 'ED(Ratio)']
    })

    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        table.to_excel(writer, sheet_name='Effective Depth Results')
        legend.to_excel(writer, sheet_name='Description', index=False)

    print(f"\nResults saved to {output_file}")

    return table


# Driver: compute every metric for the models listed in `models`, which
# prints one row per (model, dataset) as it goes.
print("Calculating All Metrics:")
all_results = calculate_all_metrics(models)

# Write the results to the default workbook (effective_depth_results.xlsx)
# and keep the returned DataFrame for inspection.
df = create_excel_table(all_results)

# Show the first rows of the table as a quick sanity check.
print("\nExcel Table Preview:")
print(df.head())

Calculating All Metrics:
Model                          @ Dataset   : ED (cos) ED (KL)  ED (Overlap)
--------------------------------------------------------------------------------
DeepSeek-R1-Distill-Qwen-1.5B  @ hellaswag : 17(0.64) 20(0.75) 23(0.86)
DeepSeek-R1-Distill-Qwen-1.5B  @ gsm8k     : 16(0.61)  1(0.07) 23(0.86)
DeepSeek-R1-Distill-Qwen-1.5B  @ aime24    : 17(0.64) 24(0.89) 24(0.89)
Qwen2.5-1.5B-Instruct          @ hellaswag : 16(0.61) 21(0.79) 23(0.86)
Qwen2.5-1.5B-Instruct          @ gsm8k     : 20(0.75) 22(0.82) 23(0.86)
Qwen2.5-1.5B-Instruct          @ aime24    : 19(0.71) 23(0.86) 23(0.86)
Qwen2.5-Math-1.5B              @ hellaswag : 16(0.61) 20(0.75) 23(0.86)
Qwen2.5-Math-1.5B              @ gsm8k     : 16(0.61) 22(0.82) 23(0.86)
Qwen2.5-Math-1.5B              @ aime24    : 16(0.61) 23(0.86) 23(0.86)
DeepSeek-R1-Distill-Qwen-7B    @ hellaswag : 16(0.61) 24(0.89) 25(0.93)
DeepSeek-R1-Distill-Qwen-7B    @ gsm8k     : 16(0.61) 24(0.89) 25(0.93)
DeepSeek-R1-Distill-Qwen-7