In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import glob
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from pandas/matplotlib;
# consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

# Global look for all figures: seaborn grid theme plus default size and font.
sns.set_style("whitegrid")
plt.rcParams.update({
    'figure.figsize': (15, 5),
    'font.size': 10,
})


In [2]:
# Two-letter language code -> display name used in plot titles and legends.
LANGUAGE_NAMES = {
    'bn': 'Bengali',
    'en': 'English',
    'gu': 'Gujarati',
    'hi': 'Hindi',
    'kn': 'Kannada',
    'ml': 'Malayalam',
    'mr': 'Marathi',
    'ne': 'Nepali',
    'ta': 'Tamil',
    'te': 'Telugu',
}

# Language codes in processing order (same order as LANGUAGE_NAMES).
LANGUAGES = list(LANGUAGE_NAMES)

# Color scheme matching the legend
LANGUAGE_COLORS = dict(
    bn='#5B8CBE',  # Bengali - Blue
    en='#E89045',  # English - Orange
    gu='#64A95F',  # Gujarati - Green
    hi='#C85450',  # Hindi - Red
    kn='#9D7FB8',  # Kannada - Purple
    ml='#8B6F47',  # Malayalam - Brown
    mr='#D992C2',  # Marathi - Pink
    ne='#7F7F7F',  # Nepali - Gray
    ta='#B8B833',  # Tamil - Yellow-green
    te='#5DBED7',  # Telugu - Cyan
)

# Lens variants; each name is also the file-name prefix of its result CSVs.
LENS_TYPES = ['logitlens', 'tunedlens']

# Root of the per-language result folders; the loaders below look for one
# 'm_mmlu_<lang>' directory per language underneath it.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
BASE_DIR = Path('/mnt/storage/deeksha/indictunedlens/results_with_matches')
# Destination for every figure written by show_or_save(); created here
# (including parents) if it does not exist yet.
OUTPUT_DIR = Path('/mnt/storage/deeksha/indictunedlens/plots')
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Read by show_or_save(): 'save' writes the figure into OUTPUT_DIR,
# 'show' calls plt.show(); any other value just closes the figure.
PLOT_MODE = 'save'

In [3]:
def show_or_save(filename=None):
    """Finish the current matplotlib figure according to ``PLOT_MODE``.

    Args:
        filename: File name (relative to ``OUTPUT_DIR``) used when saving.

    Behavior:
        * ``PLOT_MODE == 'save'`` with a non-empty ``filename``: save at
          300 dpi with a tight bounding box, close the figure, print a
          confirmation line.
        * ``PLOT_MODE == 'show'``: display the figure with ``plt.show()``.
        * anything else (including 'save' without a filename): close the
          figure without writing or showing it.
    """
    if PLOT_MODE == 'show':
        plt.show()
        return

    if PLOT_MODE != 'save' or not filename:
        # Nothing to write and nothing to display: just release the figure.
        plt.close()
        return

    target = OUTPUT_DIR / filename
    plt.savefig(target, dpi=300, bbox_inches='tight')
    plt.close()
    print(f"✓ Saved: {filename}")


In [4]:
def create_combined_csv(language, lens_type):
    """Concatenate the per-run CSVs of one language/lens into a single file.

    Looks in ``BASE_DIR / f'm_mmlu_{language}'`` for files named
    ``{lens_type}_logits_probabilities_*.csv``, concatenates every file that
    can be read (in sorted file-name order), writes the result next to them
    as ``{lens_type}_combined_logits_probabilities.csv`` and returns it.

    Args:
        language: Language code used in the directory name.
        lens_type: Lens name used as the file-name prefix.

    Returns:
        The combined DataFrame, or ``None`` when the directory is missing,
        no input file matches, or none of the matching files could be read.
    """
    lang_dir = BASE_DIR / f'm_mmlu_{language}'
    if not lang_dir.exists():
        print(f"✗ Missing directory: {lang_dir}")
        return None

    # Glob results come back in filesystem-dependent order; sort them so the
    # row order of the combined CSV is the same on every machine and run.
    files = sorted(lang_dir.glob(f'{lens_type}_logits_probabilities_*.csv'))

    if not files:
        print(f"✗ No files found for {language} - {lens_type}")
        return None

    dfs = []
    failed = []
    for f in files:
        try:
            dfs.append(pd.read_csv(f))
        except Exception as e:
            # Best effort: keep going with the readable files, but remember
            # the failure so the partial result is reported below.
            failed.append(f)
            print(f"✗ Error reading {f}: {e}")

    if not dfs:
        return None

    combined_df = pd.concat(dfs, ignore_index=True)
    out_file = lang_dir / f'{lens_type}_combined_logits_probabilities.csv'
    combined_df.to_csv(out_file, index=False, encoding='utf-8')
    print(f"✓ Created combined CSV: {out_file}")

    if failed:
        # The combined file is only rebuilt when it is missing, so make it
        # obvious that this one was built from an incomplete set of inputs.
        print(f"✗ Combined CSV for {language} - {lens_type} is partial: "
              f"{len(failed)} of {len(files)} input file(s) could not be read")

    return combined_df

In [5]:
def ensure_all_combined_csvs():
    """Create the combined CSV for every language/lens pair that lacks one.

    For each pair from ``LANGUAGES`` x ``LENS_TYPES`` the combined file
    ``BASE_DIR / m_mmlu_<lang> / <lens>_combined_logits_probabilities.csv``
    is built with ``create_combined_csv`` when it does not exist yet.
    Existing combined files are left untouched.

    Prints a success line only when every missing file could be created;
    otherwise prints which language/lens pairs are still missing.
    Returns ``None``.
    """
    print("=" * 80)
    print("ENSURING ALL COMBINED CSV FILES EXIST")
    print("=" * 80)

    missing = []
    for lang in LANGUAGES:
        for lens in LENS_TYPES:
            combined = BASE_DIR / f'm_mmlu_{lang}' / f'{lens}_combined_logits_probabilities.csv'
            if combined.exists():
                continue
            # create_combined_csv returns None when nothing could be written.
            if create_combined_csv(lang, lens) is None:
                missing.append(f'{lang}/{lens}')

    if missing:
        print(f"✗ Could not create {len(missing)} combined CSV(s): {', '.join(missing)}")
    else:
        print("✓ All combined CSVs ensured")
    print("=" * 80)

In [6]:
# In-memory store of combined CSVs, keyed by (language, lens_type).
CSV_CACHE = {}

def preload_all_csvs():
    """Read every existing combined CSV into ``CSV_CACHE``.

    Walks all ``LANGUAGES`` x ``LENS_TYPES`` pairs; a pair whose combined
    CSV is not on disk is skipped without a message. Prints the number of
    cached DataFrames when done.
    """
    rule = "=" * 80
    print(rule)
    print("PRELOADING ALL CSV FILES INTO MEMORY")
    print(rule)

    pairs = [(lang, lens) for lang in LANGUAGES for lens in LENS_TYPES]
    for key in pairs:
        lang, lens = key
        csv_path = BASE_DIR / f'm_mmlu_{lang}' / f'{lens}_combined_logits_probabilities.csv'
        if not csv_path.exists():
            continue
        CSV_CACHE[key] = pd.read_csv(csv_path)

    print(f"✓ Loaded {len(CSV_CACHE)} DataFrames into memory")
    print(rule)

def get_df(language, lens_type):
    """Return the cached combined DataFrame for ``(language, lens_type)``.

    Args:
        language: Language code used as the first half of the cache key.
        lens_type: Lens name used as the second half of the cache key.

    Raises:
        KeyError: If nothing is cached for that pair. ``preload_all_csvs``
            skips combined CSVs that are missing on disk, so the message
            points there instead of showing only the bare key.
    """
    key = (language, lens_type)
    try:
        return CSV_CACHE[key]
    except KeyError:
        raise KeyError(
            f"No cached DataFrame for {key!r}; run preload_all_csvs() first and "
            f"check that the combined CSV for this language/lens exists"
        ) from None

In [7]:
# Build any combined CSV that is missing on disk, then load every available
# combined CSV into CSV_CACHE so the plotting functions can read from memory.
ensure_all_combined_csvs()
preload_all_csvs()

ENSURING ALL COMBINED CSV FILES EXIST
✓ All combined CSVs ensured
PRELOADING ALL CSV FILES INTO MEMORY
✓ Loaded 20 DataFrames into memory


In [8]:
def calculate_entropy(probabilities):
    """Return the Shannon entropy (natural log, i.e. nats) of ``probabilities``.

    Zero entries contribute exactly 0 (the ``0 * log 0 = 0`` convention)
    instead of being shifted by a small epsilon, so the other entries are not
    biased and a one-hot input yields exactly 0. The values are used as given;
    they are not renormalized to sum to 1.

    Args:
        probabilities: Array-like of probabilities.

    Returns:
        ``-sum(p * log(p))`` over the non-zero entries, as a NumPy float.
        An empty input yields 0.
    """
    probs = np.asarray(probabilities, dtype=float)
    # Mask only exact zeros: NaN or negative inputs still propagate as NaN
    # rather than being dropped silently.
    nonzero = probs[probs != 0]
    return -np.sum(nonzero * np.log(nonzero))

# %%
def plot_entropy_heatmap(language, lens_type='tunedlens'):
    """Draw a layer x token-position heatmap of prediction entropy.

    For every ``(layer, token_position)`` group of the cached frame, the rows
    are sorted by ``rank`` and the first 10 ``probability`` values are passed
    to ``calculate_entropy``. The result is pivoted to layers (rows) by token
    positions (columns) and handed to ``show_or_save``.

    Args:
        language: Language code; also used for the title and file name.
        lens_type: Which lens' cached frame to read.
    """
    frame = get_df(language, lens_type)

    # NOTE(review): if the frame holds several samples per
    # (layer, token_position), the first 10 rows by rank mix samples rather
    # than forming one top-10 distribution - confirm against the CSV schema.
    records = [
        {
            'layer': layer,
            'token_position': pos,
            'entropy': calculate_entropy(
                group.sort_values('rank')['probability'].values[:10]
            ),
        }
        for (layer, pos), group in frame.groupby(['layer', 'token_position'])
    ]

    grid = pd.DataFrame(records).pivot_table(
        index='layer', columns='token_position', values='entropy'
    )

    fig, ax = plt.subplots(figsize=(12, 8))
    sns.heatmap(grid, ax=ax, cmap='YlOrRd', cbar_kws={'label': 'Entropy'})
    ax.set_title(f'Entropy Heatmap - {LANGUAGE_NAMES[language]} ({lens_type})')
    ax.set_xlabel('Token Position')
    ax.set_ylabel('Layer')

    show_or_save(f'entropy_heatmap_{language}_{lens_type}.png')

In [9]:
# One entropy heatmap per configured language, using the function's default
# lens_type ('tunedlens').
for lang in LANGUAGES:
    plot_entropy_heatmap(lang)

✓ Saved: entropy_heatmap_bn_tunedlens.png
✓ Saved: entropy_heatmap_en_tunedlens.png
✓ Saved: entropy_heatmap_gu_tunedlens.png
✓ Saved: entropy_heatmap_hi_tunedlens.png
✓ Saved: entropy_heatmap_kn_tunedlens.png
✓ Saved: entropy_heatmap_ml_tunedlens.png
✓ Saved: entropy_heatmap_mr_tunedlens.png
✓ Saved: entropy_heatmap_ne_tunedlens.png
✓ Saved: entropy_heatmap_ta_tunedlens.png
✓ Saved: entropy_heatmap_te_tunedlens.png


In [10]:
def plot_rank_accuracy_heatmap_comparison(language):
    """Draw side-by-side layer x rank accuracy heatmaps, one per lens type.

    For each lens in ``LENS_TYPES`` the accuracy of a ``(layer, rank)`` cell
    is the number of rows with a truthy ``match_gold_answer`` divided by the
    number of rows in that cell; cells with no match are 0. Only ranks 1-10
    that are present in the data are drawn, on a fixed 0-0.20 color scale.

    Args:
        language: Language code; also used for the title and file name.
    """
    fig, axes = plt.subplots(1, 2, figsize=(16, 8))
    cell_keys = ['layer', 'rank']

    for col, lens in enumerate(LENS_TYPES):
        frame = get_df(language, lens)

        hits = frame[frame['match_gold_answer']].groupby(cell_keys).size()
        seen = frame.groupby(cell_keys).size()

        # Cells without any hit are absent from `hits`; the division yields
        # NaN there, which is treated as accuracy 0.
        rates = (hits / seen).fillna(0).reset_index(name='accuracy')
        grid = rates.pivot(index='layer', columns='rank', values='accuracy').fillna(0)
        shown_ranks = [r for r in range(1, 11) if r in grid.columns]

        panel = axes[col]
        sns.heatmap(
            grid[shown_ranks],
            ax=panel,
            cmap='Blues',
            annot=True,
            fmt='.3f',
            vmin=0,
            vmax=0.20
        )

        if lens == 'logitlens':
            panel.set_title('Logit Lens')
        else:
            panel.set_title('Indic-TunedLens')
        panel.set_xlabel('Rank')
        panel.set_ylabel('Layer')

    plt.suptitle(f'{LANGUAGE_NAMES[language]}: Rank Accuracy Comparison', fontsize=14)
    plt.tight_layout()
    show_or_save(f'rank_accuracy_heatmap_{language}.png')


In [11]:
# One two-panel (one panel per lens type) rank-accuracy figure per language.
for lang in LANGUAGES:
    plot_rank_accuracy_heatmap_comparison(lang)

✓ Saved: rank_accuracy_heatmap_bn.png
✓ Saved: rank_accuracy_heatmap_en.png
✓ Saved: rank_accuracy_heatmap_gu.png
✓ Saved: rank_accuracy_heatmap_hi.png
✓ Saved: rank_accuracy_heatmap_kn.png
✓ Saved: rank_accuracy_heatmap_ml.png
✓ Saved: rank_accuracy_heatmap_mr.png
✓ Saved: rank_accuracy_heatmap_ne.png
✓ Saved: rank_accuracy_heatmap_ta.png
✓ Saved: rank_accuracy_heatmap_te.png


In [12]:
def plot_performance_difference(languages=['hi', 'mr', 'bn'], rank=None):
    """Plot the per-layer accuracy gap between TunedLens and LogitLens.

    One panel per language shows ``mean(match_gold_answer)`` of the tunedlens
    frame minus that of the logitlens frame, for every layer present in the
    logitlens frame. A layer missing from the tunedlens frame yields NaN.

    Args:
        languages: Language codes to plot, one panel each.
        rank: Optional rank to restrict both frames to before averaging.
            ``None`` (default) averages over every row of a layer, i.e. over
            all ranks. The other accuracy plots in this notebook use only
            ``rank == 1`` rows; pass ``rank=1`` for a directly comparable
            curve. When given, ``_rank<rank>`` is appended to the file name
            so the all-ranks figure is not overwritten.
    """

    fig, axes = plt.subplots(1, len(languages), figsize=(5 * len(languages), 4))
    if len(languages) == 1:
        axes = [axes]

    for idx, language in enumerate(languages):
        df_logit = get_df(language, 'logitlens')
        df_tuned = get_df(language, 'tunedlens')

        if rank is not None:
            df_logit = df_logit[df_logit['rank'] == rank]
            df_tuned = df_tuned[df_tuned['rank'] == rank]

        # One grouped pass per frame instead of re-filtering both frames for
        # every layer; groupby returns the layers in sorted order.
        logit_acc = df_logit.groupby('layer')['match_gold_answer'].mean()
        tuned_acc = df_tuned.groupby('layer')['match_gold_answer'].mean()
        # Align on the logitlens layers so the x-axis matches that frame.
        differences = tuned_acc.reindex(logit_acc.index) - logit_acc

        axes[idx].plot(differences.index, differences.values,
                       marker='o', linewidth=2, markersize=4,
                       color=LANGUAGE_COLORS[language])
        axes[idx].axhline(y=0, color='gray', linestyle='--', alpha=0.5)
        axes[idx].set_xlabel('Layer')
        axes[idx].set_ylabel('Accuracy Difference\n(Tuned − Logit)')
        axes[idx].set_title(LANGUAGE_NAMES[language])
        axes[idx].grid(True, alpha=0.3)

    plt.suptitle('Performance Difference: Indic-TunedLens − Logit Lens', fontsize=14)
    plt.tight_layout()
    suffix = '' if rank is None else f'_rank{rank}'
    show_or_save(f'performance_difference_{"_".join(languages)}{suffix}.png')


In [13]:
# One single-panel figure per language: each call passes a one-element list.
for lang in LANGUAGES:
    plot_performance_difference([lang])

✓ Saved: performance_difference_bn.png
✓ Saved: performance_difference_en.png
✓ Saved: performance_difference_gu.png
✓ Saved: performance_difference_hi.png
✓ Saved: performance_difference_kn.png
✓ Saved: performance_difference_ml.png
✓ Saved: performance_difference_mr.png
✓ Saved: performance_difference_ne.png
✓ Saved: performance_difference_ta.png
✓ Saved: performance_difference_te.png


In [14]:
def plot_gold_match_accuracy(languages=['hi', 'mr', 'bn']):
    """Draw rank-1 gold-match accuracy per layer, one line per language.

    Uses the tunedlens frame of each language: rows with ``rank == 1`` are
    grouped by ``layer`` and ``match_gold_answer`` is averaged.

    Args:
        languages: Language codes to overlay in a single figure.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    for code in languages:
        tuned = get_df(code, 'tunedlens')
        top1_rows = tuned[tuned['rank'] == 1]
        per_layer = top1_rows.groupby('layer')['match_gold_answer'].mean()

        ax.plot(
            per_layer.index, per_layer.values,
            marker='o', label=LANGUAGE_NAMES[code], linewidth=2, markersize=6,
            color=LANGUAGE_COLORS[code],
        )

    ax.set_xlabel('Layer')
    ax.set_ylabel('Accuracy')
    ax.set_title('Overall Gold-Match Accuracy by Layer')
    ax.legend()
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    show_or_save(f'gold_match_accuracy_{"_".join(languages)}.png')

In [15]:
# One figure per language: each call passes a one-element list, so every
# figure contains a single line.
for lang in LANGUAGES:
    plot_gold_match_accuracy([lang])

✓ Saved: gold_match_accuracy_bn.png
✓ Saved: gold_match_accuracy_en.png
✓ Saved: gold_match_accuracy_gu.png
✓ Saved: gold_match_accuracy_hi.png
✓ Saved: gold_match_accuracy_kn.png
✓ Saved: gold_match_accuracy_ml.png
✓ Saved: gold_match_accuracy_mr.png
✓ Saved: gold_match_accuracy_ne.png
✓ Saved: gold_match_accuracy_ta.png
✓ Saved: gold_match_accuracy_te.png


In [16]:
def plot_top1_probability(languages=['hi', 'mr', 'bn']):
    """Draw the mean rank-1 probability per layer, one line per language.

    Uses the tunedlens frame of each language: rows with ``rank == 1`` are
    grouped by ``layer`` and ``probability`` is averaged.

    Args:
        languages: Language codes to overlay in a single figure.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    for code in languages:
        tuned = get_df(code, 'tunedlens')
        is_top1 = tuned['rank'] == 1
        mean_prob = tuned[is_top1].groupby('layer')['probability'].mean()

        ax.plot(
            mean_prob.index, mean_prob.values,
            marker='o', label=LANGUAGE_NAMES[code], linewidth=2, markersize=6,
            color=LANGUAGE_COLORS[code],
        )

    ax.set_xlabel('Layer')
    ax.set_ylabel('Mean Probability')
    ax.set_title('Average Top-1 Prediction Probability by Layer')
    ax.legend()
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    show_or_save(f'top1_probability_{"_".join(languages)}.png')


In [17]:
# One figure per language: each call passes a one-element list.
for lang in LANGUAGES:
    plot_top1_probability([lang])

✓ Saved: top1_probability_bn.png
✓ Saved: top1_probability_en.png
✓ Saved: top1_probability_gu.png
✓ Saved: top1_probability_hi.png
✓ Saved: top1_probability_kn.png
✓ Saved: top1_probability_ml.png
✓ Saved: top1_probability_mr.png
✓ Saved: top1_probability_ne.png
✓ Saved: top1_probability_ta.png
✓ Saved: top1_probability_te.png


In [18]:
def plot_layerwise_accuracy_comparison(languages=['hi', 'mr', 'bn']):
    """Compare rank-1 accuracy per layer for both lenses, one panel per language.

    For each language the ``rank == 1`` rows of the logitlens and tunedlens
    frames are grouped by ``layer`` and ``match_gold_answer`` is averaged;
    both curves are drawn in the same panel.

    Args:
        languages: Language codes to plot, one panel each.
    """
    n_panels = len(languages)
    # squeeze=False always returns a 2-D array, so a single panel needs no
    # special-casing; row 0 holds the panels.
    fig, grid = plt.subplots(1, n_panels, figsize=(5 * n_panels, 4), squeeze=False)

    for panel, code in zip(grid[0], languages):
        logit_frame = get_df(code, 'logitlens')
        tuned_frame = get_df(code, 'tunedlens')

        logit_curve = (
            logit_frame[logit_frame['rank'] == 1]
            .groupby('layer')['match_gold_answer']
            .mean()
        )
        tuned_curve = (
            tuned_frame[tuned_frame['rank'] == 1]
            .groupby('layer')['match_gold_answer']
            .mean()
        )

        panel.plot(logit_curve.index, logit_curve.values,
                   marker='o', label='Logit Lens', color='gray', alpha=0.6)
        panel.plot(tuned_curve.index, tuned_curve.values,
                   marker='s', label='Indic-TunedLens', color=LANGUAGE_COLORS[code])

        panel.set_title(LANGUAGE_NAMES[code])
        panel.set_xlabel('Layer')
        panel.set_ylabel('Accuracy')
        panel.legend()
        panel.grid(alpha=0.3)

    fig.suptitle('Logit Lens vs Indic-TunedLens: Accuracy Comparison')
    fig.tight_layout()
    show_or_save(f'layerwise_accuracy_{"_".join(languages)}.png')

In [19]:
# One single-panel comparison figure per language.
for lang in LANGUAGES:
    plot_layerwise_accuracy_comparison([lang])

✓ Saved: layerwise_accuracy_bn.png
✓ Saved: layerwise_accuracy_en.png
✓ Saved: layerwise_accuracy_gu.png
✓ Saved: layerwise_accuracy_hi.png
✓ Saved: layerwise_accuracy_kn.png
✓ Saved: layerwise_accuracy_ml.png
✓ Saved: layerwise_accuracy_mr.png
✓ Saved: layerwise_accuracy_ne.png
✓ Saved: layerwise_accuracy_ta.png
✓ Saved: layerwise_accuracy_te.png


In [None]:
def plot_average_rank_comparison(languages=['hi', 'mr', 'bn']):
    """Compare the mean rank of gold-matching rows per layer for both lenses.

    For each language, rows with a truthy ``match_gold_answer`` are grouped
    by ``layer`` and ``rank`` is averaged, separately for the logitlens and
    tunedlens frames. The y-axis is inverted so smaller ranks are drawn
    higher.

    Args:
        languages: Language codes to plot, one panel each.
    """
    n_panels = len(languages)
    # squeeze=False always returns a 2-D array; row 0 holds the panels.
    fig, grid = plt.subplots(1, n_panels, figsize=(5 * n_panels, 4), squeeze=False)

    for panel, code in zip(grid[0], languages):
        logit_frame = get_df(code, 'logitlens')
        tuned_frame = get_df(code, 'tunedlens')

        logit_hits = logit_frame[logit_frame['match_gold_answer']]
        tuned_hits = tuned_frame[tuned_frame['match_gold_answer']]
        logit_curve = logit_hits.groupby('layer')['rank'].mean()
        tuned_curve = tuned_hits.groupby('layer')['rank'].mean()

        panel.plot(logit_curve.index, logit_curve.values,
                   marker='o', label='Logit Lens', color='gray', alpha=0.6)
        panel.plot(tuned_curve.index, tuned_curve.values,
                   marker='s', label='Indic-TunedLens', color=LANGUAGE_COLORS[code])

        # Flip after plotting so the autoscaled limits are the ones inverted.
        panel.invert_yaxis()
        panel.set_title(LANGUAGE_NAMES[code])
        panel.set_xlabel('Layer')
        panel.set_ylabel('Average Rank')
        panel.legend()
        panel.grid(alpha=0.3)

    fig.suptitle('Average Rank of Correct Answer')
    fig.tight_layout()
    show_or_save(f'average_rank_comparison_{"_".join(languages)}.png')

In [None]:
# One single-panel average-rank comparison figure per language.
for lang in LANGUAGES:
    plot_average_rank_comparison([lang])

In [None]:
def plot_top1_probability_comparison(languages=['hi', 'mr', 'bn']):
    """Compare the mean rank-1 probability per layer for both lenses.

    For each language the ``rank == 1`` rows of the logitlens and tunedlens
    frames are grouped by ``layer`` and ``probability`` is averaged; both
    curves are drawn in the same panel.

    Args:
        languages: Language codes to plot, one panel each.
    """
    n_panels = len(languages)
    # squeeze=False always returns a 2-D array; row 0 holds the panels.
    fig, grid = plt.subplots(1, n_panels, figsize=(5 * n_panels, 4), squeeze=False)

    for panel, code in zip(grid[0], languages):
        logit_frame = get_df(code, 'logitlens')
        tuned_frame = get_df(code, 'tunedlens')

        logit_top1 = logit_frame[logit_frame['rank'] == 1]
        tuned_top1 = tuned_frame[tuned_frame['rank'] == 1]
        logit_curve = logit_top1.groupby('layer')['probability'].mean()
        tuned_curve = tuned_top1.groupby('layer')['probability'].mean()

        panel.plot(logit_curve.index, logit_curve.values,
                   marker='o', label='Logit Lens', color='gray', alpha=0.6)
        panel.plot(tuned_curve.index, tuned_curve.values,
                   marker='s', label='Indic-TunedLens', color=LANGUAGE_COLORS[code])

        panel.set_title(LANGUAGE_NAMES[code])
        panel.set_xlabel('Layer')
        panel.set_ylabel('Mean Top-1 Probability')
        panel.legend()
        panel.grid(alpha=0.3)

    fig.suptitle('Top-1 Probability Comparison')
    fig.tight_layout()
    show_or_save(f'top1_probability_comparison_{"_".join(languages)}.png')

In [None]:
# One single-panel top-1 probability comparison figure per language.
for lang in LANGUAGES:
    plot_top1_probability_comparison([lang])


In [None]:
def plot_accuracy_by_token_position(languages=['hi', 'mr', 'bn']):
    """Draw rank-1 accuracy per token position, one panel per language.

    Uses the tunedlens frame of each language: rows with ``rank == 1`` are
    grouped by ``token_position`` and ``match_gold_answer`` is averaged.

    Args:
        languages: Language codes to plot, one panel each.
    """
    n_panels = len(languages)
    # squeeze=False always returns a 2-D array; row 0 holds the panels.
    fig, grid = plt.subplots(1, n_panels, figsize=(5 * n_panels, 4), squeeze=False)

    for panel, code in zip(grid[0], languages):
        tuned = get_df(code, 'tunedlens')
        top1_rows = tuned[tuned['rank'] == 1]
        by_position = top1_rows.groupby('token_position')['match_gold_answer'].mean()

        panel.plot(by_position.index, by_position.values, linewidth=2,
                   color=LANGUAGE_COLORS[code])
        panel.set_title(LANGUAGE_NAMES[code])
        panel.set_xlabel('Token Position')
        panel.set_ylabel('Accuracy')
        panel.grid(alpha=0.3)

    fig.suptitle('Accuracy by Token Position')
    fig.tight_layout()
    show_or_save(f'accuracy_by_token_position_{"_".join(languages)}.png')



In [None]:
# One single-panel token-position accuracy figure per language.
for lang in LANGUAGES:
    plot_accuracy_by_token_position([lang])

In [None]:
def plot_average_rank_by_layer(languages=['hi', 'mr', 'bn']):
    """Draw the mean rank of gold-matching rows per layer, one line per language.

    Uses the tunedlens frame of each language: rows with a truthy
    ``match_gold_answer`` are grouped by ``layer`` and ``rank`` is averaged.
    The y-axis is inverted so smaller ranks are drawn higher.

    Args:
        languages: Language codes to overlay in a single figure.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    for code in languages:
        tuned = get_df(code, 'tunedlens')
        gold_rows = tuned[tuned['match_gold_answer']]
        mean_rank = gold_rows.groupby('layer')['rank'].mean()

        ax.plot(
            mean_rank.index, mean_rank.values,
            marker='o', label=LANGUAGE_NAMES[code], linewidth=2,
            color=LANGUAGE_COLORS[code],
        )

    ax.set_xlabel('Layer')
    ax.set_ylabel('Average Rank')
    ax.set_title('Average Rank of Correct Answer by Layer')
    ax.legend()
    ax.grid(alpha=0.3)
    ax.invert_yaxis()
    fig.tight_layout()
    show_or_save(f'average_rank_by_layer_{"_".join(languages)}.png')

In [None]:
# One figure per language: each call passes a one-element list.
for lang in LANGUAGES:
    plot_average_rank_by_layer([lang])