# Layer Similarity: English and Multilingual MMLU

Visualize layer-level similarity matrices generated for the standard English MMLU tasks and the multilingual Global-MMLU translations.

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def load_similarity_mats(base_dir, filenames):
    """Load every file in ``filenames`` from ``base_dir`` with ``np.load``.

    Args:
        base_dir: Directory (string or path-like) that holds the files.
        filenames: Iterable of file names relative to ``base_dir``.

    Returns:
        A list with one ``np.load`` result per filename, in input order.
    """
    return [np.load(Path(base_dir, fname)) for fname in filenames]


def plot_similarity_grid(mats, titles, suptitle):
    """Draw a row of heatmaps, one per matrix, sharing a single colour scale.

    Args:
        mats: Non-empty sequence of 2-D arrays to render with ``imshow``.
        titles: Per-panel titles, paired with ``mats`` by position. Pairing
            uses ``zip``, so surplus matrices or titles are silently ignored.
        suptitle: Figure-level title.

    Raises:
        ValueError: If ``mats`` is empty.
    """
    # Fail early with a clear message instead of the opaque error that
    # ``min()`` raises on an empty sequence.
    if len(mats) == 0:
        raise ValueError("plot_similarity_grid requires at least one similarity matrix")

    # A common colour range keeps the panels visually comparable and lets one
    # colorbar describe all of them.
    vmin = min(m.min() for m in mats)
    vmax = max(m.max() for m in mats)
    fig, axes = plt.subplots(1, len(mats), figsize=(3.2 * len(mats), 4), sharex=False, sharey=False)
    # ``subplots`` returns a bare Axes for a single panel; normalise to a 1-D array.
    axes = np.atleast_1d(axes).ravel()

    for ax, mat, title in zip(axes, mats, titles):
        im = ax.imshow(mat, cmap="coolwarm", vmin=vmin, vmax=vmax)
        ax.set_title(title, fontsize=12, fontweight="bold")
        ax.set_xlabel("Layer")
        ax.set_ylabel("Layer")

    fig.suptitle(suptitle, fontsize=14, fontweight="bold", y=0.95)
    # Leave room on the right-hand side for the dedicated colorbar axes below.
    fig.subplots_adjust(left=0.05, right=0.88, top=0.86, bottom=0.12, wspace=0.25)

    # Every panel uses the same vmin/vmax, so the last image handle is
    # representative of all of them.
    cbar_ax = fig.add_axes([0.9, 0.2, 0.02, 0.6])
    fig.colorbar(im, cax=cbar_ax, label="Similarity")
    plt.show()


## English MMLU Tasks

Layer similarity matrices for the five grouped MMLU subject clusters, each computed from 40 prompts.

In [None]:
# Inputs for the English MMLU figure: one similarity matrix per subject cluster.
english_dir = Path("..") / "outputs" / "mmlu"
english_tasks = ["medical", "legal", "math", "cs", "humanities"]
english_files = [f"similarity_{task}_40s.npy" for task in english_tasks]

# str.capitalize() alone would title the "cs" panel "Cs"; acronyms are spelled
# out explicitly (presumably "cs" is computer science — confirm).
english_label_overrides = {"cs": "CS"}
english_labels = [
    english_label_overrides.get(task, task.capitalize()) for task in english_tasks
]

english_mats = load_similarity_mats(english_dir, english_files)

plot_similarity_grid(
    english_mats,
    english_labels,
    "Llama3-8B Layer Similarity | English MMLU (40 samples)",
)

### Cross-task Correlation (English)

Pairwise Pearson correlation between the flattened similarity matrices gives a coarse measure of how similarly layers align across tasks.

In [None]:
# Flatten each matrix once so every pairwise comparison works on 1-D vectors.
english_flat = [mat.ravel() for mat in english_mats]
n_english = len(english_flat)
english_corr = np.zeros((n_english, n_english))

for row, vec_a in enumerate(english_flat):
    for col, vec_b in enumerate(english_flat):
        # corrcoef of two vectors is a 2x2 matrix; [0, 1] is the cross-correlation.
        english_corr[row, col] = np.corrcoef(vec_a, vec_b)[0, 1]

# Last expression of the cell: rendered as a labelled table.
pd.DataFrame(english_corr, index=english_tasks, columns=english_tasks)

## Multilingual Global-MMLU (es, zh, fr)

Layer similarity matrices for the CohereLabs Global-MMLU medical translations using 20 prompts per language.

In [None]:
# Inputs for the multilingual figure: one medical similarity matrix per language code.
multilingual_dir = Path("..", "outputs", "mmlu_multilingual")
languages = ["es", "zh", "fr"]
# Display names, kept in the same order as ``languages``.
language_labels = ["Spanish (es)", "Chinese (zh)", "French (fr)"]
multilingual_files = [f"similarity_medical_{lang}_20s.npy" for lang in languages]

multilingual_mats = load_similarity_mats(
    base_dir=multilingual_dir,
    filenames=multilingual_files,
)

plot_similarity_grid(
    mats=multilingual_mats,
    titles=language_labels,
    suptitle="Llama3-8B Layer Similarity | Global-MMLU Medical (20 samples)",
)

### Cross-language Correlation (Multilingual)

Correlating the multilingual similarity matrices highlights how consistent the diffusion embedding structure is across translated prompts.

In [None]:
# Flatten each matrix once so every pairwise comparison works on 1-D vectors.
multilingual_flat = [mat.ravel() for mat in multilingual_mats]
n_languages = len(multilingual_flat)
multilingual_corr = np.zeros((n_languages, n_languages))

for row, vec_a in enumerate(multilingual_flat):
    for col, vec_b in enumerate(multilingual_flat):
        # corrcoef of two vectors is a 2x2 matrix; [0, 1] is the cross-correlation.
        multilingual_corr[row, col] = np.corrcoef(vec_a, vec_b)[0, 1]

# Last expression of the cell: rendered as a labelled table.
pd.DataFrame(multilingual_corr, index=language_labels, columns=language_labels)