In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.colors import Normalize

# Random 7x7 symmetric attention matrix with 1s on the diagonal.
# The generator is seeded so the rendered figure (and the saved PNG/PDF) is
# identical on every Restart & Run All; change SEED to get a different draw.
n = 7
SEED = 42
rng = np.random.default_rng(SEED)
R = rng.random((n, n))
A = (R + R.T) / 2.0       # averaging with the transpose makes A symmetric
np.fill_diagonal(A, 1.0)  # in-place: force every diagonal entry to 1

# Geometry: one data unit equals one millimeter.
cell_mm, gap_mm = 15.0, 1.0
W_mm = H_mm = n * cell_mm + (n - 1) * gap_mm  # square grid: n cells plus n-1 gaps per side

# Rendering resolution: on-screen figure DPI and PNG export DPI.
DPI_SCREEN, DPI_SAVE = 500, 900
plt.rcParams["path.simplify"] = False  # do not let matplotlib simplify paths

# Figure size in inches: the grid plus 30 mm of extra room in each dimension.
mm_to_in = 1.0 / 25.4
fig_w_in, fig_h_in = [(side_mm + 30.0) * mm_to_in for side_mm in (W_mm, H_mm)]

fig, ax = plt.subplots(figsize=(fig_w_in, fig_h_in), dpi=DPI_SCREEN)

# Colors: scale values over the observed range of A, use the default colormap,
# and later blend each color with white by WHITEN.
WHITEN = 0.35  # weight given to white; raise (e.g. 0.5) for even lighter cells
cmap = plt.get_cmap()
norm = Normalize(vmin=float(A.min()), vmax=float(A.max()))

def lighten_rgba(rgba, whiten=0.35):
    """Return ``rgba`` with its RGB channels blended toward white.

    Each color channel ``c`` becomes ``c * (1 - whiten) + 1.0 * whiten``;
    the alpha channel is passed through unchanged.

    Args:
        rgba: Sequence of exactly four numbers ``(r, g, b, a)``.
        whiten: Weight given to white (0 keeps the color, 1 gives white).

    Returns:
        A 4-tuple ``(r, g, b, a)``.
    """
    red, green, blue, alpha = rgba
    blended = tuple(
        channel * (1 - whiten) + 1.0 * whiten for channel in (red, green, blue)
    )
    return blended + (alpha,)

# Paint the grid one cell at a time; each cell is its own Rectangle so the
# gap_mm spacing stays visible between neighbours.
pitch_mm = cell_mm + gap_mm  # origin-to-origin distance between adjacent cells

for row, col in ((r, c) for r in range(n) for c in range(n)):
    fill = lighten_rgba(cmap(norm(A[row, col])), WHITEN)
    ax.add_patch(
        Rectangle(
            (col * pitch_mm, row * pitch_mm),
            cell_mm,
            cell_mm,
            facecolor=fill,
            edgecolor="black",
            linewidth=2.2,
            antialiased=False,  # no edge smoothing on the borders
            joinstyle="miter",
            capstyle="butt",
        )
    )

# One tick at the middle of every cell, labelled with the token symbol.
centers = [k * pitch_mm + cell_mm / 2.0 for k in range(n)]
labels = [
    r"$[CLS]$",
    r"$k_{1}$",
    r"$k_{2}$",
    r"$k_{3}$",
    r"$\cdots$",
    r"$k_{n-2}$",
    r"$k_{n-1}$",
]

ax.set_xticks(centers)
ax.set_xticklabels(labels, fontsize=14)
ax.set_yticks(centers)
ax.set_yticklabels(labels, fontsize=14)

# Matrix-style orientation: the y limits are reversed so the first row
# (the [CLS] label) is drawn at the top.
ax.set_xlim(-1, W_mm + 1)
ax.set_ylim(H_mm + 1, -1)
ax.set_aspect("equal")

# Hide the axes frame so only the cell borders remain (no colorbar is drawn).
for frame_edge in ax.spines.values():
    frame_edge.set_visible(False)
ax.tick_params(top=False, bottom=True, labeltop=False, labelbottom=True, left=True, right=False)

plt.tight_layout()

# Export a high-DPI PNG and a vector PDF, then display the figure.
plt.savefig("attention_matrix_7x7_sharper_lighter.png", dpi=DPI_SAVE, bbox_inches="tight")
plt.savefig("attention_matrix_7x7_sharper_lighter.pdf", bbox_inches="tight")
plt.show()

# Last expression of the cell: show the matrix values as the cell output.
A


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.colors import Normalize
LABEL_FONTSIZE = 15  # tick-label font size; raise or lower to taste (e.g. 16/18/20)

# SpanMinED alignment matrix, 5 rows (s_0..s_4) x 6 columns (t_0..t_5).
A = np.array(
    [
        [1.0, 0.0,   0.0,   0.0, 0.0, 0.0],
        [0.0, 0.625, 0.375, 0.0, 0.0, 0.0],
        [0.0, 0.0,   0.0,   0.6, 0.4, 0.0],
        [0.0, 0.0,   0.0,   0.0, 0.0, 1.0],
        [0.0, 0.0,   0.0,   0.0, 0.0, 1.0],
    ],
    dtype=float,
)
n_rows, n_cols = A.shape

# Tick labels: one per matrix row / column, in matrix order.
row_labels = [
    r"$s_{0}$ I'm",
    r"$s_{1}$ watching",
    r"$s_{2}$ funny",
    r"$s_{3}$ video",
    r"$s_{4}$ s",
]
col_labels = [
    r"$t_{0}$ I'm",
    r"$t_{1}$ watch",
    r"$t_{2}$ ing",
    r"$t_{3}$ fun",
    r"$t_{4}$ ny",
    r"$t_{5}$ videos",
]

# Grid geometry in millimeters (used directly as data units):
# `count` cells plus `count - 1` gaps along each side.
cell_mm, gap_mm = 15.0, 1.0
W_mm, H_mm = (
    count * cell_mm + (count - 1) * gap_mm for count in (n_cols, n_rows)
)

# Rendering resolution: on-screen figure DPI and PNG export DPI.
DPI_SCREEN, DPI_SAVE = 400, 900
plt.rcParams["path.simplify"] = False

# Figure size in inches: the grid plus 90 mm (width) / 70 mm (height) of extra room.
mm_to_in = 1.0 / 25.4
fig_w_in, fig_h_in = (W_mm + 90.0) * mm_to_in, (H_mm + 70.0) * mm_to_in

fig, ax = plt.subplots(figsize=(fig_w_in, fig_h_in), dpi=DPI_SCREEN)

# Cell colors.
WHITEN_NONZERO = 0.35                 # weight given to white when lightening non-zero cells
ZERO_COLOR = (0.98, 0.98, 0.98, 1.0)  # very light gray fill used for zero cells
cmap = plt.get_cmap()                 # default matplotlib colormap
norm = Normalize(vmin=0.0, vmax=float(A.max()))  # color scale spans 0..max(A)

def lighten_rgba(rgba, whiten=0.35):
    """Return ``rgba`` with its RGB channels blended toward white.

    Each color channel ``c`` becomes ``c * (1 - whiten) + 1.0 * whiten``;
    the alpha channel is passed through unchanged.

    Args:
        rgba: Sequence of exactly four numbers ``(r, g, b, a)``.
        whiten: Weight given to white (0 keeps the color, 1 gives white).

    Returns:
        A 4-tuple ``(r, g, b, a)``.
    """
    red, green, blue, alpha = rgba
    blended = tuple(
        channel * (1 - whiten) + 1.0 * whiten for channel in (red, green, blue)
    )
    return blended + (alpha,)

# Render the grid: one bordered Rectangle plus a centered value label per cell.
pitch_mm = cell_mm + gap_mm   # origin-to-origin distance between adjacent cells
half_cell_mm = cell_mm / 2.0  # offset from a cell's origin to its center

for row, col in ((r, c) for r in range(n_rows) for c in range(n_cols)):
    left, top = col * pitch_mm, row * pitch_mm
    value = float(A[row, col])
    is_zero = value == 0.0

    # Exact zeros get the flat ZERO_COLOR fill instead of a colormap color.
    fill = ZERO_COLOR if is_zero else lighten_rgba(cmap(norm(value)), WHITEN_NONZERO)
    ax.add_patch(
        Rectangle(
            (left, top),
            cell_mm,
            cell_mm,
            facecolor=fill,
            edgecolor="black",
            linewidth=2.2,
            antialiased=False,
            joinstyle="miter",
            capstyle="butt",
        )
    )

    # Every cell is annotated, zeros included; non-zeros use 3 significant digits.
    ax.text(
        left + half_cell_mm,
        top + half_cell_mm,
        "0" if is_zero else f"{value:.3g}",
        ha="center",
        va="center",
        fontsize=14,
    )

# One tick at the middle of every column / row.
x_centers = [col * pitch_mm + half_cell_mm for col in range(n_cols)]
y_centers = [row * pitch_mm + half_cell_mm for row in range(n_rows)]

ax.set_xticks(x_centers)
ax.set_xticklabels(col_labels, fontsize=LABEL_FONTSIZE)
ax.set_yticks(y_centers)
ax.set_yticklabels(row_labels, fontsize=LABEL_FONTSIZE)

# Matrix-style orientation: reversed y limits put row 0 at the top.
ax.set_xlim(-1, W_mm + 1)
ax.set_ylim(H_mm + 1, -1)
ax.set_aspect("equal")

# Hide the axes frame; only the cell borders remain.
for frame_edge in ax.spines.values():
    frame_edge.set_visible(False)
ax.tick_params(top=False, bottom=True, labeltop=False, labelbottom=True, left=True, right=False)

plt.tight_layout()

# Export a high-DPI PNG and a vector PDF, then display the figure.
plt.savefig("spanmined_alignment_table.png", dpi=DPI_SAVE, bbox_inches="tight")
plt.savefig("spanmined_alignment_table.pdf", bbox_inches="tight")
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.colors import Normalize

LABEL_FONTSIZE = 15  # tick-label font size; raise or lower to taste (e.g. 16/18/20)

# Alignment matrix (5x3); columns are [watch, ing, videos].
A = np.array(
    [
        [0.0,   0.0,   0.0],  # s0 "I'm"
        [0.625, 0.375, 0.0],  # s1 "watching"
        [0.0,   0.0,   0.0],  # s2 "funny"
        [0.0,   0.0,   1.0],  # s3 "video"
        [0.0,   0.0,   1.0],  # s4 "s"
    ],
    dtype=float,
)
n_rows, n_cols = A.shape

# Tick labels: one per matrix row / column, in matrix order.
row_labels = [
    r"$s_{0}$ I'm",
    r"$s_{1}$ watching",
    r"$s_{2}$ funny",
    r"$s_{3}$ video",
    r"$s_{4}$ s",
]
col_labels = [
    r"$t_{1}$ watch",
    r"$t_{2}$ ing",
    r"$t_{5}$ videos",
]

# Grid geometry in millimeters (used directly as data units):
# `count` cells plus `count - 1` gaps along each side.
cell_mm, gap_mm = 15.0, 1.0
W_mm, H_mm = (
    count * cell_mm + (count - 1) * gap_mm for count in (n_cols, n_rows)
)

# Rendering resolution: on-screen figure DPI and PNG export DPI.
DPI_SCREEN, DPI_SAVE = 400, 900
plt.rcParams["path.simplify"] = False

# Figure size in inches: the grid plus 90 mm (width) / 70 mm (height) of extra room.
mm_to_in = 1.0 / 25.4
fig_w_in, fig_h_in = (W_mm + 90.0) * mm_to_in, (H_mm + 70.0) * mm_to_in

fig, ax = plt.subplots(figsize=(fig_w_in, fig_h_in), dpi=DPI_SCREEN)

# Cell colors.
WHITEN_NONZERO = 0.35                 # weight given to white when lightening non-zero cells
ZERO_COLOR = (0.98, 0.98, 0.98, 1.0)  # very light gray fill used for zero cells
cmap = plt.get_cmap()                 # default matplotlib colormap

# Color scale spans 0..max(A); fall back to an upper bound of 1.0 when A has
# no positive entry.
peak = float(A.max())
max_val = peak if peak > 0 else 1.0
norm = Normalize(vmin=0.0, vmax=max_val)

def lighten_rgba(rgba, whiten=0.35):
    """Return ``rgba`` with its RGB channels blended toward white.

    Each color channel ``c`` becomes ``c * (1 - whiten) + 1.0 * whiten``;
    the alpha channel is passed through unchanged.

    Args:
        rgba: Sequence of exactly four numbers ``(r, g, b, a)``.
        whiten: Weight given to white (0 keeps the color, 1 gives white).

    Returns:
        A 4-tuple ``(r, g, b, a)``.
    """
    red, green, blue, alpha = rgba
    blended = tuple(
        channel * (1 - whiten) + 1.0 * whiten for channel in (red, green, blue)
    )
    return blended + (alpha,)

# Render the grid: one bordered Rectangle plus a centered value label per cell.
pitch_mm = cell_mm + gap_mm   # origin-to-origin distance between adjacent cells
half_cell_mm = cell_mm / 2.0  # offset from a cell's origin to its center

for row, col in ((r, c) for r in range(n_rows) for c in range(n_cols)):
    left, top = col * pitch_mm, row * pitch_mm
    value = float(A[row, col])
    is_zero = value == 0.0

    # Exact zeros get the flat ZERO_COLOR fill instead of a colormap color.
    fill = ZERO_COLOR if is_zero else lighten_rgba(cmap(norm(value)), WHITEN_NONZERO)
    ax.add_patch(
        Rectangle(
            (left, top),
            cell_mm,
            cell_mm,
            facecolor=fill,
            edgecolor="black",
            linewidth=2.2,
            antialiased=False,
            joinstyle="miter",
            capstyle="butt",
        )
    )

    # Every cell is annotated, zeros included; non-zeros use 3 significant digits.
    ax.text(
        left + half_cell_mm,
        top + half_cell_mm,
        "0" if is_zero else f"{value:.3g}",
        ha="center",
        va="center",
        fontsize=14,
    )

# One tick at the middle of every column / row.
x_centers = [col * pitch_mm + half_cell_mm for col in range(n_cols)]
y_centers = [row * pitch_mm + half_cell_mm for row in range(n_rows)]

ax.set_xticks(x_centers)
ax.set_xticklabels(col_labels, fontsize=LABEL_FONTSIZE)
ax.set_yticks(y_centers)
ax.set_yticklabels(row_labels, fontsize=LABEL_FONTSIZE)

# Matrix-style orientation: reversed y limits put row 0 at the top.
ax.set_xlim(-1, W_mm + 1)
ax.set_ylim(H_mm + 1, -1)
ax.set_aspect("equal")

# Hide the axes frame; only the cell borders remain.
for frame_edge in ax.spines.values():
    frame_edge.set_visible(False)
ax.tick_params(top=False, bottom=True, labeltop=False, labelbottom=True, left=True, right=False)

plt.tight_layout()

# Export a high-DPI PNG and a vector PDF, then display the figure.
plt.savefig("spanmined_alignment_5x3.png", dpi=DPI_SAVE, bbox_inches="tight")
plt.savefig("spanmined_alignment_5x3.pdf", bbox_inches="tight")
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.colors import Normalize

LABEL_FONTSIZE = 15  # font size for the tick labels

# Alignment matrix A (5x3): one row per student token, one column per teacher token.
A = np.array(
    [
        [0.0,   0.0,   0.0],  # s0 "I'm"
        [0.625, 0.375, 0.0],  # s1 "watching"
        [0.0,   0.0,   0.0],  # s2 "funny"
        [0.0,   0.0,   1.0],  # s3 "video"
        [0.0,   0.0,   1.0],  # s4 "s"
    ],
    dtype=float,
)

student_labels_full = [
    r"$s_{0}$ I'm",
    r"$s_{1}$ watching",
    r"$s_{2}$ funny",
    r"$s_{3}$ video",
    r"$s_{4}$ s",
]
teacher_labels = [
    r"$t_{1}$ watch",
    r"$t_{2}$ ing",
    r"$t_{5}$ videos",
]

# Example teacher attention tea_sel (3x3) over [watch, ing, videos].
tea_sel = np.array(
    [
        [0.50, 0.30, 0.20],
        [0.20, 0.60, 0.20],
        [0.20, 0.20, 0.60],
    ],
    dtype=float,
)

# Example student attention slice stu_sel (3x3) over [watching, video, s].
# Replace with your real stu_sel if you have it.
stu_sel = np.array(
    [
        [0.50, 0.25, 0.25],
        [0.20, 0.40, 0.40],
        [0.20, 0.40, 0.40],
    ],
    dtype=float,
)

# Keep only the student rows whose alignment row sums to more than zero,
# i.e. the rows that overlap the selected teacher tokens -> watching, video, s.
row_mass = A.sum(axis=1)
(idx_s,) = np.where(row_mass > 0)  # [1, 3, 4]
A_use = A[idx_s, :]                # shape (3, 3)
student_labels_sel = [student_labels_full[k] for k in idx_s]

# Projection of the teacher attention through the alignment:
# tea_proj = A_use @ tea_sel @ A_use.T
tea_proj = (A_use @ tea_sel) @ A_use.T

#--
# Drawing helper (same style as the cells above). Note: draw_matrix_symbolic
# writes each cell's value inside the cell; it does not hide the numbers.
#--
WHITEN_NONZERO = 0.35  # whiten weight passed to lighten_rgba for non-zero cells
ZERO_COLOR = (0.98, 0.98, 0.98, 1.0)  # RGBA fill used for cells whose value is exactly 0.0

def lighten_rgba(rgba, whiten=0.35):
    """Return ``rgba`` with its RGB channels blended toward white.

    Each color channel ``c`` becomes ``c * (1 - whiten) + 1.0 * whiten``;
    the alpha channel is passed through unchanged.

    Args:
        rgba: Sequence of exactly four numbers ``(r, g, b, a)``.
        whiten: Weight given to white (0 keeps the color, 1 gives white).

    Returns:
        A 4-tuple ``(r, g, b, a)``.
    """
    red, green, blue, alpha = rgba
    blended = tuple(
        channel * (1 - whiten) + 1.0 * whiten for channel in (red, green, blue)
    )
    return blended + (alpha,)

def draw_matrix_symbolic(M, row_labels, col_labels, out_png, out_pdf,
                         cell_mm=15.0, gap_mm=1.0, dpi_screen=400, dpi_save=900,
                         show_values=True):
    """Render a 2-D matrix as a grid of bordered cells, save it, and show it.

    Cells equal to 0.0 are filled with the module-level ``ZERO_COLOR``; every
    other cell is colored with the default matplotlib colormap (scaled over
    ``0..max(M)``) and lightened by the module-level ``WHITEN_NONZERO``.
    Tick labels use the module-level ``LABEL_FONTSIZE``. Row 0 is drawn at
    the top.

    Args:
        M: 2-D array-like of numbers; must be non-empty.
        row_labels: Y tick labels, one per row of ``M``.
        col_labels: X tick labels, one per column of ``M``.
        out_png: Path of the PNG file to write (saved at ``dpi_save``).
        out_pdf: Path of the PDF file to write.
        cell_mm: Side length of one cell, in data units (millimeters).
        gap_mm: Spacing between adjacent cells, in the same units.
        dpi_screen: DPI of the on-screen figure.
        dpi_save: DPI used for the PNG export.
        show_values: When True (the default, and the behavior this helper has
            always had) each cell is annotated with its value; pass False for
            a color-only, "symbolic" rendering.

    Raises:
        ValueError: If ``M`` is not a non-empty 2-D matrix.

    Side effects:
        Sets ``plt.rcParams["path.simplify"]`` to False globally, writes
        ``out_png`` and ``out_pdf``, and calls ``plt.show()``.
    """
    M = np.asarray(M, dtype=float)
    if M.ndim != 2 or M.size == 0:
        raise ValueError(f"M must be a non-empty 2-D matrix, got shape {M.shape}")

    n_rows, n_cols = M.shape
    W_mm = n_cols * cell_mm + (n_cols - 1) * gap_mm
    H_mm = n_rows * cell_mm + (n_rows - 1) * gap_mm

    # Figure size in inches: grid plus 90 mm (width) / 70 mm (height) of extra room.
    mm_to_in = 1.0 / 25.4
    fig_w_in = (W_mm + 90.0) * mm_to_in
    fig_h_in = (H_mm + 70.0) * mm_to_in

    plt.rcParams["path.simplify"] = False
    fig, ax = plt.subplots(figsize=(fig_w_in, fig_h_in), dpi=dpi_screen)

    # Color scale spans 0..max(M); fall back to 1.0 when M has no positive entry.
    peak = float(np.max(M))
    max_val = peak if peak > 0 else 1.0
    norm = Normalize(vmin=0.0, vmax=max_val)
    cmap = plt.get_cmap()  # default matplotlib colormap

    pitch_mm = cell_mm + gap_mm   # origin-to-origin distance between adjacent cells
    half_cell_mm = cell_mm / 2.0  # offset from a cell's origin to its center

    # Draw cells (annotated with their value unless show_values is False).
    for i in range(n_rows):
        for j in range(n_cols):
            x = j * pitch_mm
            y = i * pitch_mm

            val = float(M[i, j])
            if val == 0.0:
                rgba = ZERO_COLOR
            else:
                rgba = lighten_rgba(cmap(norm(val)), WHITEN_NONZERO)

            rect = Rectangle(
                (x, y),
                cell_mm,
                cell_mm,
                facecolor=rgba,
                edgecolor="black",
                linewidth=2.2,
                antialiased=False,
                joinstyle="miter",
                capstyle="butt"
            )
            ax.add_patch(rect)

            if show_values:
                # Zeros print as a bare "0"; other values use 3 significant digits.
                text_str = "0" if val == 0.0 else f"{val:.3g}"
                ax.text(x + half_cell_mm, y + half_cell_mm, text_str,
                        ha="center", va="center", fontsize=14)

    # Ticks centered on each cell.
    x_centers = [j * pitch_mm + half_cell_mm for j in range(n_cols)]
    y_centers = [i * pitch_mm + half_cell_mm for i in range(n_rows)]

    ax.set_xticks(x_centers)
    ax.set_yticks(y_centers)
    ax.set_xticklabels(col_labels, fontsize=LABEL_FONTSIZE)
    ax.set_yticklabels(row_labels, fontsize=LABEL_FONTSIZE)

    # Matrix-style orientation: reversed y limits put row 0 at the top.
    ax.set_xlim(-1, W_mm + 1)
    ax.set_ylim(H_mm + 1, -1)
    ax.set_aspect("equal")

    # Hide the axes frame; only the cell borders remain.
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.tick_params(top=False, bottom=True, labeltop=False, labelbottom=True, left=True, right=False)

    fig.tight_layout()
    fig.savefig(out_png, dpi=dpi_save, bbox_inches="tight")
    fig.savefig(out_pdf, bbox_inches="tight")
    plt.show()

# Render the projected teacher attention and the student attention slice with
# the same helper. Both figures use the selected student tokens on both axes,
# and each cell is annotated with its value.
for matrix, png_path, pdf_path in (
    (tea_proj, "tea_proj_symbolic_3x3.png", "tea_proj_symbolic_3x3.pdf"),
    (stu_sel, "stu_sel_symbolic_3x3.png", "stu_sel_symbolic_3x3.pdf"),
):
    draw_matrix_symbolic(
        matrix,
        row_labels=student_labels_sel,
        col_labels=student_labels_sel,
        out_png=png_path,
        out_pdf=pdf_path,
    )


In [None]:
from transformers import AutoTokenizer, AutoModel

# Student encoder. Tokenizer and weights are both loaded from student_name so
# they always refer to the same checkpoint (the weights were previously
# hard-coded to "bert-base-uncased", which left student_name unused for the model).
student_name = "huawei-noah/TinyBERT_General_4L_312D"
tok_student = AutoTokenizer.from_pretrained(student_name)
model_student = AutoModel.from_pretrained(
    student_name,
    output_hidden_states=True,
    # NOTE(review): presumably eager attention is requested so attention
    # weights can be read out later - confirm against the downstream cells.
    attn_implementation="eager",
)

# Teacher encoder; change teacher_name to try a different checkpoint.
teacher_name = "Qwen/Qwen3-Embedding-0.6B"

tok_teacher = AutoTokenizer.from_pretrained(teacher_name, padding_side="left")
model_teacher = AutoModel.from_pretrained(
    teacher_name,
    output_hidden_states=True,
    attn_implementation="eager",
)

In [None]:
!pip install mteb

In [None]:
import mteb

# Select the benchmark task by name. `tasks=` is passed by keyword on purpose:
# in several mteb releases the first positional parameter of get_tasks is a
# language filter (and the first positional parameter of MTEB is not always
# the task list), so a positional list of task names can be misinterpreted.
task = mteb.get_tasks(tasks=["Banking77Classification"])
evaluator = mteb.MTEB(tasks=task)

# Load the embedding model through mteb and run the selected task on it.
model = mteb.get_model("BAAI/bge-m3")
evaluator.run(model)