In [43]:
# =====================
# 0. Imports
# =====================
import json
import string

import gradio as gr
import matplotlib.pyplot as plt
import torch
from openai import OpenAI
from transformers import GPT2Tokenizer, GPT2Model

# Import HSMM utilities
from models.hsmm_module import HSMM_LDS_Torch, temporal_pool, zscore_torch
from models.sae import SparseAutoencoder


In [9]:
# =====================
# 1. OpenAI API
# =====================
# Do not hardcode the key in the notebook (it would be saved and shared with it).
# With no `api_key` argument the client reads the OPENAI_API_KEY environment
# variable, so set that variable before running this cell.
client = OpenAI()


In [10]:
# =====================
# 2. Load models
# =====================
# GPT-2 tokenizer and base model; the model is moved to the GPU and switched to eval mode.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gpt2_model = GPT2Model.from_pretrained("gpt2").cuda().eval()

# Load the pretrained sparse autoencoder weights.
# `SparseAutoencoder` comes from the imports cell at the top of the notebook.
# NOTE(review): torch.load unpickles the checkpoint -- only load files you trust
# (consider weights_only=True if the installed torch version supports it).
sae = SparseAutoencoder(input_dim=768, hidden_dim=256, sparsity=1e-3).cuda()
sae.load_state_dict(torch.load("./models/sae_model.pt", map_location="cuda"))
sae.eval()

K = 10  # set according to your pretrained model

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

In [47]:

def temporal_pool(Z, w=5):
    """Smooth a sequence with a length-preserving moving average along dim 0.

    The sequence is edge-padded by repeating its first and last rows, and each
    output row is the mean of ``w`` consecutive rows of the padded sequence.

    Note: this notebook-local definition shadows the ``temporal_pool`` imported
    from ``models.hsmm_module`` in the imports cell.

    Args:
        Z: 2-D tensor; rows (dim 0) are the positions being smoothed.
        w: Window length, must be >= 1. For even ``w`` the extra padded row is
            placed on the right so every window still has ``len(Z)`` rows.

    Returns:
        Tensor with the same shape as ``Z`` (``Z`` itself when it has no rows).

    Raises:
        ValueError: If ``w`` is smaller than 1.
    """
    if w < 1:
        raise ValueError(f"window size w must be >= 1, got {w}")

    n_rows = len(Z)
    if n_rows == 0:
        # Nothing to smooth; indexing Z[0] below would raise on an empty input.
        return Z

    # Total padding must be exactly w - 1 rows so that all w shifted windows
    # have n_rows rows. Splitting it symmetrically only works for odd w.
    pad_left = (w - 1) // 2
    pad_right = (w - 1) - pad_left

    left = Z[0].unsqueeze(0).repeat(pad_left, 1)
    right = Z[-1].unsqueeze(0).repeat(pad_right, 1)
    Zp = torch.cat([left, Z, right], dim=0)
    return torch.stack([Zp[i:i + n_rows] for i in range(w)], 0).mean(0)

def zscore_torch(Y, eps=1e-8):
    """Standardize each column of ``Y`` to zero mean and unit standard deviation.

    Statistics are computed along dim 0. The standard deviation is floored at
    ``eps`` so constant columns map to zeros instead of dividing by zero.

    Note: this notebook-local definition shadows the ``zscore_torch`` imported
    from ``models.hsmm_module`` in the imports cell.

    Args:
        Y: Floating-point tensor whose dim 0 indexes the samples.
        eps: Lower bound applied to the per-column standard deviation.

    Returns:
        Tensor of the same shape as ``Y``. With fewer than two rows the result
        is all zeros.
    """
    if Y.shape[0] < 2:
        # torch.std's default sample estimator is NaN for a single row, and
        # clamp_min does not remove NaN, so the output would be all NaN.
        return torch.zeros_like(Y)

    mu = Y.mean(dim=0, keepdim=True)
    sd = Y.std(dim=0, keepdim=True).clamp_min(eps)
    return (Y - mu) / sd

def _mode_label(index):
    """Return the spreadsheet-style label for a 0-based index: A..Z, AA, AB, ..."""
    letters = string.ascii_uppercase
    label = ""
    remaining = index + 1
    while remaining > 0:
        remaining, offset = divmod(remaining - 1, len(letters))
        label = letters[offset] + label
    return label


def label_modes(modes):
    """Relabel mode ids with letters in order of first appearance.

    The first distinct mode seen becomes "A", the second "B", and so on. After
    "Z" the labels continue as "AA", "AB", ... so more than 26 distinct modes
    no longer raise an IndexError.

    Args:
        modes: Iterable of hashable mode ids.

    Returns:
        A pair ``(labels, label_map)``: ``labels`` is the list of letter labels
        aligned with ``modes``; ``label_map`` maps each mode id to its label.
    """
    label_map, labels = {}, []
    for m in modes:
        if m not in label_map:
            # len(label_map) is the 0-based rank of this newly seen mode.
            label_map[m] = _mode_label(len(label_map))
        labels.append(label_map[m])
    return labels, label_map

def extract_segments(tokens, labels):
    """Group consecutive positions that share a label into segments.

    Args:
        tokens: Sequence of token strings aligned with ``labels``.
        labels: Sequence of per-token labels.

    Returns:
        A list of dicts, one per run of equal labels, with keys ``"label"``,
        ``"start"`` and ``"end"`` (inclusive indices), and ``"text"`` (the
        run's tokens joined with single spaces). An empty ``labels`` yields an
        empty list.
    """
    n = len(labels)
    if n == 0:
        # No labels means no segments; previously this raised IndexError.
        return []

    # A new segment starts at index 0 and wherever the label changes.
    starts = [0] + [i for i in range(1, n) if labels[i] != labels[i - 1]]
    stops = starts[1:] + [n]

    segments = []
    for start, stop in zip(starts, stops):
        # The final segment takes every remaining token, even if `tokens`
        # is longer than `labels`.
        chunk = tokens[start:] if stop == n else tokens[start:stop]
        segments.append({
            "label": labels[start],
            "start": start,
            "end": stop - 1,
            "text": " ".join(chunk)
        })
    return segments

In [51]:
def _plot_mode_sequence(z_sub, tokens_sub, segments):
    """Draw the mode sequence as a one-row colour strip with tokens as x tick labels."""
    fig, ax = plt.subplots(figsize=(18, 4))  # wide figure so token labels stay readable
    ax.imshow(z_sub.reshape(1, -1), aspect="auto", cmap="tab10")
    ax.set_yticks([])
    ax.set_xlabel("Tokens", fontsize=18)
    ax.set_title("Mode sequence", fontsize=20, fontweight="bold")

    # draw tokens on x-axis
    ax.set_xticks(range(len(tokens_sub)))
    ax.set_xticklabels(tokens_sub, rotation=90, fontsize=10)

    # Mark each mode boundary and repeat the token that opens the new segment.
    for s in segments[1:]:
        ax.axvline(s["start"] - 0.5, color="white", linestyle="--", alpha=0.8, linewidth=1.5)
        # NOTE(review): imshow's default origin inverts the y-axis, so y=-0.6 is
        # probably just above the strip, not under the x-axis -- confirm placement.
        ax.text(s["start"], -0.6, tokens_sub[s["start"]],
                rotation=90, ha="center", va="top",
                fontsize=9, color="red", fontweight="bold")

    fig.tight_layout()
    # Unregister the figure from pyplot so repeated calls do not accumulate
    # open figures; the Figure object itself is still returned to the caller.
    plt.close(fig)
    return fig


def _build_explanation_prompt(text, segments):
    """Serialize the request for the language model as valid JSON."""
    payload = {
        "instruction": "The following is the result of mode decomposition of LLM hidden states. Each mode is assumed to represent a potential syntactic or semantic unit.",
        "input_text": text,
        "segments": [
            {
                "label": s["label"],
                "start": int(s["start"]),
                "end": int(s["end"]),
                "text": s["text"],
            }
            for s in segments
        ],
        "task": [
            "Explain how each mode corresponds to syntactic factors (grammatical or structural boundaries).",
            "Explain how each mode corresponds to semantic factors (lexical or meaning-related units).",
            "Discuss the reasons when syntactic and semantic factors align or diverge."
        ],
    }
    # json.dumps escapes quotes and newlines in the user text; the previous
    # f-string template produced malformed JSON whenever the input had them.
    return json.dumps(payload, ensure_ascii=False, indent=2)


def analyze_and_explain(text, span=80):
    """Segment a text into latent modes, plot them, and request an explanation.

    Steps: GPT-2 last-layer hidden states -> sparse-autoencoder latents ->
    temporal smoothing and z-scoring -> HSMM fit and Viterbi decoding -> figure
    of the decoded modes -> explanation from the OpenAI chat API.

    Relies on the notebook globals ``tokenizer``, ``gpt2_model``, ``sae``,
    ``K`` and ``client``, and on a CUDA device.

    Args:
        text: Input text; it is truncated to 256 tokens by the tokenizer.
        span: Maximum number of leading tokens to show in the figure and to
            send for explanation.

    Returns:
        A pair ``(fig, explanation)``: the matplotlib figure and the model's
        reply text. For blank input, ``(None, message)`` is returned without
        running any model.
    """
    if not text or not text.strip():
        # Nothing to tokenize; avoid running the models on an empty sequence.
        return None, "Please enter some text to analyze."

    # --- GPT-2 hidden states ---
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=256).to("cuda")
    with torch.no_grad():
        hidden = gpt2_model(**inputs).last_hidden_state.squeeze(0)
        # The second value returned by the autoencoder is used as the latent sequence.
        _, z_seq = sae(hidden)

    # --- SAE latent sequence ---
    z_seq = temporal_pool(z_seq.cpu(), w=5)
    Y_art = zscore_torch(z_seq).to("cuda")

    # --- HSMM segmentation ---
    hsmm = HSMM_LDS_Torch(
        # Never more than 10 states or the configured K; short inputs get fewer (at least 2).
        K=min(10, K, max(2, Y_art.shape[0] // 2)),
        Dmax=10,
        obs_dim=Y_art.shape[1],
        init_mean_dur=20.0,
        allow_self_transition=True,
        duration_model="negbin",
        device="cuda",
        Y_init=Y_art
    )
    hsmm.fit(Y_art, n_iter=5, verbose=False)
    z_example = hsmm.viterbi_decode(Y_art).cpu().numpy()

    # --- Token recovery and segmentation ---
    tokens = [tokenizer.decode([tid]) for tid in inputs["input_ids"][0]]
    span = min(span, len(tokens))
    z_sub, tokens_sub = z_example[:span], tokens[:span]
    labels, _ = label_modes(z_sub)
    segments = extract_segments(tokens_sub, labels)

    # --- Visualization ---
    fig = _plot_mode_sequence(z_sub, tokens_sub, segments)

    # --- OpenAI explanation ---
    prompt = _build_explanation_prompt(text, segments)
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}]
    )
    explanation = response.choices[0].message.content

    return fig, explanation


In [53]:
# =====================
# 4. Gradio Interface
# =====================
# Components are created top to bottom in the order they appear on the page:
# title, text input, plot, explanation, run button.
with gr.Blocks() as demo:
    gr.Markdown(value="# MoDeLM-Analyzer: Interactive Demo")
    inp = gr.Textbox(label="Input Text", lines=3)
    out_fig = gr.Plot(label="Mode Visualization")
    out_txt = gr.Markdown(label="Explanation")
    run_btn = gr.Button(value="Run")

    # Clicking "Run" sends the textbox content through the analysis and
    # fills the plot and the explanation with its two return values.
    run_btn.click(
        fn=analyze_and_explain,
        inputs=[inp],
        outputs=[out_fig, out_txt],
    )

demo.launch()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f253e8cad2e610a433.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


