# Installing the Dependencies

Uncomment and run the code cell below to install the package and its dependencies (an editable install of the project in the current directory).

In [None]:
# !pip install -e .

# Loading the Dependencies

In [None]:
from deeplens.utils.analysis import *
from deeplens.pipeline import pipeline

# Initializing the Pipeline

In [None]:
# All settings for this pipeline run, gathered in one place.
# NOTE: the two SAE paths are absolute paths on the original author's
# machine; point them at your own copies before running this cell.
run_settings = {
    "text": "He said he would",
    "hf_model": "gpt2",
    "layer": 3,
    "sae_model": "/Users/inigoparra/Desktop/deeplens/demo/model/gpt2_L3_1M.pt",
    "sae_config": "/Users/inigoparra/Desktop/deeplens/demo/model/config.yaml",
    "feature": 0,
    "alpha": 1000.0,
    "tok_position": -1,
    "generate": False,
    "temperature": 1.0,
}

# The three returned values are plotted further down under the titles
# "Raw (no SAE)", "SAE Baseline" and "SAE + Intervention", in that order.
original, original_output, modified_output = pipeline(**run_settings)

In [None]:
# Settings for the analysis helper; they mirror the model, layer and SAE
# files passed to the pipeline call earlier in the notebook.
# NOTE: the SAE paths are absolute paths on the original author's machine;
# adjust them to your local copies.
analysis_settings = {
    "hf_model": "gpt2",
    "sae_model": "/Users/inigoparra/Desktop/deeplens/demo/model/gpt2_L3_1M.pt",
    "sae_config": "/Users/inigoparra/Desktop/deeplens/demo/model/config.yaml",
    "layer": 3,
}
analysis = AnalysisUtils(**analysis_settings)

# Plotting the Probability Distributions Over the Vocabulary

In [None]:
# Pipeline outputs and the plot title for each, kept in matching order.
# `results` is reused by the heatmap cell below.
results = [original, original_output, modified_output]
titles = ["Raw (no SAE)", "SAE Baseline", "SAE + Intervention"]

# Options shared by every plot: softmax-normalised values, top 50 entries,
# last position.
shared_plot_options = {"use_softmax": True, "k": 50, "position": -1}

for plot_title, run_logits in zip(titles, results):
    analysis.plot_topk_distribution(
        logits=run_logits,
        title=plot_title,
        **shared_plot_options,
    )

# Plotting Logit Heatmaps

In [None]:
# Draw one heatmap per pipeline output, all with the same k.
heatmap_k = 10000
for run_logits in results:
    analysis.generate_logit_heatmap(logits=run_logits, k=heatmap_k)

# Get Top-k Predicted Tokens (per position)

In [None]:
# Keep this output under its own name instead of rebinding `results`.
# `results` already holds the list of three pipeline outputs that the
# plotting and heatmap cells iterate over; overwriting it here would make
# those cells operate on the wrong object if they are re-run afterwards.
top_k_tokens = analysis.get_top_k_tokens(
    logits=modified_output,  # the output of the intervened run
    k=5,
    to_dataframe=True,
    verbose=False,
)
top_k_tokens  # last expression in the cell, so the notebook displays it

# Top-k Activated Features (across sentences; for specific tokens)

In [None]:
# Wh-words used in a question context.
wh_questions = [
    "What did you say to him?",
    "Where are you going tonight?",
    "When will the meeting start?",
    "Why did she leave so early?",
    "Who is coming to the party?",
    "Which color do you prefer?",
]

# The same wh-words used in a non-question context.
wh_statements = [
    "What you did was unacceptable.",
    "Where he went remains a mystery.",
    "When she arrived surprised everyone.",
    "Why he left is still unclear.",
    "Who wins the game gets a prize.",
    "Which team plays first matters a lot.",
]

# Questions first, then statements: the same order as a single flat list.
sentences = wh_questions + wh_statements

# The result is consumed by the next cell, which iterates it with `.items()`
# and reads `.features` and `.values` from each entry.
feature_query = {
    "sentences": sentences,
    "target": 0,
    "k": 5,
    "case_sensitive": False,
    "return_values": True,
}
feats = analysis.get_most_active_features(**feature_query)

In [None]:
# Print each entry's key, then its features and (when present) its values,
# each on a tab-indented line.
for label, entry in feats.items():
    print(label)
    print(f"\t{entry.features}")
    if entry.values is None:
        # No values stored for this entry; nothing more to print.
        continue
    print(f"\t{entry.values}")