Author: Amiri Hayes \
Date Updated: 7/15/25 \
Title: ViewLLM

Starter code!

In [None]:
# package installs if necessary
!pip install matplotlib torch spacy nltk tqdm transformers
!python3 -m spacy download en_core_web_sm

In [None]:
# IMPORTS
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import spacy
nlp = spacy.load("en_core_web_sm")
import nltk
nltk.download('punkt_tab')
from tqdm import tqdm
from nltk.tokenize import sent_tokenize
from typing import Callable, Optional
from matplotlib.colors import PowerNorm
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
print("Imports completed successfully.")

<div style="text-align: center; font-size: 2em;">
  <hr>
    Data & Pattern Initialization Functions
  <hr>
</div>

In [None]:
# POSITIONAL FILTERING PATTERNS:

def next_attention(sentence, tokenizer):
    """Build the reference matrix for a head that attends to the next token.

    Every interior row ``i`` (all rows except the first and the last) puts
    its whole weight on column ``i + 1``.  The first and the last row both
    put their whole weight on column 0.

    Args:
        sentence: Text to tokenize.
        tokenizer: Callable used as ``tokenizer([sentence], return_tensors="pt")``;
            its result must expose ``input_ids``.

    Returns:
        ``(pattern_name, matrix)`` where ``matrix`` is a square NumPy array
        with one row and one column per token.
    """
    encoded = tokenizer([sentence], return_tensors="pt")
    n_tokens = len(encoded.input_ids[0])
    pattern = np.zeros((n_tokens, n_tokens))

    # Interior rows: one-hot on the following position.
    interior = np.arange(1, n_tokens - 1)
    pattern[interior, interior + 1] = 1

    # Boundary rows both point at column 0.
    pattern[0, 0] = 1
    pattern[-1, 0] = 1
    return "Next Head Attention Pattern", pattern

def previous_attention(sentence, tokenizer):
    """Build the reference matrix for a head that attends to the previous token.

    Every interior row ``i`` (all rows except the first and the last) puts
    its whole weight on column ``i - 1``.  The first and the last row both
    put their whole weight on column 0.

    Args:
        sentence: Text to tokenize.
        tokenizer: Callable used as ``tokenizer([sentence], return_tensors="pt")``;
            its result must expose ``input_ids``.

    Returns:
        ``(pattern_name, matrix)`` where ``matrix`` is a square NumPy array
        with one row and one column per token.
    """
    encoded = tokenizer([sentence], return_tensors="pt")
    n_tokens = len(encoded.input_ids[0])
    pattern = np.zeros((n_tokens, n_tokens))

    # Interior rows: one-hot on the preceding position.
    interior = np.arange(1, n_tokens - 1)
    pattern[interior, interior - 1] = 1

    # Boundary rows both point at column 0.
    pattern[0, 0] = 1
    pattern[-1, 0] = 1
    return "Previous Head Attention Pattern", pattern

def same_attention(sentence, tokenizer):
    """Build the reference matrix for a head where each token attends to itself.

    Every interior row ``i`` (all rows except the first and the last) puts
    its whole weight on column ``i``.  The first and the last row both put
    their whole weight on column 0.

    Args:
        sentence: Text to tokenize.
        tokenizer: Callable used as ``tokenizer([sentence], return_tensors="pt")``;
            its result must expose ``input_ids``.

    Returns:
        ``(pattern_name, matrix)`` where ``matrix`` is a square NumPy array
        with one row and one column per token.
    """
    encoded = tokenizer([sentence], return_tensors="pt")
    n_tokens = len(encoded.input_ids[0])
    pattern = np.zeros((n_tokens, n_tokens))

    # Interior rows: one-hot on the diagonal.
    interior = np.arange(1, n_tokens - 1)
    pattern[interior, interior] = 1

    # Boundary rows both point at column 0.
    pattern[0, 0] = 1
    pattern[-1, 0] = 1
    return "Same Head Attention Pattern", pattern

def punctuation_attention(sentence, tokenizer):
    """Build the reference matrix for a head that looks ahead to punctuation.

    Each row spreads its weight uniformly over all *later* tokens that
    contain an ASCII punctuation character.  Rows with no later punctuation
    token attend to themselves.  The matrix is then smoothed by adding
    ``1e-4`` to every cell and re-normalised so each row sums to 1.

    A leading WordPiece continuation marker (``"##"``) is ignored when
    deciding whether a token is punctuation; otherwise every sub-word piece
    such as ``"##ing"`` would count as punctuation because of the ``#``
    characters in the marker.

    Args:
        sentence: Text to tokenize.
        tokenizer: Callable used as ``tokenizer([sentence], return_tensors="pt")``
            that also provides ``convert_ids_to_tokens``.

    Returns:
        ``(pattern_name, matrix)`` where ``matrix`` is a square, row-stochastic
        NumPy array with one row and one column per token.
    """
    toks = tokenizer([sentence], return_tensors="pt")
    len_seq = len(toks.input_ids[0])
    words = tokenizer.convert_ids_to_tokens(toks.input_ids[0])
    punctuation_set = set('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~')

    def _is_punctuation(tok):
        # Drop the sub-word marker so it is not mistaken for '#' punctuation.
        if tok.startswith("##") and len(tok) > 2:
            tok = tok[2:]
        return any(ch in punctuation_set for ch in tok)

    # NOTE(review): bracketed special tokens such as "[CLS]" / "[SEP]" still
    # match because of their brackets; kept as-is — confirm this is intended.
    punctuation_indices = [i for i, tok in enumerate(words) if _is_punctuation(tok)]

    out = np.zeros((len_seq, len_seq))
    for i in range(len_seq):
        future_punct = [j for j in punctuation_indices if j > i]
        if future_punct:
            out[i, future_punct] = 1.0 / len(future_punct)
        else:
            out[i, i] = 1.0

    # Smooth so no cell is exactly zero, then re-normalise each row.
    out += 1e-4
    out = out / out.sum(axis=1, keepdims=True)
    return "Punctuation Pattern", out

def repeated_attention(sentence, tokenizer):
    """Build the reference matrix for a head that attends to repeated tokens.

    Each interior row (all rows except the first and the last) spreads its
    weight uniformly over every interior position holding the same token id,
    itself included.  The first and the last row both put their whole weight
    on column 0.

    Args:
        sentence: Text to tokenize.
        tokenizer: Callable used as ``tokenizer([sentence], return_tensors="pt")``;
            ``input_ids[0]`` of its result must support ``tolist()``.

    Returns:
        ``(pattern_name, matrix)`` where ``matrix`` is a square, row-normalised
        NumPy array with one row and one column per token.
    """
    encoded = tokenizer([sentence], return_tensors="pt")
    ids = np.asarray(encoded.input_ids[0].tolist())
    n_tokens = len(encoded.input_ids[0])
    pattern = np.zeros((n_tokens, n_tokens))

    # Pairwise id equality among the interior tokens only.
    inner = ids[1:-1]
    pattern[1:-1, 1:-1] = inner[:, None] == inner[None, :]

    # Boundary rows both point at column 0.
    pattern[0, 0] = 1
    pattern[-1, 0] = 1

    row_totals = pattern.sum(axis=1, keepdims=True)
    return "Repitition Pattern", pattern / row_totals

# LINGUISTIC ROLE ALIGNMENT PATTERNS:

def pos_alignment(sentence, tokenizer):
    """Build the reference matrix for a head that links tokens sharing a POS tag.

    Each model token is assigned the spaCy part-of-speech tag of the spaCy
    token that covers it, and every token attends uniformly to all tokens
    with the same tag (itself included).  The first and the last position
    are additionally given self-attention.  The matrix is smoothed by adding
    ``1e-4`` to every cell and re-normalised so each row sums to 1.

    Model tokens are matched to spaCy tokens through character offsets
    rather than through ``word_ids``: the tokenizer's word index follows its
    own pre-tokenization, which splits text differently from spaCy (for
    example around contractions), so indexing spaCy tags with it drifts.

    Args:
        sentence: Text to analyse.
        tokenizer: Tokenizer that supports ``return_offsets_mapping=True``
            and whose output provides ``word_ids``.

    Returns:
        ``(pattern_name, matrix)`` where ``matrix`` is a square, row-stochastic
        NumPy array with one row and one column per token.
    """
    toks = tokenizer(sentence, return_tensors="pt", return_offsets_mapping=True, add_special_tokens=True)
    input_ids = toks.input_ids[0].tolist()
    word_ids = toks.word_ids(0)
    offsets = toks["offset_mapping"][0].tolist()
    len_seq = len(input_ids)
    out = np.zeros((len_seq, len_seq))

    # POS tag of the spaCy token covering each character (None in gaps).
    doc = nlp(sentence)
    char_pos = [None] * len(sentence)
    for spacy_token in doc:
        token_end = spacy_token.idx + len(spacy_token.text)
        for char_idx in range(spacy_token.idx, token_end):
            char_pos[char_idx] = spacy_token.pos_

    pos_to_token_indices = {}
    for token_idx, (start, end) in enumerate(offsets):
        if word_ids[token_idx] is None:
            # Special tokens carry no text and therefore no tag.
            continue
        # First tagged character inside the span; this skips a leading space
        # that some tokenizers may include in the offsets.
        span = range(int(start), min(int(end), len(char_pos)))
        pos = next((char_pos[c] for c in span if char_pos[c] is not None), None)
        if pos is None:
            continue
        pos_to_token_indices.setdefault(pos, []).append(token_idx)

    # Fully connect every group of tokens that share a tag.
    for token_indices in pos_to_token_indices.values():
        out[np.ix_(token_indices, token_indices)] = 1

    out[0, 0] = 1
    out[-1, -1] = 1
    out += 1e-4
    out = out / out.sum(axis=1, keepdims=True)
    return "Part of Speech Pattern", out

def dependencies(sentence, tokenizer, check_errors=False):
    """Build the reference matrix for a head that follows dependency arcs.

    The sentence is whitespace-normalised and parsed with spaCy.  For every
    parent/child arc, both directions are marked in the matrix.  spaCy token
    ``k`` is written to row/column ``k + 1``.  The first and the last row
    are pointed at column 0, then the matrix is smoothed by adding ``1e-4``
    to every cell and re-normalised so each row sums to 1.

    Arcs whose shifted index falls outside the matrix are skipped rather
    than raising ``IndexError``; that happens when spaCy yields at least as
    many tokens as the tokenizer.

    NOTE(review): the ``+ 1`` shift presumably assumes one leading special
    token and one model token per spaCy token — confirm per tokenizer; pass
    ``check_errors=True`` to report length mismatches.

    Args:
        sentence: Text to analyse.
        tokenizer: Callable used as ``tokenizer([sentence], return_tensors="pt")``;
            its result must expose ``input_ids``.
        check_errors: When true, print a diagnostic if the parse is empty or
            its length differs from ``len_seq - 2``.

    Returns:
        ``(pattern_name, matrix)`` where ``matrix`` is a square, row-stochastic
        NumPy array with one row and one column per model token.
    """
    toks = tokenizer([sentence], return_tensors="pt")
    len_seq = len(toks.input_ids[0])
    out = np.zeros((len_seq, len_seq))
    doc = nlp(" ".join(sentence.split()))

    if check_errors:
        if len(doc) == 0:
            print("problem, doc empty")
        if len(doc) != (len_seq - 2):
            print("problem, doc length mismatch", len(doc), len_seq - 2)

    for stok in doc:
        parent_pos = stok.i + 1
        if parent_pos >= len_seq:
            continue
        for child_stok in stok.children:
            child_pos = child_stok.i + 1
            if child_pos >= len_seq:
                continue
            # Arcs are symmetric: parent -> child and child -> parent.
            out[parent_pos, child_pos] = 1
            out[child_pos, parent_pos] = 1

    out[0, 0] = 1
    out[-1, 0] = 1
    out += 1e-4
    out = out / out.sum(axis=1, keepdims=True)
    return "Dependency Parsing Pattern", out

# SEMI-STRUCTURED EVALUATION PATTERN:

def chainofthought_pattern(sentence, tokenizer):
    """Placeholder pattern: ignores its arguments and returns an empty result.

    Returns:
        ``("", [])`` — an empty name and an empty list in place of a matrix.
    """
    return "", list()


In [None]:
# GENERATE & VISUALIZE ATTENTION PATTERN SCORES

def js_divergence(p, q):
    """Return the Jensen-Shannon divergence (natural log) between *p* and *q*.

    Both inputs are clipped to ``[1e-12, 1.0]`` and re-normalised to sum to 1
    before the divergence is computed, so zeros do not produce ``log(0)``.
    The inputs themselves are not modified.
    """
    def _as_distribution(values):
        clipped = np.clip(values, 1e-12, 1.0)
        return clipped / clipped.sum()

    def _kl(a, b):
        return np.sum(a * np.log(a / b))

    p_dist = _as_distribution(p)
    q_dist = _as_distribution(q)
    mixture = 0.5 * (p_dist + q_dist)
    return 0.5 * (_kl(p_dist, mixture) + _kl(q_dist, mixture))

def score_prediction(torch_model, torch_tokenizer, head_loc, pattern, sentence_1, sentence_2: Optional[str] = None, distance="jsd", output=False):
    """Score how closely one attention head matches a hand-written pattern.

    The model is run on ``sentence_1`` and the attention matrix of the head
    at ``head_loc`` is compared with the matrix produced by ``pattern``.
    Lower scores mean a closer match.

    For backward compatibility the legacy positional order
    ``(sentence, model, tokenizer, head_loc, pattern)`` is also accepted; it
    is detected by the first argument being a string.

    Args:
        torch_model: Model whose forward pass returns attentions when called
            with ``output_attentions=True``.
        torch_tokenizer: Tokenizer matching ``torch_model``.
        head_loc: ``(layer, head)`` indices of the head to score.
        pattern: Callable ``pattern(sentence, tokenizer) -> (name, matrix)``.
        sentence_1: Sentence to analyse.
        sentence_2: Reserved for sentence-pair scoring; not implemented.
        distance: ``"raw"`` for the summed absolute difference, ``"jsd"`` for
            the mean per-row Jensen-Shannon distance.
        output: ``True`` plots actual vs. predicted attention, ``"optimal"``
            plots only the predicted matrix, ``"actual"`` plots only the
            model's attention; any other value plots nothing.

    Returns:
        The distance between the head's attention and the predicted pattern.

    Raises:
        ValueError: If ``distance`` is not ``"raw"`` or ``"jsd"``.
        NotImplementedError: If ``sentence_2`` is given for a model that is
            not an encoder-decoder.
    """
    if isinstance(torch_model, str):
        # Legacy call order: (sentence, model, tokenizer, head_loc, pattern).
        torch_model, torch_tokenizer, head_loc, pattern, sentence_1 = (
            torch_tokenizer, head_loc, pattern, sentence_1, torch_model
        )
    if distance not in ("raw", "jsd"):
        raise ValueError(f"Unknown distance {distance!r}; expected 'raw' or 'jsd'.")

    layer, head = head_loc
    tokens = torch_tokenizer(sentence_1, return_tensors="pt")

    with torch.no_grad():  # inference only; no autograd graph is needed
        if torch_model.config.is_encoder_decoder:  # encoder-decoder model case
            # The decoder is fed the same ids only so the forward pass runs;
            # the encoder self-attention is what gets scored.
            outputs = torch_model(input_ids=tokens["input_ids"], decoder_input_ids=tokens["input_ids"], output_attentions=True)
            att_tensor = outputs.encoder_attentions[layer][0, head]
        else:  # single-stack (encoder-only or decoder-only) model case
            if sentence_2:
                raise NotImplementedError("Sentence-pair scoring (sentence_2) is not implemented.")
            att_tensor = torch_model(**tokens, output_attentions=True).attentions[layer][0, head]
    att = att_tensor.detach().cpu().numpy()

    name, pred_att = pattern(sentence_1, torch_tokenizer)

    if distance == "raw":
        score = np.abs(att - pred_att).sum()
    else:  # "jsd": average the row-wise Jensen-Shannon distances
        jensonshannon_distances = [
            np.sqrt(js_divergence(row_att, row_out)) for row_att, row_out in zip(att, pred_att)
        ]
        score = np.mean(jensonshannon_distances)

    if output == True:  # noqa: E712 - `output` may also be a mode string
        colors = "inferno"
        fig, axes = plt.subplots(1, 2, figsize=(12, 5))
        im1 = axes[0].imshow(att, cmap=colors, aspect='auto')
        axes[0].set_title("Actual Head Attention")
        fig.colorbar(im1, ax=axes[0])
        im2 = axes[1].imshow(pred_att, cmap=colors, aspect='auto')
        axes[1].set_title("Optimal Head Attention for Pattern")
        fig.colorbar(im2, ax=axes[1])
        # Combining low line (U+0332) after each character underlines the name.
        underlined_name_unicode = "".join([char + '\u0332' for char in name])
        plt.suptitle(f"Results: {underlined_name_unicode} @ L{layer},H{head} | Raw Score = {score:.2f}\n\nSentence: \"{sentence_1}\"", fontsize=16)
        plt.tight_layout(rect=[0, 0.03, 1, 0.95])
        plt.show()

    elif output == "optimal":
        colors = "Oranges"
        fig, ax = plt.subplots(1, 1, figsize=(6, 5))
        ax.imshow(pred_att, cmap=colors, aspect='auto')
        ax.set_axis_off()
        plt.tight_layout(rect=[0, 0.03, 1, 0.95])
        plt.show()

    elif output == "actual":
        colors = "Reds"
        fig, ax = plt.subplots(1, 1, figsize=(6, 5))
        ax.imshow(att, cmap=colors, aspect='auto')
        ax.set_title("Example Head Attention for Pattern")
        plt.tight_layout(rect=[0, 0.03, 1, 0.95])
        plt.show()

    return score

In [None]:
# DOWNLOAD ADDITIONAL TEXT DATA (1):

# Mount Google Drive (Colab only) and read the CSV stored there.
from google.colab import drive
drive.mount('/content/drive')
file = '/content/drive/MyDrive/small_text.csv'
df = pd.read_csv(file)

# Split every entry of the 'text' column into sentences and keep at most
# the first 10,000 of them.
sentences = []
for paragraph in df['text']:
    sentences.extend(sent_tokenize(paragraph))
sentences = sentences[:10_000]

# Preview ten sentences. NOTE: at notebook top level this loop leaves the
# global name `sentence` bound to the last sentence printed.
print("Sentences from Tiny Stories Dataset:")
for sentence in sentences[20:30]:
  print(f"\t{sentence}")

In [None]:
# DOWNLOAD ADDITIONAL TEXT DATA (2):

# Parquet files of the `cais/mmlu` abstract_algebra subset, keyed by split.
splits = {'test': 'abstract_algebra/test-00000-of-00001.parquet', 'validation': 'abstract_algebra/validation-00000-of-00001.parquet', 'dev': 'abstract_algebra/dev-00000-of-00001.parquet'}
# Load the test split. This rebinds `df`, replacing the CSV frame loaded in
# the previous cell.
df = pd.read_parquet("hf://datasets/cais/mmlu/" + splits["test"])
# Not the last expression of the cell, so a notebook will not display this.
df.head()

# Preview the first eight questions.
print("Sentences from Abstract Algebra Dataset:")
for i in range(0,8):
  print("\t", df.iloc[i]['question'])

<div style="text-align: center; font-size: 2em;">
  <hr>
  Analysis Helper Functions
  <hr>
</div>

In [None]:
# ANALYZE PATTERN AT LAYER AND HEAD

# Candidate probe sentences; each assignment overwrites the previous one, so
# only the last is used. Reorder or comment out lines to switch sentence.
sentence = "The quick brown fox jumps over the lazy dog."
sentence = "Hi. How are you? I'm fine! Thanks. Bye, see you tomorrow."
sentence = "I like apples and I like bananas. I like apples more though."

models = ["bert-base-uncased", "openai-community/gpt2", "google-t5/t5-small", "Meta-Llama-3.1-8B-Instruct"]

# Index into `models` selecting which checkpoint to load.
i = 0
name = models[i]

if i in [0,1,2]: # small models ( <400M Params )
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModel.from_pretrained(name, output_attentions=True)
    model.eval()

elif i == 3: # big model ( 8B Params, Load from Compute Node )
    # Loaded from a fixed local path rather than by `name`.
    tokenizer = AutoTokenizer.from_pretrained("/raid/lingo/models/Meta-Llama-3.1-8B-Instruct/")
    model = AutoModelForCausalLM.from_pretrained("/raid/lingo/models/Meta-Llama-3.1-8B-Instruct/")
    model.eval()

# Score one head against the part-of-speech pattern and plot the comparison.
# Arguments follow score_prediction's declared order:
# (model, tokenizer, head_loc, pattern, sentence).
layer, head = 2, 0
score_prediction(model, tokenizer, (layer, head), pos_alignment, sentence, distance="jsd", output=True)

In [None]:
# ANALYZE ALL HEADS FOR A PATTERN (1 SENTENCE)

def visualize_full_model(sentence, torch_model, torch_tokenizer, pattern, title, bias_towards_best=0.9):
    """Score every head of a model against a pattern for one sentence and plot it.

    Args:
        sentence: Sentence to analyse.
        torch_model: Model whose config provides ``num_hidden_layers`` and
            ``num_attention_heads``.
        torch_tokenizer: Tokenizer matching ``torch_model``.
        pattern: Callable ``pattern(sentence, tokenizer) -> (name, matrix)``.
        title: Title of the heat-map.
        bias_towards_best: The colour scale uses ``PowerNorm`` with
            ``gamma = 1 - bias_towards_best``.

    Returns:
        ``(scores, top_three)``: the ``(layers, heads)`` score array and the
        ``(layer, head)`` index pairs of the three lowest scores.
    """
    # Read the dimensions from the model that was passed in, not from the
    # notebook-global `model`.
    num_layers = torch_model.config.num_hidden_layers
    num_heads = torch_model.config.num_attention_heads
    model_viz = np.zeros((num_layers, num_heads))

    # NOTE: score_prediction runs a full forward pass for each head.
    for i in range(num_layers):
        for j in range(num_heads):
            model_viz[i, j] = score_prediction(torch_model, torch_tokenizer, (i, j), pattern, sentence, distance="jsd", output=False)
    print(f"Best Score: {model_viz.min():.2f}")

    plt.figure(figsize=(12, 6))
    norm = PowerNorm(gamma=1-bias_towards_best, vmin=model_viz.min(), vmax=model_viz.max())
    plt.imshow(model_viz, cmap='bone', aspect='auto', norm=norm)
    plt.colorbar()
    plt.title(title, fontsize=16)
    plt.xlabel("Attention Heads")
    plt.ylabel("Transformer Layers")
    plt.xticks(ticks=np.arange(num_heads), labels=[f'H{h}' for h in range(num_heads)])
    plt.yticks(ticks=np.arange(num_layers), labels=[f'L{l}' for l in range(num_layers)])
    plt.tight_layout()
    plt.show()

    # Lowest score = closest match; take the three best heads.
    best_flat = np.argsort(model_viz, axis=None)[:3]
    top_three = list(zip(*np.unravel_index(best_flat, model_viz.shape)))
    for layer, head in top_three:
        print(f"Layer {layer}, Head {head} - Score: {model_viz[layer, head]:.2f}")
    return model_viz, top_three

# Label for the plot title, taken from the loaded model's config.
model_name = f"Model = {model.config.architectures[0]}\n"
sentence = "Hi. How are you? I'm fine! Thanks. Bye, see you tomorrow."
pattern_name = "Punctuation Attention Pattern"
# Appending U+0332 (combining low line) to each character underlines the name.
underlined_name_unicode = "".join([char + '\u0332' for char in pattern_name])
# Heat-map of every head's score against the punctuation pattern.
visualize_full_model(sentence, model, tokenizer, punctuation_attention, title=f"Top Heads: {underlined_name_unicode} | {model_name}\nSentence: \"{sentence}\"\n", bias_towards_best=0.7)

In [None]:
# ANALYZE ALL HEADS FOR A PATTERN (MULTIPLE SENTENCES)

def visualize_full_model(sentences, torch_model, torch_tokenizer, pattern, title, bias_towards_best=0.9):
    """Average every head's pattern score over several sentences and plot it.

    NOTE: this definition reuses the name of the single-sentence
    ``visualize_full_model`` defined in an earlier cell and replaces it once
    this cell has run.

    Args:
        sentences: Non-empty sequence of sentences to analyse.
        torch_model: Model whose config provides ``num_hidden_layers`` and
            ``num_attention_heads``.
        torch_tokenizer: Tokenizer matching ``torch_model``.
        pattern: Callable ``pattern(sentence, tokenizer) -> (name, matrix)``.
        title: Title of the heat-map.
        bias_towards_best: The colour scale uses ``PowerNorm`` with
            ``gamma = 1 - bias_towards_best``.

    Returns:
        ``(average_score, top_three)``: the ``(layers, heads)`` array of mean
        scores and the ``(layer, head)`` index pairs of the three lowest means.

    Raises:
        ValueError: If ``sentences`` is empty.
    """
    if len(sentences) == 0:
        raise ValueError("sentences must contain at least one sentence.")

    num_layers = torch_model.config.num_hidden_layers
    num_heads = torch_model.config.num_attention_heads

    # Accumulate per-head scores, then divide by the number of sentences.
    average_score = np.zeros((num_layers, num_heads))
    for sentence in tqdm(sentences):
        for i in range(num_layers):
            for j in range(num_heads):
                average_score[i, j] += score_prediction(torch_model, torch_tokenizer, (i, j), pattern, sentence, output=False)
    average_score /= len(sentences)

    plt.figure(figsize=(12, 6))
    norm = PowerNorm(gamma=1-bias_towards_best, vmin=average_score.min(), vmax=average_score.max())
    plt.imshow(average_score, cmap='bone', aspect='auto', norm=norm)
    plt.colorbar()
    plt.title(title, fontsize=16)
    plt.xlabel("Attention Heads")
    plt.ylabel("Transformer Layers")
    plt.xticks(ticks=np.arange(num_heads), labels=[f'H{h}' for h in range(num_heads)])
    plt.yticks(ticks=np.arange(num_layers), labels=[f'L{l}' for l in range(num_layers)])
    plt.tight_layout()
    plt.show()

    # Lowest mean score = closest match; keep the three best heads
    # (the first three entries of the ascending sort).
    best_flat = np.argsort(average_score, axis=None)[:3]
    top_three = list(zip(*np.unravel_index(best_flat, average_score.shape)))
    for layer, head in top_three:
        print(f"Layer {layer}, Head {head} - Score: {average_score[layer, head]:.2f}")
    return average_score, top_three

# Average the punctuation-pattern scores over the first five sentences.
model_name = f"Model = {model.config.architectures[0]}\n"
visualize_full_model(sentences[0:5], model, tokenizer, punctuation_attention, title="Top Heads: Punctuation Attention Pattern [AVERAGE]\n"+model_name, bias_towards_best=0.7)

In [None]:
# FILTER SENTENCES TO TARGET SPECIFIC PATTERNS

def filterer(sentences, length_matters=False, punctuation_matters=False, duplicates=False):
    filtered_sentences = []
    punctuation_set = set('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~')
    for sentence in sentences:

        passes_all_active_filters = True
        if length_matters:
            if len(sentence.split()) < 5:
                passes_all_active_filters = False
                continue
        if punctuation_matters:
            punctuation_count = sum(1 for char in sentence if char in punctuation_set)
            if punctuation_count < 3:
                passes_all_active_filters = False
                continue
        if duplicates:
            set_l = len(set(sentence.split()))
            sent_1 = len(sentence.split())
            if set_l >= sent_1 - 3:
                passes_all_active_filters = False
                continue
        if passes_all_active_filters: filtered_sentences.append(sentence)

    return filtered_sentences

# Keep only sentences with at least five whitespace-separated words.
filtered_sentences = filterer(sentences, length_matters = True)

In [None]:
# ANALYZE HEAD PATTERN ON SELECTED SENTENCES

def visualize_highest_head(sentences, top_n, torch_model, torch_tokenizer, head_loc, pattern):
    """Plot the sentences on which one head matches a pattern most closely.

    Every sentence is scored with the Jensen-Shannon distance.  The
    ``top_n`` sentences with the lowest scores are then printed and plotted.

    Args:
        sentences: Sequence of sentences to score.
        top_n: Number of best-matching sentences to display.
        torch_model: Model passed through to ``score_prediction``.
        torch_tokenizer: Tokenizer matching ``torch_model``.
        head_loc: ``(layer, head)`` indices of the head to inspect.
        pattern: Callable ``pattern(sentence, tokenizer) -> (name, matrix)``.

    Returns:
        Always ``0``.
    """
    layer, head = head_loc

    # Arguments follow score_prediction's declared order:
    # (model, tokenizer, head_loc, pattern, sentence).
    scores = [
        score_prediction(torch_model, torch_tokenizer, (layer, head), pattern, sentence, distance="jsd")
        for sentence in tqdm(sentences)
    ]

    # Ascending sort: the lowest distance is the best match.
    ranked = sorted(enumerate(scores), key=lambda pair: pair[1])

    for idx, score in ranked[:top_n]:
        print(f"Sentence #{idx} Score: {score}")
        score_prediction(torch_model, torch_tokenizer, (layer, head), pattern, sentences[idx], output=True)
    return 0

# Plot the 8 filtered sentences with the lowest punctuation-pattern score
# for layer 7, head 8.
layer, head = 7, 8
visualize_highest_head(filtered_sentences, 8, model, tokenizer, (layer, head), punctuation_attention)

<div style="text-align: center; font-size: 2em;">
  <hr>
    Filter Automation Efforts
  <hr>
</div>

In [None]:
import re
import json

In [None]:
# AUTOMATICALLY GENERATE PATTERNS FOR A HEAD

def generate_prompt(sentences, layer=7, head=1, top_k_ratio=0.1):
    """Build an LLM prompt asking for a hypothesis about one BERT attention head.

    ``bert-base-uncased`` is loaded and run on every sentence.  For each
    sentence the strongest attention weights of the chosen head are collected
    (ignoring ``[CLS]``, ``[SEP]`` and ``[PAD]``), serialised as JSON and
    embedded in a fixed instruction text.

    Args:
        sentences: Sequence of sentences to run through the model.
        layer: Index of the layer to inspect.
        head: Index of the head to inspect within that layer.
        top_k_ratio: Fraction of token pairs kept per sentence (at least one
            pair is always kept).

    Returns:
        The prompt as a single string with every run of whitespace collapsed
        to one space.
    """
    model_name = "bert-base-uncased"
    model = AutoModel.from_pretrained(model_name, output_attentions=True)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    data = {
        "layer": layer,
        "head": head,
        "model": model_name,
        "examples": []
    }

    def scrape_head(att, tokens, ignore_special=True, top_k_ratio=0.1):
        """Return the highest-weight (from, to) token pairs of one attention matrix."""
        seq_len = att.shape[0]
        ignore_indices = {i for i, tok in enumerate(tokens) if ignore_special and tok in ("[CLS]", "[SEP]", "[PAD]")}
        keep_indices = [i for i in range(seq_len) if i not in ignore_indices]
        att_scores = [(i, j, att[i, j]) for i in keep_indices for j in keep_indices]
        top_k = max(1, int(len(att_scores) * top_k_ratio))
        top_att = sorted(att_scores, key=lambda x: x[2], reverse=True)[:top_k]
        # The token positions are embedded in the key names.
        return [
            {
                f"from_token_{i}": tokens[i],
                f"to_token_{j}": tokens[j],
                "weight": float(score)
            }
            for i, j, score in top_att
        ]

    for sentence in sentences:
        inputs = tokenizer(sentence, return_tensors="pt")
        tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
        with torch.no_grad():
            outputs = model(**inputs)
            att = outputs.attentions[layer][0, head]
        att = att.detach().cpu().numpy()
        top_activations = scrape_head(att, tokens, top_k_ratio=top_k_ratio)
        data["examples"].append({
            "sentence": sentence,
            "attention": top_activations
        })

    data = json.dumps(data, indent=2)
    # The count is the number of sentences, not the length of the first one.
    prompt = f"""
    Using the following pieces of data based on {len(sentences)} sentences, generate three
    hypothesises about the linguistic role the following head is responsible for based on patterns
    in the activations. These patterns can be simple or complex. Here are some examples: {data} Then, choose the most fitting hypothesis for the head responsibility using a few examples from the data.
    Finally, using the linguistic hypothesis you determine, write a python function which takes in a sentence and tokenizer as parameters and
    outputs the name of the pattern you hypothesize along with a 'predicted_matrix' (size: token_len * token_len),
    which is the rule-encoded matrix that mirroring attention patterns you'd predict for any given sentence for
    Layer {layer}, Head {head}. Feel free to use the capacbilities of proved libraries like spacey and nltk for describing linguistic concepts. Feel free to encode complex functions. Make sure you generalize your hypothesis pattern to any sentence. As examples:,
    Layer 3, Head 9 has been found to be responsible for dependency parsing. It's predicted pseudocode would look like:
    def dependencies(sentence, tokenizer):
    toks = tokenizer([sentence], return_tensors="pt")
    len_seq = len(toks.input_ids[0])
    out = np.zeros((len_seq, len_seq))
    words = sentence.split()
    # use spacey nlp to split word into doc dependency tree
    # loop through each node in tree and assign directional attention
    # to the matrix 'out' by adding one when there is an outgoing edge.
    # assign cls (out[0, 0] = 1) and eos (out[-1, 0] = 1) to have self_attention
    # Normalize out matrix by row (results in uniform attention) and return out
    return 'Dependency Parsing Pattern', out
    Here is another pseudocode example for one method to implement part-of-speech:
    def pos_alignment(sentence, tokenizer):
    # assign toks, input_ids, word_ids, len_seq, out, doc
    # use spacey to get pos_tags for tokens in docs [token.pos_ for token in doc]
    # for token in pos_tags:
    # loop through pos_tags and increment out[i,j] when pos_tags match
    # assign cls (out[0, 0] = 1) and eos (out[-1, 0] = 1) to have self_attention
    # Normalize out matrix by row (results in uniform attention) and return out
    # return 'Part of Speech Implementation 1', out
    """
    return ' '.join(prompt.strip().split())

# NOTE(review): `same_length_sentences` is not defined in the visible part of
# this notebook — confirm it is created before this cell runs.
generate_prompt(same_length_sentences)

In [None]:
def format_prompt(question, choices):
    """Render a multiple-choice question as a prompt ending in ``Answer:``.

    The choices are lettered from ``A`` onwards, one per line, between the
    question line and the final ``Answer:`` line.
    """
    option_lines = []
    for position, choice in enumerate(choices):
        option_lines.append(f"{chr(65 + position)}. {choice}")
    option_block = "\n".join(option_lines)
    return "\n".join((f"{question}", option_block, "Answer:"))

def print_ans(actual, predicted):
    """Print one line showing the actual and the predicted answer."""
    message = f"Answer | Actual = {actual}, Predicted = {predicted}"
    print(message)

# First token id the tokenizer produces for each digit "0".."3" (encoded
# without special tokens); evaluate_model_answer compares against these.
zero_token_id, one_token_id, two_token_id, three_token_id = (
    tokenizer.encode(digit, add_special_tokens=False)[0]
    for digit in ("0", "1", "2", "3")
)

def evaluate_model_answer(model, tokenizer, question_text, correct_answer_idx):
    """Generate one token for a question and check it against the answer index.

    The model greedily generates a single new token.  That token is compared
    with the module-level ids ``zero_token_id`` .. ``three_token_id`` to get
    a predicted index 0-3; any other token counts as a wrong answer.

    NOTE(review): ``format_prompt`` letters the choices A-D while this
    function looks for the digit tokens "0"-"3" — confirm that the prompt
    and the expected answer format agree.

    Args:
        model: Model providing ``generate`` and ``device``.
        tokenizer: Tokenizer matching ``model``.
        question_text: Full prompt text.
        correct_answer_idx: Index (0-3) of the correct choice.

    Returns:
        Truthy when the predicted index equals ``correct_answer_idx``,
        ``False`` when it differs or the token is not one of the four digits.
    """
    inputs = tokenizer(question_text, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=1,  # Generate only one new token
            num_return_sequences=1,
            do_sample=False,   # For deterministic output
            pad_token_id=tokenizer.eos_token_id,
            output_scores=True,
            return_dict_in_generate=True
        )

    predicted_token_id = outputs.sequences[0, -1].item()

    # The position in this tuple is the answer index the token stands for.
    answer_token_ids = (zero_token_id, one_token_id, two_token_id, three_token_id)
    if predicted_token_id not in answer_token_ids:
        # Not a digit token: report the raw token id as the prediction.
        print_ans(correct_answer_idx, predicted_token_id)
        return False

    predicted_answer_idx = answer_token_ids.index(predicted_token_id)
    # Report the real correct index next to the prediction.
    print_ans(correct_answer_idx, predicted_answer_idx)
    return predicted_answer_idx == correct_answer_idx

# First pass: ask each question as-is and record whether the model is right.
print("\nEvaluating model without hints...")
df['correct_no_hint'] = False # Initialize column

# Only the first two rows are evaluated.
for index in tqdm(range(2)):
    row = df.iloc[index]
    question_with_choices = format_prompt(row['question'], row['choices'])
    # NOTE(review): `iloc` is positional while `at` is label-based; this
    # assumes df has a default 0..n-1 index — confirm.
    df.at[index, 'correct_no_hint'] = evaluate_model_answer(
        model, tokenizer, question_with_choices, row['answer']
    )

# Second pass: append the correct choice letter to the prompt as a hint.
print("\nEvaluating model with hints...")
df['correct_with_hint'] = False # Initialize column

for index in tqdm(range(2)):
    row = df.iloc[index]
    correct_choice_letter = chr(65 + row['answer'])
    question_with_choices = format_prompt(row['question'], row['choices'])
    # The hint is added after the prompt's trailing "Answer:" line.
    hinted_question = f"{question_with_choices} (Answer: {correct_choice_letter})"

    df.at[index, 'correct_with_hint'] = evaluate_model_answer(
        model, tokenizer, hinted_question, row['answer']
    )

# Summary; the per-category counts are commented out because the frames
# they reference are not built in this cell.
print("\n\n--- Evaluation Results ---")
print(f"Total questions: {len(df)}")
# print(f"Number correct without hint: {len(df_correct_no_hint)}")
# print(f"Number incorrect without hint: {len(df_incorrect_no_hint)}")
# print(f"Number correct with hint: {len(df_correct_with_hint)}")
# print(f"Number incorrect with hint: {len(df_incorrect_with_hint)}\n\n")

In [None]:
# INITIAL AUTOMATED / LLM-GENERATED FILTERS

def direct_object_prepositional_object_alignment(sentence, tokenizer):
    """
    Predict attention from verbs and prepositions to their objects.

    Encodes the hypothesis that Layer 7, Head 1 aligns verbs with their
    direct objects and adpositions with their prepositional objects.

    NOTE(review): the tokenizer's ``word_ids`` are used directly as spaCy
    token indices; this presumably assumes both split the sentence into the
    same words — confirm.

    Args:
        sentence (str): The input sentence.
        tokenizer: The tokenizer object (e.g., AutoTokenizer.from_pretrained("bert-base-uncased")).

    Returns:
        tuple: The pattern name and a 2D numpy array holding the predicted
               attention matrix, with every row normalised to sum to 1.
    """
    encoded = tokenizer([sentence], return_tensors="pt")
    token_len = len(encoded.input_ids[0])
    predicted_matrix = np.zeros((token_len, token_len))

    word_ids = encoded.word_ids()
    doc = nlp(sentence)

    # Group model-token positions by word id; special tokens (None) are skipped.
    pieces_by_word = {}
    for piece_idx, word_id in enumerate(word_ids):
        if word_id is not None:
            pieces_by_word.setdefault(word_id, []).append(piece_idx)

    # Dependency label of the object each part of speech attends to.
    object_dep_for_pos = {"VERB": "dobj", "ADP": "pobj"}

    for spacy_token in doc:
        source_pieces = pieces_by_word.get(spacy_token.i)
        wanted_dep = object_dep_for_pos.get(spacy_token.pos_)
        if not source_pieces or wanted_dep is None:
            continue
        # Attention originates from the first sub-token of the verb/preposition
        # and lands on every sub-token of the object.
        source_row = source_pieces[0]
        for child in spacy_token.children:
            if child.dep_ != wanted_dep:
                continue
            for target_col in pieces_by_word.get(child.i, ()):
                predicted_matrix[source_row, target_col] = 0.8  # High weight

    # Self-attention for the first and last positions.
    predicted_matrix[0, 0] = 1.0
    predicted_matrix[token_len - 1, token_len - 1] = 1.0

    # Heuristic fallback for rows without any assigned weight: split the
    # attention between column 0 and the token itself.
    for row_idx, row in enumerate(predicted_matrix):
        if row.sum() == 0:
            row[0] = 0.5
            row[row_idx] = 0.5

    # Normalise each non-empty row to sum to 1 (rows are views, so the
    # in-place division updates the matrix).
    for row in predicted_matrix:
        total = row.sum()
        if total > 0:
            row /= total

    return 'Direct Object / Prepositional Object Alignment', predicted_matrix

def determiner_noun_phrase_linking(sentence: str, tokenizer) -> tuple[str, np.ndarray]:
    """
    Predict attention from determiners into their noun phrase.

    Every model token whose word is tagged ``DET`` by spaCy attends to all
    sub-tokens of the determiner's syntactic head and to all sub-tokens of
    adjectives that are children of that head and precede it.  The first
    and the last position get self-attention.  Rows with any weight are
    normalised to sum to 1; all other rows stay zero.

    NOTE(review): the tokenizer's ``word_ids`` are used directly as spaCy
    token indices; this presumably assumes both split the sentence into the
    same words — confirm.

    Args:
        sentence (str): The input sentence.
        tokenizer: The tokenizer object (e.g., from Hugging Face Transformers).

    Returns:
        tuple[str, np.ndarray]: The pattern name and the predicted
                                attention matrix.
    """
    toks = tokenizer([sentence], return_tensors="pt")
    token_len = len(toks.input_ids[0])
    predicted_matrix = np.zeros((token_len, token_len))

    doc = nlp(sentence)
    word_ids = toks.word_ids(batch_index=0)  # only one sentence in the batch

    # Index the sub-token columns by word once instead of rescanning
    # word_ids for every target.
    columns_by_word = {}
    for col, word_idx in enumerate(word_ids):
        if word_idx is not None:
            columns_by_word.setdefault(word_idx, []).append(col)

    for row, word_idx in enumerate(word_ids):
        if word_idx is None or word_idx >= len(doc):
            continue
        spacy_token = doc[word_idx]
        if spacy_token.pos_ != "DET":
            continue

        # Targets: the determiner's head plus adjectives modifying that head
        # that appear before it.
        head_spacy_token = spacy_token.head
        target_words = [head_spacy_token.i]
        target_words.extend(
            child.i
            for child in head_spacy_token.children
            if child.pos_ == "ADJ" and child.i < head_spacy_token.i
        )
        for target_word in target_words:
            for col in columns_by_word.get(target_word, ()):
                predicted_matrix[row, col] = 1.0

    # Self-attention for the first and last positions.
    predicted_matrix[0, 0] = 1.0
    predicted_matrix[token_len - 1, token_len - 1] = 1.0

    # Row-normalise. `where` skips the all-zero rows so no division by zero
    # (and no RuntimeWarning) occurs; those rows keep the zeros from `out`.
    row_sums = predicted_matrix.sum(axis=1, keepdims=True)
    predicted_matrix = np.divide(
        predicted_matrix,
        row_sums,
        out=np.zeros_like(predicted_matrix),
        where=row_sums != 0,
    )

    return "Determiner-Noun/Adjective-Noun Phrase Linking", predicted_matrix

# Dependency labels of a verb's children that the verb attends to.
_VPM_VERB_CHILD_DEPS = frozenset(
    {"nsubj", "dobj", "iobj", "attr", "acomp", "xcomp", "prep", "advcl", "advmod"}
)

# Dependency labels whose (non-verb) token attends back to a governing verb:
# subjects / adverbs / prepositions / auxiliaries plus objects / complements.
_VPM_VERB_DEPENDENT_DEPS = frozenset(
    {
        "nsubj", "advmod", "prep", "aux", "auxpass",
        "dobj", "iobj", "attr", "acomp", "xcomp", "ccomp", "acl",
    }
)

# Loaded spaCy pipelines keyed by model name, so the model is read from disk
# once per process instead of once per call.
_VPM_SPACY_MODELS: dict = {}


def _vpm_get_spacy_model(name: str = "en_core_web_sm"):
    """Return the spaCy pipeline *name*, loading (or downloading) it only once."""
    if name not in _VPM_SPACY_MODELS:
        try:
            _VPM_SPACY_MODELS[name] = spacy.load(name)
        except OSError:
            print("Downloading en_core_web_sm model for spaCy. Please run 'python -m spacy download en_core_web_sm' once.")
            spacy.cli.download(name)
            _VPM_SPACY_MODELS[name] = spacy.load(name)
    return _VPM_SPACY_MODELS[name]


def _vpm_link(matrix: np.ndarray, from_indices, to_indices) -> None:
    """Set matrix[f, t] = 1.0 for every in-bounds (f, t) pair of the two index groups."""
    size = matrix.shape[0]
    for from_idx in from_indices:
        if from_idx >= size:
            continue
        for to_idx in to_indices:
            if to_idx < size:
                matrix[from_idx, to_idx] = 1.0


def verb_phrase_modifier_attention(sentence: str, tokenizer) -> tuple[str, np.ndarray]:
    """
    Hypothesizes the attention pattern for a head responsible for connecting
    verbs to their related phrases and modifiers (subjects, objects, adverbs, PPs).

    Rules, applied per spaCy token (all of its sub-word tokens share the rule):
        - VERB: attends to itself and to its children with a label in
          ``_VPM_VERB_CHILD_DEPS``. Verbs get no further rules.
        - Non-verb token with a label in ``_VPM_VERB_DEPENDENT_DEPS`` and a
          VERB head: attends to that head.
        - ADP: attends to its ``pobj`` children and, if its head is a VERB,
          to that head.
        - ADJ: attends to its head when the head is a NOUN, or when the head
          is a VERB and the adjective is an ``acomp``.
    The first token attends to itself, the last token attends to the first
    token and to itself, and any token without a rule splits its attention
    equally between the first and last token. Every row is then normalized
    to sum to 1.

    Args:
        sentence: The input sentence.
        tokenizer: The tokenizer object (e.g., from Hugging Face Transformers).
            NOTE(review): the alignment assumes exactly one special token
            precedes the sentence and that tokenizing each spaCy token on its
            own yields the same pieces as tokenizing the whole sentence --
            confirm for non-BERT tokenizers.

    Returns:
        A tuple containing:
            - The name of the hypothesized pattern.
            - A NumPy array (predicted_matrix) of shape (len_seq, len_seq)
              representing the rule-encoded attention pattern.
    """
    nlp = _vpm_get_spacy_model()

    # Only the sequence length is needed from the model tokenization.
    tokens = tokenizer([sentence], return_tensors="pt")
    len_seq = len(tokens.input_ids[0])
    predicted_matrix = np.zeros((len_seq, len_seq))

    doc = nlp(sentence)

    # Map each spaCy token to the run of tokenizer positions it occupies.
    # One spaCy token may be split into several sub-word tokens.
    spacy_to_bert_map = []
    next_bert_idx = 1  # Start after the leading special token ([CLS])
    for spacy_token in doc:
        n_sub_tokens = len(tokenizer.tokenize(spacy_token.text))
        spacy_to_bert_map.append(range(next_bert_idx, next_bert_idx + n_sub_tokens))
        next_bert_idx += n_sub_tokens

    for spacy_token, from_bert_indices in zip(doc, spacy_to_bert_map):
        head = spacy_token.head  # never None: the root is its own head
        head_is_verb = head.pos_ == "VERB"

        if spacy_token.pos_ == "VERB":
            # Verb -> its subject, objects, complements and modifiers.
            for child in spacy_token.children:
                if child.dep_ in _VPM_VERB_CHILD_DEPS:
                    _vpm_link(predicted_matrix, from_bert_indices, spacy_to_bert_map[child.i])
            # Verb -> itself.
            for idx in from_bert_indices:
                if idx < len_seq:
                    predicted_matrix[idx, idx] = 1.0
            # Verbs take priority: no dependent-side rules are applied to them.
            continue

        # Subjects, modifiers, auxiliaries, objects and complements -> governing verb.
        if head_is_verb and spacy_token.dep_ in _VPM_VERB_DEPENDENT_DEPS:
            _vpm_link(predicted_matrix, from_bert_indices, spacy_to_bert_map[head.i])

        # The part-of-speech rules below are checked independently of the
        # dependency-label rule above. Chaining them with `elif` would send a
        # preposition labelled "prep" into the rule above only, and it would
        # never be linked to the object it introduces.
        if spacy_token.pos_ == "ADP":
            # Preposition -> the object it introduces.
            for child in spacy_token.children:
                if child.dep_ == "pobj":
                    _vpm_link(predicted_matrix, from_bert_indices, spacy_to_bert_map[child.i])
            # Preposition/particle -> the verb it attaches to, whether or not
            # the adposition has children of its own.
            if head_is_verb:
                _vpm_link(predicted_matrix, from_bert_indices, spacy_to_bert_map[head.i])
        elif spacy_token.pos_ == "ADJ":
            # Adjective -> its noun, or -> its verb when used as a complement.
            if head.pos_ == "NOUN" or (head_is_verb and spacy_token.dep_ == "acomp"):
                _vpm_link(predicted_matrix, from_bert_indices, spacy_to_bert_map[head.i])

    # Special tokens are set once, after the rules, so an empty sentence gets
    # the same [CLS]/[SEP] rows as any other input.
    if len_seq > 0:
        predicted_matrix[0, 0] = 1.0  # [CLS] -> [CLS]
    if len_seq > 1:
        predicted_matrix[len_seq - 1, 0] = 1.0  # [SEP] -> [CLS]
        predicted_matrix[len_seq - 1, len_seq - 1] = 1.0  # [SEP] -> [SEP]

    # Tokens that no rule touched split their attention between [CLS] and
    # [SEP]. The first and last rows are non-zero by now, so only ordinary
    # tokens can be selected here.
    unattended_rows = predicted_matrix.sum(axis=1) == 0
    if unattended_rows.any():
        predicted_matrix[unattended_rows, 0] = 0.5
        predicted_matrix[unattended_rows, len_seq - 1] = 0.5

    # Every row is non-zero now, so each can be scaled to sum to 1.
    predicted_matrix /= predicted_matrix.sum(axis=1, keepdims=True)

    return "Verb-Related Phrase and Modifier Focus", predicted_matrix