# Attention Graph Plots

In [None]:
import torch
import networkx as nx
import matplotlib.pyplot as plt
from transformers import GPT2Tokenizer, GPT2Model

def plot_attention_graph(attention_weights, tokens, threshold=0.05):
    """Render one attention matrix as a directed graph on a circle.

    Each token position becomes a node, and an edge ``i -> j`` is drawn
    (labelled with its weight to two decimals) whenever
    ``attention_weights[i, j]`` exceeds ``threshold``.

    Args:
        attention_weights: 2-D matrix indexed as ``[i, j]`` whose elements
            expose ``.item()`` (e.g. a torch tensor). It must cover at least
            ``len(tokens)`` rows and columns.
        tokens: Sequence of token strings; ``tokens[i]`` labels node ``i``.
        threshold: Weights less than or equal to this value are not drawn.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    G = nx.DiGraph()

    # Nodes are keyed by position rather than by token text: a token that
    # occurs more than once would otherwise collapse into a single node and
    # its edges would overwrite one another.
    positions = range(len(tokens))
    node_labels = dict(enumerate(tokens))
    G.add_nodes_from(positions, size=100)

    # Keep only the edges whose attention weight is above the threshold
    for i in positions:
        for j in positions:
            edge_weight = attention_weights[i, j].item()
            if edge_weight > threshold:
                G.add_edge(i, j, weight=edge_weight)

    # Draw the graph using NetworkX
    pos = nx.circular_layout(G)
    edge_labels = {(u, v): f"{w:.2f}" for u, v, w in G.edges(data="weight")}
    nx.draw_networkx_nodes(G, pos, node_color="#98FB98", node_size=1000)
    nx.draw_networkx_edges(G, pos, edge_color="#000000", width=2)
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=12)
    nx.draw_networkx_labels(
        G, pos, labels=node_labels, font_size=14, font_family="sans-serif"
    )

    # Display the graph
    plt.axis("off")
    plt.show()

# Load the pre-trained GPT-2 model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2Model.from_pretrained("gpt2", output_attentions=True)

# Encode a sample text and pass it through the model
text = "The quick brown fox jumps over the lazy dog."
tokens = tokenizer.tokenize(text)
inputs = tokenizer(text, return_tensors="pt")
# Inference only: no gradients are needed to read the attention weights
with torch.no_grad():
    outputs = model(**inputs)

# Extract the attention weights for each layer and head
attentions = outputs.attentions
for layer, layer_attentions in enumerate(attentions):
    print(f"Layer {layer + 1}")
    # Each layer tensor is (batch, heads, seq, seq). Select the single batch
    # item first so the loop walks over heads; iterating the tensor directly
    # would walk over the batch axis and only ever plot head 0.
    for head, attention in enumerate(layer_attentions[0]):
        print(f"Head {head + 1}")
        plot_attention_graph(attention, tokens)


In [None]:
import torch
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from transformers import GPT2Tokenizer, GPT2Model

# Load the pre-trained GPT-2 model and tokenizer
model = GPT2Model.from_pretrained('gpt2')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Define the input text
text = "The quick brown fox jumps over the lazy dog."

# Tokenize the input text
inputs = tokenizer(text, return_tensors='pt')
# Token strings are loop-invariant, so compute them once for the node labels
tokens = tokenizer.tokenize(text)
token_labels = dict(enumerate(tokens))
positions = range(len(tokens))

# Get the attention matrices for each layer and head (inference only)
with torch.no_grad():
    outputs = model(**inputs, output_attentions=True)
attentions = outputs.attentions

# Create a graph for each layer and head
for layer, attention_layer in enumerate(attentions):
    for head, attention_head in enumerate(attention_layer[0]):
        graph = nx.DiGraph()
        # Nodes are token positions, so a repeated token keeps its own node
        # instead of being merged with its earlier occurrence.
        graph.add_nodes_from(positions)
        head_weights = attention_head.tolist()
        for i in positions:
            for j in positions:
                weight = head_weights[i][j]
                if weight > 0.05:
                    graph.add_edge(i, j, weight=weight)
        # Draw the graph
        plt.figure(figsize=(8, 8))
        pos = nx.circular_layout(graph)
        nx.draw_networkx_nodes(graph, pos, node_size=500)
        nx.draw_networkx_labels(graph, pos, labels=token_labels, font_size=12)
        nx.draw_networkx_edges(
            graph,
            pos,
            edge_color=[w for _, _, w in graph.edges(data='weight')],
            width=2,
            arrowstyle='->',
            arrowsize=10,
        )
        nx.draw_networkx_edge_labels(
            graph,
            pos,
            edge_labels={(u, v): f"{w:.2f}" for u, v, w in graph.edges(data='weight')},
            font_size=12,
        )
        plt.axis('off')
        plt.title(f'Layer {layer + 1}, Head {head + 1}')
        plt.show()


In [None]:
pip install python-bidi

In [None]:
import torch
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from bidi.algorithm import get_display
from transformers import AutoTokenizer, AutoModel

# Load the pre-trained Hebrew model and tokenizer
model_name = "onlplab/alephbert-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Define the input text
text = "הער, ישראל יהוה אונדזער גאט יהוה איז אײנער."

# Tokenize the input text
inputs = tokenizer(text, return_tensors='pt')

# Recover the token strings from the encoded ids rather than from
# tokenizer.tokenize(text): the encoded input can carry special tokens
# (e.g. [CLS]/[SEP]) that tokenize() omits, which would shift every label
# against the attention rows.
tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0].tolist())
positions = range(len(tokens))
# matplotlib lays text out left to right, so convert each token to its
# visual order once, up front.
display_labels = {i: get_display(token) for i, token in enumerate(tokens)}

# Get the attention matrices for each layer and head (inference only)
with torch.no_grad():
    outputs = model(**inputs, output_attentions=True)
attentions = outputs.attentions

# Create a graph for each layer and head
for layer, attention_layer in enumerate(attentions):
    for head, attention_head in enumerate(attention_layer[0]):
        graph = nx.DiGraph()
        # Nodes are token positions: the sample sentence repeats a word, and
        # keying nodes by text would merge both occurrences into one node.
        graph.add_nodes_from(positions)
        head_weights = attention_head.tolist()
        for i in positions:
            for j in positions:
                weight = head_weights[i][j]
                if weight > 0.05:
                    graph.add_edge(i, j, weight=weight)
        # Draw the graph
        plt.figure(figsize=(8, 8))
        pos = nx.circular_layout(graph)
        nx.draw_networkx_nodes(graph, pos, node_size=500)
        nx.draw_networkx_labels(graph, pos, labels=display_labels, font_size=12)
        nx.draw_networkx_edges(
            graph,
            pos,
            edge_color=[w for _, _, w in graph.edges(data='weight')],
            width=2,
            arrowstyle='->',
            arrowsize=10,
        )
        # rotate=False keeps the weight labels horizontal
        nx.draw_networkx_edge_labels(
            graph,
            pos,
            edge_labels={(u, v): f"{w:.2f}" for u, v, w in graph.edges(data='weight')},
            font_size=12,
            rotate=False,
        )
        plt.axis('off')
        plt.title(f'Layer {layer + 1}, Head {head + 1}')
        plt.show()


In [None]:
import torch
import networkx as nx
import matplotlib.pyplot as plt
from transformers import AutoTokenizer, AutoModel

def plot_attention_graph(attention_weights, tokens, threshold=0.05):
    """Render one attention matrix as a directed graph with offset self-loop labels.

    Each token position becomes a node on a circle. An edge ``i -> j`` is
    drawn, coloured by its weight, whenever ``attention_weights[i, j]``
    exceeds ``threshold``. Self-loops are redrawn in black and their weight
    is printed just below the node.

    Args:
        attention_weights: 2-D matrix indexed as ``[i, j]`` whose elements
            expose ``.item()`` (e.g. a torch tensor). It must cover at least
            ``len(tokens)`` rows and columns.
        tokens: Sequence of token strings; ``tokens[i]`` labels node ``i``.
        threshold: Weights less than or equal to this value are not drawn.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    G = nx.DiGraph()

    # Nodes are keyed by position rather than by token text so that a
    # repeated token keeps its own node and its own edges.
    positions = range(len(tokens))
    G.add_nodes_from(positions, size=100)

    # Keep only the edges whose attention weight is above the threshold
    for i in positions:
        for j in positions:
            edge_weight = attention_weights[i, j].item()
            if edge_weight > threshold:
                G.add_edge(i, j, weight=edge_weight)

    # Draw the graph using NetworkX
    pos = nx.circular_layout(G)
    edge_labels = {(u, v): f"{w:.2f}" for u, v, w in G.edges(data="weight")}
    # Self-loop weights are printed by the dedicated pass below, so they are
    # left out here to avoid drawing the same label twice.
    non_loop_labels = {
        (u, v): text for (u, v), text in edge_labels.items() if u != v
    }
    edge_colors = [w for _, _, w in G.edges(data="weight")]
    # Multi-character tokens are padded with a space on each side
    node_labels = {
        i: token if len(token) == 1 else f" {token} "
        for i, token in enumerate(tokens)
    }
    # Node text sits slightly below the node centre
    node_label_pos = {k: [v[0], v[1] - 0.05] for k, v in pos.items()}
    nx.draw_networkx_nodes(G, pos, node_color="#98FB98", node_size=1000)
    nx.draw_networkx_edges(G, pos, edge_color=edge_colors, width=2)
    nx.draw_networkx_edge_labels(
        G,
        pos,
        edge_labels=non_loop_labels,
        font_size=12,
        label_pos=0.3,
        rotate=False,
        verticalalignment="baseline",
        bbox=dict(boxstyle="round", alpha=0.2, facecolor="white", edgecolor="none"),
    )
    nx.draw_networkx_labels(
        G,
        node_label_pos,
        node_labels,
        font_size=14,
        font_family="sans-serif",
    )

    # Draw the self-loops, but only for nodes that actually have one; passing
    # (node, node) for every node would draw loops that are not in the graph.
    for node in G.nodes():
        if not G.has_edge(node, node):
            continue
        x, y = pos[node][0], pos[node][1] - 0.1
        nx.draw_networkx_edges(
            G,
            pos,
            edgelist=[(node, node)],
            width=2,
            edge_color="k",
            style="solid",
            connectionstyle="arc3, rad=-0.1",
        )
        plt.text(x, y, edge_labels[(node, node)], fontsize=12, ha="center", va="center")

    # Display the graph
    plt.axis("off")
    plt.show()


# Load the pre-trained GPT-2 model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModel.from_pretrained("gpt2", output_attentions=True)

# Encode a sample text and pass it through the model
text = "The quick brown fox jumped over the lazy dog"
tokens = tokenizer.tokenize(text)
inputs = tokenizer(text, return_tensors="pt")
# Inference only: no gradients are needed to read the attention weights
with torch.no_grad():
    outputs = model(**inputs)

# Extract the attention weights for each layer and head
attentions = outputs.attentions
for layer, layer_attentions in enumerate(attentions):
    print(f"Layer {layer + 1}")
    # Each layer tensor is (batch, heads, seq, seq). Select the single batch
    # item first so the loop walks over heads; iterating the tensor directly
    # would walk over the batch axis and only ever plot head 0.
    for head, attention in enumerate(layer_attentions[0]):
        print(f"Head {head + 1}")
        plot_attention_graph(attention, tokens)


In [None]:
import torch
import networkx as nx
import matplotlib.pyplot as plt
from transformers import AutoTokenizer, AutoModel

def plot_attention_graph(attention_weights, tokens, threshold=0.05):
    """Render one attention matrix as a directed graph with blue-shaded edges.

    Each token position becomes a node on a circle. An edge ``i -> j`` is
    drawn, shaded by its weight with the ``Blues`` colormap, whenever
    ``attention_weights[i, j]`` exceeds ``threshold``. Self-loop weights are
    printed in a small box offset from the node.

    Args:
        attention_weights: 2-D matrix indexed as ``[i, j]`` whose elements
            expose ``.item()`` (e.g. a torch tensor). It must cover at least
            ``len(tokens)`` rows and columns.
        tokens: Sequence of token strings; ``tokens[i]`` labels node ``i``.
        threshold: Weights less than or equal to this value are not drawn.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    G = nx.DiGraph()

    # Nodes are keyed by position rather than by token text so that a
    # repeated token keeps its own node and its own edges.
    positions = range(len(tokens))
    node_labels = dict(enumerate(tokens))
    G.add_nodes_from(positions, size=100)

    # Keep only the edges whose attention weight is above the threshold
    for i in positions:
        for j in positions:
            edge_weight = attention_weights[i, j].item()
            if edge_weight > threshold:
                G.add_edge(i, j, weight=edge_weight)

    # Draw the graph using NetworkX
    pos = nx.circular_layout(G)
    # Self-loop weights are printed by the offset pass below, so they are
    # left out of the regular edge labels to avoid drawing them twice.
    edge_labels = {
        (u, v): f"{w:.2f}" for u, v, w in G.edges(data="weight") if u != v
    }
    edge_colors = [w for _, _, w in G.edges(data="weight")]
    nx.draw_networkx_nodes(G, pos, node_color="#98FB98", node_size=1000)
    nx.draw_networkx_edges(G, pos, edge_color=edge_colors, width=2, arrows=True, edge_cmap=plt.cm.Blues)
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=12, label_pos=0.65, font_color="r")
    nx.draw_networkx_labels(
        G, pos, labels=node_labels, font_size=14, font_family="sans-serif"
    )

    # Add offset to self-loops so the weight does not sit on top of the node
    shift = 0.1
    for node in G.nodes:
        if G.has_edge(node, node):
            x, y = pos[node]
            plt.text(
                x + shift,
                y + shift,
                s=f"{G[node][node]['weight']:.2f}",
                bbox=dict(facecolor='white', alpha=0.5),
            )

    # Display the graph
    plt.axis("off")
    plt.show()

# Load the pre-trained GPT-2 model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModel.from_pretrained("gpt2", output_attentions=True)

# Encode a sample text and pass it through the model
text = "The quick brown fox jumped over the lazy dog"
tokens = tokenizer.tokenize(text)
inputs = tokenizer(text, return_tensors="pt")
# Inference only: no gradients are needed to read the attention weights
with torch.no_grad():
    outputs = model(**inputs)

# Extract the attention weights for each layer and head
attentions = outputs.attentions
for layer, layer_attentions in enumerate(attentions):
    print(f"Layer {layer + 1}")
    # Each layer tensor is (batch, heads, seq, seq). Select the single batch
    # item first so the loop walks over heads; iterating the tensor directly
    # would walk over the batch axis and only ever plot head 0.
    for head, attention in enumerate(layer_attentions[0]):
        print(f"Head {head + 1}")
        plot_attention_graph(attention, tokens)
