In [3]:
import torch
from transformers import AutoTokenizer, AutoModel
from pathlib import Path
from einops import rearrange, repeat
import os
import numpy as np
from IPython.core.display import display, HTML

import plotly.graph_objs as go
import plotly.io as pio
import torch
from IPython.display import HTML
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)

# todo: 
# - modularize
# - add slider for diff sentences


  from .autonotebook import tqdm as notebook_tqdm
  from IPython.core.display import display, HTML


In [4]:
def load_all_reporters(num_layers: int, prefix_path: str) -> torch.Tensor:
    reporters_weights = []
    for i in range(num_layers):
        reporter_path = f"{prefix_path}/layer_{i}.pt"
        reporter = torch.load(reporter_path).weight
        reporters_weights.append(reporter.cpu())
    stacked = torch.cat(reporters_weights, dim=0)
    return stacked

# path = '/home/waree/elk-reporters/huggyllama/llama-13b/sethapun/imdb_misspelled_0/llama13b-imdb0/reporters'
path = '/home/jon/elk-reporters/huggyllama/llama-7b/imdb/funny-robinson/reporters'




In [5]:
# Load pre-trained model and tokenizer
# model_name = "huggyllama/llama-13b"
def download_model():
    model_name = "huggyllama/llama-7b"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    return model, tokenizer

model, tokenizer = download_model()
reporter_weights = load_all_reporters(model.config.num_hidden_layers, path)

def get_credences_and_tokens(input_sequence):
    # Tokenize input sequence
    input_tokens = tokenizer.tokenize(input_sequence)
    input_tokens.insert(0, "<bos>") # beginning of sentence

    # Encode input sequence
    input_ids = tokenizer.encode(input_sequence, return_tensors="pt")

    # Generate hidden states
    outputs = model(input_ids, output_hidden_states=True)
    hidden_states = outputs.hidden_states
    cat_hidden_states = torch.cat(hidden_states[:-1], dim=0)
    result = torch.einsum('bse,be->bs', cat_hidden_states, reporter_weights)
    # Convert tensor to numpy array and detach gradients
    credences = result.detach().numpy()
    return (credences, input_tokens)



Loading checkpoint shards: 100%|██████████| 2/2 [00:28<00:00, 14.16s/it]
Some weights of the model checkpoint at huggyllama/llama-7b were not used when initializing LlamaModel: ['lm_head.weight']
- This IS expected if you are initializing LlamaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LlamaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [6]:
# print(hidden_states[0].shape)
# print(len(hidden_states))
# print(cat_hidden_states.shape)

In [7]:
# # Use einsum to do multiplication 
# result = torch.einsum('bse,be->bs', cat_hidden_states, reporter_weights)
# print(result[0])
# print(result.shape)

# sigmoid_result = torch.sigmoid(result)
# softmax_result = torch.softmax(result, dim = -1)
# torch.set_printoptions(precision=1)

In [8]:


# Create color scale for heatmap
# white to red
# color_scale = [[0, '#FFFFFF'], [1, '#FF0000']] # white to red
# white to green
# color_scale = [[0, '#FFFFFF'], [1, '#00FF00']] # white to green
# red to green
color_scale = [[0, '#FF0000'], [1, '#00FF00']] # red to green

def display_credences(credences, input_tokens, display="words"):

    padded_text = np.array([[" " * 2 + "{:.2f}".format(value) + " " * 2 for value in row] for row in credences])
    words_corresponding_to_credences = np.array([[" " * 2 + word + " " * 2 for word in input_tokens] for layer_num in range(len(credences))])

    # Create plotly heatmap
    heatmap = go.Heatmap(
        z=credences, 
        colorscale=color_scale,
        text=words_corresponding_to_credences if display == "words" else padded_text,  # Set the text to be equal to z
        texttemplate="%{text}", 
        textfont=dict(color='black', size=12),  # Set the text color and size
    )

    # Create plot layout
    layout = go.Layout(title='Credences for Input Tokens',
                    width=800,  # Set the width of the plot
                    height=800,  # Set the height of the plot
                    xaxis=dict(tickvals=list(range(len(input_tokens))),
                                ticktext=input_tokens,
                                tickangle=45))

    # Create plotly figure
    fig = go.Figure(data=[heatmap], layout=layout)
    # fig = fig.update_traces(text=input_tokens, texttemplate="%{text}", hovertemplate=None)

    # Display plotly figure
    iplot(fig)


In [9]:
def display_credences_list(credences_list, display="words"):

    fig_list = []
    
    for i, (credences, input_tokens) in enumerate(credences_list.values()):

        padded_text = np.array([[" " * 2 + "{:.2f}".format(value) + " " * 2 for value in row] for row in credences])
        words_corresponding_to_credences = np.array([[" " * 2 + word + " " * 2 for word in input_tokens] for layer_num in range(len(credences))])

        # Create plotly heatmap
        heatmap = go.Heatmap(
            z=credences, 
            colorscale=color_scale,
            text=words_corresponding_to_credences if display == "words" else padded_text,  # Set the text to be equal to z
            texttemplate="%{text}", 
            textfont=dict(color='black', size=12),  # Set the text color and size
        )

        # Create plot layout
        # slider slides between input tokens
        layout = go.Layout(title='Credences for Input Tokens'.format(i),
                        width=800,  # Set the width of the plot
                        height=800,  # Set the height of the plot
                        xaxis=dict(tickvals=list(range(len(input_tokens))),
                                    ticktext=input_tokens,
                                    tickangle=45),
                        sliders=[dict(
                                visible=True,
                                # steps needs to update the tokens and credences
                                steps=[dict(method='update',
                                            args=[{'visible': [True] * len(input_tokens)},
                                                {'title': 'Credences for Input Tokens'.format(i),
                                                'xaxis': {'tickvals': list(range(len(input_tokens))),
                                                            'ticktext': input_tokens,
                                                            'tickangle': 45},
                                                'annotations': []}])],
                                                
                                active=i,
                                currentvalue={"prefix": "Step: "},
                                pad={"t": 50},
                                len=0.9,
                            )])

        # Create plotly figure
        fig = go.Figure(data=[heatmap], layout=layout)
        fig_list.append(fig)

    # Display plotly figures
    iplot(fig_list[0])
    if len(fig_list) > 1:
        for fig in fig_list[1:]:
            iplot(fig)

# display_credences_list(d, display="words")


In [38]:
def generate_word_cloud(words, scores):
    # Define the HTML template for the word cloud
    html_template = """
    <div style="display: inline-block;
                margin: 0px 1px;
                color: black;
                padding: 1px;
                background-color: hsl({}, 100%, 70%);
                text-align: center;
                white-space: nowrap;">{}
    </div>
    """
    # Define the maximum and minimum scores
    # print(scores)
    max_score = max(scores)
    min_score = min(scores)
    # Generate the HTML for each word using the scores to determine the hue
    html_words = [html_template.format(int((score - min_score)/(max_score - min_score) * 125), word) for word, score in zip(words, scores)]
    # Join the HTML for all the words into a single string
    word_cloud_html = "".join(html_words)
    # Display the word cloud
    return HTML(word_cloud_html)

words = ["apple", "banana", "cherry", "orange", "pear"]
scores = [0.2, 0.5, 0.8, 0.3, 0.6]

h = generate_word_cloud(words, scores)

display(h)




In [33]:
garden_path_sentences = [
    "The horse raced past the barn fell.",
    "The old man the boat.",
    "The complex houses married and single soldiers and their families.",
    "The prime number few.",
    "The cotton clothing is usually made of grows in Mississippi.",
    "The river flowed through the town sank."
]

reviews = [
    "The movie was great!",
    "The movie was okay.",
    "The movie was terrible."
]

template = "The sentiment of the review is positive."

stmts = []
for review in reviews:
    stmt = "Here is a review:\n" + review + "\n" + template
    stmts.append(stmt)




# for _, hiddens in d.items():
#     print_credences(hiddens)
#     print("\n\n\n")

In [34]:
d = {stmt: get_credences_and_tokens(stmt) for stmt in stmts}
garden_path_sentences_d = {stmt: get_credences_and_tokens(stmt) for stmt in garden_path_sentences}

In [41]:
# create dict of seqs to hidden states

def normalize(hiddens, layer_num):
    # remove all but last 7 layers
    last = hiddens[-layer_num:]
    # last is a np array, normalize them to the 0 to 1 range
    last = (last - last.min()) / (last.max() - last.min())
    return last

def print_credences(hiddens):
    LAST = 30
    # apply normalization across last {FIRST} layers
    words = hiddens[1]
    scores = hiddens[0]

    scores = normalize(scores, LAST)
    print(f"layer -{LAST}")
    for layer_num in range(-LAST, 0):
        # print("Layer {}".format(layer_num))
        display(generate_word_cloud(words, scores[layer_num]))
    print("layer -1")

for _, hiddens in garden_path_sentences_d.items():
    print_credences(hiddens)
    print("\n\n")

layer -30


layer -1



layer -30


layer -1



layer -30


layer -1



layer -30


layer -1



layer -30


layer -1



layer -30


layer -1





# Sanity Checks

In [None]:
# Use einsum to do multiplication 

reporter_weights_repeat = repeat(reporter_weights, 'b e -> b c e', c=len(input_tokens) + 1)
result = torch.einsum('bse,bse->bs', cat_hidden_states, reporter_weights_repeat)
print(result.shape)

sigmoid_result = torch.sigmoid(result)
softmax_result = torch.softmax(result, dim = -1)
torch.set_printoptions(precision=1)

NameError: name 'input_tokens' is not defined

In [None]:
print(result)

tensor([[ 6.2e-03,  5.5e-03, -2.2e-02, -9.1e-03, -4.4e-02, -7.0e-03,  6.1e-02,
         -7.1e-04, -9.6e-03,  4.9e-03],
        [-1.2e+00, -5.9e-01, -1.8e-01, -6.5e-01, -3.5e-01, -9.4e-01, -2.4e-01,
         -4.6e-01, -1.8e-01, -9.0e-01],
        [-6.9e-01, -2.9e-01, -1.6e-01, -2.5e-01, -4.1e-01, -2.7e-01, -1.7e-01,
         -7.4e-02,  5.3e-02, -2.7e-01],
        [ 3.3e+01,  8.6e-02, -1.0e-01,  1.9e-02,  3.1e-01,  6.0e-03, -1.1e-01,
         -1.5e-01, -2.3e-01, -1.5e-01],
        [-3.5e+01, -8.6e-03,  7.5e-01,  2.8e-01, -4.6e-01, -1.6e-01,  2.4e-01,
          2.7e-01,  2.4e-01,  1.0e-01],
        [ 2.8e+01,  1.7e-01, -3.2e-01, -1.5e-01,  6.5e-01,  3.0e-01, -3.1e-01,
         -6.8e-02, -2.5e-01, -1.6e-02],
        [ 3.2e+01,  3.4e-01, -3.5e-01,  8.9e-02,  3.6e-01,  2.3e-01, -6.3e-01,
         -4.9e-01, -3.1e-01, -1.8e-01],
        [ 2.5e+01,  3.1e-01, -1.1e+00,  3.1e-02,  2.3e-01,  3.5e-01, -9.6e-01,
         -5.5e-01, -4.7e-01,  2.8e-01],
        [-2.7e+01,  4.0e-01,  1.0e+00,  3.1e-01,