In [None]:
from transformers import AutoModel, AutoTokenizer
import torch
import numpy as np
from matplotlib import pyplot as plt
from tqdm import tqdm

# Hugging Face Hub id of the checkpoint that the rest of the notebook inspects.
model_name = "princeton-nlp/unsup-simcse-roberta-large"

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Tokenizer and model are loaded from the same checkpoint; the model is then
# moved to the selected device.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModel.from_pretrained(model_name)
model = model.to(device)

In [None]:
# Display the repr of the loaded model (its module structure).
model

In [None]:
from datasets import load_dataset

# The split expression restricts the download to the first 100 rows of "train".
dataset_id = "abokbot/wikipedia-first-paragraph"
split_spec = "train[:100]"
dataset = load_dataset(dataset_id, split=split_spec)

In [None]:
def encode(input_text, max_length=256):
    """Run the model on ``input_text`` and return its per-layer hidden states.

    Parameters
    ----------
    input_text : str or list of str
        Text (or batch of texts) passed straight to the tokenizer.
    max_length : int, default 256
        Every sequence is truncated or padded to exactly this many tokens, so
        the sequence axis of the result has a fixed size.

    Returns
    -------
    tuple of torch.Tensor
        ``output.hidden_states`` as produced by the model, still on ``device``.
    """
    inputs = tokenizer(
        input_text,
        return_tensors='pt',
        padding='max_length',
        truncation=True,
        max_length=max_length,
    ).to(device)

    # Inference only, so no autograd graph is recorded.
    with torch.no_grad():
        # Attention maps are deliberately not requested: only the hidden states
        # are returned, and materialising the attentions just costs memory.
        output = model(**inputs, output_hidden_states=True)
    return output.hidden_states


def batch_encode(corpus, batch_size=10):
    """Encode ``corpus`` in batches and stack the hidden states of every layer.

    Parameters
    ----------
    corpus : sequence of str
        Texts to encode; must support ``len()`` and slicing.
    batch_size : int, default 10
        Number of texts handed to ``encode`` at a time. Must be positive.

    Returns
    -------
    torch.Tensor
        CPU tensor indexed as ``(text, layer, token, hidden unit)``. The last
        three sizes are taken from what ``encode`` returns for the first batch.

    Raises
    ------
    ValueError
        If ``batch_size`` is not positive. A negative value would otherwise
        skip the loop and silently return a tensor of zeros.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    n_texts = len(corpus)
    output = None

    for start in range(0, n_texts, batch_size):
        end = min(start + batch_size, n_texts)
        hidden_states = encode(corpus[start:end])

        if output is None:
            # Size the buffer from the model output instead of hard-coding the
            # dimensions of one particular checkpoint.
            per_text_shape = hidden_states[0].shape[1:]
            output = torch.zeros(n_texts, len(hidden_states), *per_text_shape)

        for layer_idx, layer in enumerate(hidden_states):
            # Assigning into the CPU buffer also copies the layer off the GPU.
            output[start:end, layer_idx] = layer

    if output is None:
        # Empty corpus: nothing was encoded, so the sizes cannot be inferred.
        # Keep the shape this function has always returned for that case.
        output = torch.zeros(0, 25, 256, 1024)

    return output


# Encode every paragraph of the dataset; the result is indexed as
# (paragraph, layer, token, hidden unit).
paragraph_texts = dataset['text']
processed_data = batch_encode(paragraph_texts)
processed_data.shape

In [None]:
# Work on the first 100 encoded paragraphs.
training_data = processed_data[:100]

# Scale every hidden-state vector to unit L2 norm along the last axis.
# This is done out of place on purpose: `processed_data[:100]` is a view, so an
# in-place `/=` would also overwrite `processed_data` and leave the earlier
# cell's result stale. The price is one extra copy of the tensor.
# `normalize` divides by max(norm, eps), so an all-zero vector stays zero
# instead of turning into NaN.
training_data = torch.nn.functional.normalize(training_data, p=2, dim=-1)

training_data = training_data.cpu().numpy()
training_data.shape

In [None]:
from sklearn.decomposition import PCA

# Two components so that every token vector can be drawn as a 2-D point.
# `random_state` is fixed because, with the default svd_solver='auto',
# scikit-learn may pick its randomized solver for inputs of this size, and the
# projection would then change from run to run.
pca = PCA(n_components=2, random_state=0)

# #1 flatten all
# flattened_training_data = training_data.reshape(-1, training_data.shape[-1])

# pca.fit(flattened_training_data)
# layers_2d = pca.transform(flattened_training_data).reshape(training_data.shape[0], training_data.shape[1], training_data.shape[2], 2)

In [None]:
#2 flatten each layer: the PCA is re-fitted on every layer separately, so each
# layer gets its own pair of principal axes.
n_paragraphs, n_layers, n_tokens, hidden_size = training_data.shape
layers_2d = np.zeros((n_paragraphs, n_layers, n_tokens, 2))

for layer_idx in tqdm(range(n_layers)):
    # All token vectors of this layer, one row per (paragraph, token) pair.
    layer_vectors = training_data[:, layer_idx].reshape(-1, hidden_size)
    projected = pca.fit_transform(layer_vectors)
    layers_2d[:, layer_idx] = projected.reshape(n_paragraphs, n_tokens, 2)

In [None]:
# #3 flatten each word (one PCA per token position)
# layers_2d = np.zeros((training_data.shape[0], training_data.shape[1], training_data.shape[2], 2))

# for i in tqdm(range(training_data.shape[2])):
#     layer_data = pca.fit_transform(training_data[:, :, i].reshape(-1, training_data.shape[-1]))
#     layers_2d[:,:, i] = layer_data.reshape(training_data.shape[0], training_data.shape[1], 2)

In [None]:
# Expected layout: (paragraphs, layers, tokens, 2 PCA components).
layers_2d.shape

In [None]:
# Same shape check as the previous cell (repeated).
layers_2d.shape

In [42]:
import plotly.express as px
import numpy as np
import plotly.colors as colors

# Colour gradient: one sample of the Rainbow scale per layer, reversed, used as
# the continuous colour scale for the `paragraph` value.
color_scale = colors.sample_colorscale(colors.sequential.Rainbow, layers_2d.shape[1])[::-1]

# Only the first few token positions are plotted during testing. The limit is
# applied to the loop range itself, so exactly MAX_TOKENS_SHOWN tokens are kept
# (checking `j >= 3` after appending would let a fourth token through).
MAX_TOKENS_SHOWN = 3
n_tokens_shown = min(MAX_TOKENS_SHOWN, layers_2d.shape[2])

# One record per (paragraph, layer, token); the layer index drives the animation.
data = []
for i in range(layers_2d.shape[0]):
    for frame, layer in enumerate(layers_2d[i]):
        for j in range(n_tokens_shown):
            data.append({
                'x': layer[j, 0],
                'y': layer[j, 1],
                'frame': frame,
                'index': j,
                'paragraph': i,
                # The first token of every paragraph gets a larger marker.
                'size': 8 if j == 0 else 1,
            })

fig = px.scatter(
    data_frame=data,
    x='x', y='y',
    animation_frame='frame',
    size='size',
    size_max=5,
    color_continuous_scale=color_scale,
    color='paragraph',
    # Fixed axes so the frames of the animation are directly comparable.
    range_x=[-1, 1],
    range_y=[-1, 1],
    hover_name='index',
    hover_data={'paragraph': True},
)

# fig.update_traces(marker=dict(size=0.5))
fig.update_layout(
    width=600,
    height=600,
)

fig.show()

In [50]:

# Print paragraph 23 as plain text so its line breaks are rendered.
print(dataset[23]['text'])

Alien primarily refers to:
 Alien (law), a person in a country who is not a national of that country
 Enemy alien, the above in times of war
 Extraterrestrial life, life which does not originate from Earth
 Specifically, intelligent extraterrestrial beings; see List of alleged extraterrestrial beings
 Introduced species, a species not native to its environment


In [None]:
# NOTE(review): no cell in this notebook defines `attentions` or `input_ids`,
# so the attention cells fail with NameError after Restart & Run All. They are
# created here from the paragraph printed above (index 23) using the notebook's
# model and tokenizer. That choice of text and model is an assumption - confirm
# it is what the attention plots were meant to show.
attention_inputs = tokenizer(
    dataset['text'][23],
    return_tensors='pt',
    truncation=True,
    max_length=256,
).to(device)
input_ids = attention_inputs['input_ids']

with torch.no_grad():
    attentions = model(**attention_inputs, output_attentions=True).attentions

# Shape of the first entry and the number of entries.
attentions[0].shape, len(attentions)

In [None]:
# let's visualize a few of the attention matrices
import seaborn as sns

# One heatmap per head of the last entry of `attentions` (first batch item),
# eight per row. The grid is sized from the tensor instead of a fixed 4 x 8,
# which raises IndexError as soon as there are fewer than 32 heads. With 32
# heads the layout and figure size are the same as the fixed 4 x 8 grid.
last_layer_heads = attentions[-1][0]
n_heads = last_layer_heads.shape[0]
n_cols = 8
n_rows = -(-n_heads // n_cols)  # ceiling division

fig, axs = plt.subplots(n_rows, n_cols, figsize=(20, 2.5 * n_rows), squeeze=False)

for i, ax in enumerate(axs.flat):
    if i >= n_heads:
        # Spare cell in the last row: nothing to draw.
        ax.axis('off')
        continue
    new_attention = last_layer_heads[i]
    # Log scale with a small epsilon so zero weights do not become -inf;
    # the colour range is fixed to [-6, 0] for every head.
    sns.heatmap(np.log(new_attention.cpu().numpy() + 1e-10), ax=ax, vmin=-6, vmax=0, cmap='viridis', cbar=False)
    ax.set_title(f'head {i}')

In [None]:
# let's visualize a few of the attention matrices
import plotly.express as px

# Heatmap of row -1 of every head in the last entry of `attentions`
# (presumably the last query position under the Hugging Face
# (batch, head, query, key) layout - confirm), with token labels on the x axis.
N_SKIPPED_TOKENS = 2  # leading positions left out of the plot

tokens = tokenizer.convert_ids_to_tokens(input_ids[0])[N_SKIPPED_TOKENS:]
# The numeric suffix keeps repeated tokens distinct on the axis. It starts at
# the offset so that it is the token's position in the full sequence, matching
# the labels of the full token-by-token heatmap.
tokens = [f'{t}{i}' for i, t in enumerate(tokens, start=N_SKIPPED_TOKENS)]

all_attentions = attentions[-1][0, :, -1].cpu().numpy()
# Rows are heads; their number is read from the data instead of assuming 32.
head_indices = list(range(all_attentions.shape[0]))

fig = px.imshow(
    # Small epsilon so zero weights do not become -inf under the log.
    np.log(all_attentions[:, N_SKIPPED_TOKENS:] + 1e-10),
    # Axis titles match what is plotted: tokens along x, heads along y.
    labels=dict(x="Tokens", y="Heads", color="Attention"),
    x=tokens,
    y=head_indices,
    color_continuous_scale='viridis',
    zmin=-6,
)
fig.show()
# for i, ax in enumerate(axs.flat):
#     new_attention = attentions[-1][0][i]
#     sns.heatmap(np.log(new_attention.cpu().numpy() + 1e-10), ax=ax, vmin=-6, vmax=0, cmap='viridis', cbar=False)

In [None]:
import plotly.express as px

# Token-by-token heatmap of one head taken from the last entry of `attentions`.
# `head` is bound here because this cell indexes with it; 15 is the value the
# roll-out cell at the end of the notebook uses.
head = 15

tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
# The position suffix keeps repeated tokens distinct on the axes.
tokens = [f'{t}{i}' for i, t in enumerate(tokens)]

# Log-attention clipped to [-6, 0] and shifted by 6, so the plotted values lie
# in [0, 6]; the epsilon keeps zero weights from becoming -inf.
values = np.log(attentions[-1][0][head].cpu().numpy() + 1e-10)
values = np.clip(values, -6, 0) + 6

fig = px.imshow(values, labels=dict(x="Tokens", y="Tokens", color="Attention"), x=tokens, y=tokens, color_continuous_scale='viridis')
fig.show()

In [None]:
head = 15

# Running matrix product of this head's attention matrices over the entries of
# `attentions`, starting from entry 0 (first batch item).
final_attention = attentions[0][0][head]

for i in range(len(attentions)):
    if i > 0:
        final_attention = torch.mm(attentions[i][0][head], final_attention)

    # Plot after the update so that every accumulated product is shown,
    # including the one that contains the last entry.
    _, ax = plt.subplots()
    ax.plot(final_attention[-1].cpu().numpy())
    ax.set(
        title=f'head {head}: product of entries 0..{i}',
        xlabel='token position',
        ylabel='accumulated attention (last row)',
    )
    plt.show()