In [1]:
import torch
import pandas as pd
import numpy as np
from transformers import AutoModelForCausalLM, AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
one_gram_samples_df = pd.read_csv('data/1gram_samples.csv', delimiter=",", encoding="utf-8", quotechar='"')

one_gram_samples = one_gram_samples_df['gram'].tolist()

one_gram_targets_df = pd.read_csv('data/1gram_targets.csv', delimiter=",", encoding="utf-8", quotechar='"')

one_gram_targets = one_gram_targets_df['gram'].tolist()

two_grams_df = pd.read_csv('data/2grams.csv', delimiter=",", encoding="utf-8", quotechar='"')

two_grams = two_grams_df['gram'].tolist()

In [6]:
# Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-70m-deduped")
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-70m-deduped")
tokenizer.pad_token = tokenizer.eos_token


The `GPTNeoXSdpaAttention` class is deprecated in favor of simply modifying the `config._attn_implementation`attribute of the `GPTNeoXAttention` class! It will be removed in v4.48


In [7]:
activation_list = []

def hook_fn(module, input, output):
    activation_list.append(output)


layer_to_hook = model.gpt_neox.layers[3].mlp
hook = layer_to_hook.register_forward_hook(hook_fn)

In [25]:
input = tokenizer('sunset', return_tensors="pt", padding=True, truncation=True)
decoded_tokens = tokenizer.convert_ids_to_tokens(input['input_ids'][0])
output = model(**input)
t = activation_list[0][0].detach().numpy()
activation_list.clear()

In [30]:
one_gram_samples_activations = []
one_gram_targets_activations = []
two_gram_activations = []

for one_gram in one_gram_samples[:5000]:
    input = tokenizer(one_gram, return_tensors="pt", padding=True, truncation=True)
    decoded_tokens = tokenizer.convert_ids_to_tokens(input['input_ids'][0])
    output = model(**input)
    one_gram_samples_activations.append(activation_list[0][0].detach().numpy()[-1]) # Take only last vector
    activation_list.clear()


for one_gram in one_gram_targets[:5000]:
    input = tokenizer(one_gram, return_tensors="pt", padding=True, truncation=True)
    output = model(**input)
    one_gram_targets_activations.append(activation_list[0][0].detach().numpy()[-1]) # Take only last vector
    activation_list.clear()


for two_gram in two_grams[:5000]:
    input = tokenizer(two_gram, return_tensors="pt", padding=True, truncation=True) 
    output = model(**input)
    two_gram_activations.append(activation_list[0][0].detach().numpy()[-1]) # Take only last vector
    activation_list.clear()

In [58]:
# Stack the samples
X = torch.tensor(np.hstack([one_gram_samples_activations,two_gram_activations])) # Row vectors
Y = torch.tensor(one_gram_targets_activations) # Row vectors

# Unique sentiments
k = one_gram_samples_df['gram'].nunique()

In [62]:


class AutoEncoder(torch.nn.Module):
    def __init__(self, input_size, output_size, k=64):
        super(AutoEncoder, self).__init__()

        self.encoder = torch.nn.Sequential(
            torch.nn.Linear(input_size, k,bias=True),
            torch.nn.ReLU(),
        )

        self.decoder = torch.nn.Sequential(
            torch.nn.Linear(k, output_size, bias=True),
        )

    def forward(self, x):
        f = self.encoder(x)
        y = self.decoder(f)
        return y, f
    
class LossFunction(torch.nn.Module):
    def __init__(self, lambda_=0.1):
        super(LossFunction, self).__init__()

        self.lambda_ = lambda_

    def forward(self, x, x_hat, f):
        reconstruction_error = torch.mean(torch.norm(x - x_hat, p=2)**2)

        magnitude_penalty = self.lambda_ * torch.mean(torch.norm(torch.abs(f), p=1))
        # abs not strictly necessary because of ReLU in encoder

        return reconstruction_error + magnitude_penalty


    

# Train the autoencoder
autoencoder = AutoEncoder(X.shape[1], Y.shape[1], k)

criterion = LossFunction(lambda_=0.1)

optimizer = torch.optim.Adam(autoencoder.parameters(), lr=0.001)

for idx,sample in enumerate(X):
    y_pred, f = autoencoder(sample)
    loss = criterion(Y[idx], y_pred, f)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    print(f"Loss: {loss.item()}")

Loss: 595.5740966796875
Loss: 547.3128051757812
Loss: 80.45320129394531
Loss: 526.9406127929688
Loss: 524.0365600585938
Loss: 89.66631317138672
Loss: 508.0898742675781
Loss: 317.5902099609375
Loss: 112.32677459716797
Loss: 177.51644897460938
Loss: 368.143798828125
Loss: 373.7642517089844
Loss: 282.1630554199219
Loss: 272.0525817871094
Loss: 245.15011596679688
Loss: 237.47105407714844
Loss: 184.96795654296875
Loss: 387.4938049316406
Loss: 175.62997436523438
Loss: 136.66905212402344
Loss: 187.42623901367188
Loss: 115.44242095947266
Loss: 165.9449462890625
Loss: 162.70985412597656
Loss: 152.36912536621094
Loss: 125.05906677246094
Loss: 87.68011474609375
Loss: 95.77252960205078
Loss: 94.08216857910156
Loss: 468.03558349609375
Loss: 71.1727294921875
Loss: 166.15130615234375
Loss: 181.24354553222656
Loss: 103.80750274658203
Loss: 151.44390869140625
Loss: 113.43636322021484
Loss: 124.78385162353516
Loss: 68.98860931396484
Loss: 87.44959259033203
Loss: 98.14341735839844
Loss: 47.32439422607422

In [57]:
autoencoder.decoder[-1].weight.shape

torch.Size([512, 256])