In [8]:
# Notebook-wide imports.
# `gpt2` and `neuron_selection` are reloaded before names are imported from
# them, so edits made to those modules after the kernel started are picked up
# without restarting it.
from importlib import reload
import gpt2
import neuron_selection
reload(gpt2)
reload(neuron_selection)
from neuron_selection import select_neurons_per_layer
# NOTE: GPT2LMHeadModel is taken from the `gpt2` module, not from
# `transformers`; later cells pass it the extra `neurons_per_layer` and
# `force_emotion` keyword arguments.
from gpt2 import GPT2LMHeadModel
from transformers import GPT2Tokenizer
import numpy as np
import pandas as pd
import json
import torch
from tqdm import tqdm

# Load the model

In [2]:
# Load tokenizer and model from the same checkpoint. The checkpoint name lives
# in one place so the two can never drift apart.
MODEL_NAME = "gpt2-xl"

tokenizer = GPT2Tokenizer.from_pretrained(MODEL_NAME)
model = GPT2LMHeadModel.from_pretrained(MODEL_NAME)

# Switch the model to evaluation mode for inference. `eval()` returns the
# model itself; the trailing semicolon stops the notebook from echoing its
# (very long) repr, which replaces the former `print("")` workaround.
model.eval();




In [10]:
# Choose the neurons to mask by reading a previously saved selection.
# JSON object keys are always strings, so they are converted back to integers
# after loading.
with open('middle/neurons_per_layer.json') as handle:
    raw_selection = json.load(handle)

neurons_per_layer = {
    int(layer): neurons
    for layer, neurons in raw_selection.items()
}

In [3]:
# Build the neuron selection with `select_neurons_per_layer`
# (n_neurons=1000, method="correlation").
# NOTE(review): this rebinds `neurons_per_layer`, replacing the mapping loaded
# from 'middle/neurons_per_layer.json' in the cell above. Which of the two is
# in effect depends on cell execution order — confirm the override is intended.
neurons_per_layer = select_neurons_per_layer(n_neurons=1000, method="correlation")

# Qualitative Assessment

In [4]:
# Prompt used for the qualitative comparison of generated continuations.
line = "I watched a new movie yesterday. I thought it was"

# Trim surrounding whitespace, then encode the prompt as a PyTorch tensor of
# token ids.
prompt_text = line.strip()
input = tokenizer.encode(prompt_text, return_tensors="pt")

In [38]:
# Forward pass with the neuron selection handed to the model, with gradient
# tracking disabled. The return value is not stored, so this cell only
# exercises the masked forward path.
with torch.no_grad():
    model(input, neurons_per_layer=neurons_per_layer)

In [22]:
# Display the shape of the encoded prompt tensor.
# NOTE(review): the recorded output ([1, 3]) looks too short for the prompt
# defined above and was probably produced with a different `input` — re-run.
input.shape

torch.Size([1, 3])

In [23]:
# Forward pass without any neuron selection, with gradient tracking disabled.
# The result is kept in `output`; the following cells read `output.logits`.
with torch.no_grad():
    output = model(input)

In [111]:
# Normalise the logits along axis 2, once in log space and once as plain
# probabilities.
logits = output.logits
log_probs = logits.log_softmax(dim=2)
probs = logits.softmax(dim=2)

In [112]:
# Per-token log-likelihood of the prompt under the model.
# The distribution at position p scores the token at position p + 1, so the
# targets are the input ids shifted left by one; the first token is never
# scored and the distribution at the last position is unused.
next_tokens = input.squeeze()[1:]

likelihood = []
for position, token_index in enumerate(next_tokens):
    token_log_prob = log_probs[:, position, token_index]
    likelihood.append(token_log_prob.item())

In [9]:
# Sample continuations of the prompt twice: once with no neuron selection and
# once with the neuron selection plus force_emotion="negative".
#
# Both calls share every decoding setting, and the torch RNG is re-seeded with
# the same value before each call. Without a seed the two batches differ by
# sampling noise as well as by the intervention, and the cell gives different
# text on every run.
SEED = 0  # arbitrary value; only needs to be the same for both calls

generation_kwargs = dict(
    max_length=30,
    do_sample=True,
    num_return_sequences=10,
    early_stopping=True,
    # Decoding options that are currently disabled:
    # no_repeat_ngram_size=2, repetition_penalty=1.5, top_p=0.92,
    # temperature=.85, top_k=125
)

# NOTE(review): assumes `generate` samples through torch's global RNG, as the
# transformers implementation does — confirm for the local `gpt2` module.
torch.manual_seed(SEED)
normal_outputs = model.generate(
    input,
    neurons_per_layer=None,
    **generation_kwargs,
)

torch.manual_seed(SEED)
masked_outputs = model.generate(
    input,
    neurons_per_layer=neurons_per_layer,
    force_emotion="negative",
    **generation_kwargs,
)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [10]:
# Print every sample generated without neuron masking, each followed by a
# separator line.
# The loop variable is deliberately not named `output`: that name holds the
# model's forward-pass result, which other cells read via `output.logits`, and
# rebinding it to a token tensor here would break them on re-run.
for sequence in normal_outputs:
    text = tokenizer.decode(sequence, skip_special_tokens=True)
    print(text)
    print(143*'=')

I watched a new movie yesterday. I thought it was a total trainwreck (read: boring, cliché, predictable). I did not care that I
I watched a new movie yesterday. I thought it was the best movie ever made. It was a very good movie. I thought it had a lot
I watched a new movie yesterday. I thought it was pretty good. My friends and I were sitting there, and this one woman is crying because,
I watched a new movie yesterday. I thought it was pretty great!

What's the most common reaction of someone who has seen the show?
I watched a new movie yesterday. I thought it was good. But there was no way to know without seeing it before writing this. When I said
I watched a new movie yesterday. I thought it was really good, but I don't think it would have been released if I hadn't been aware
I watched a new movie yesterday. I thought it was really funny. I don't think I've ever seen that many kids getting it for free!"
I watched a new movie yesterday. I thought it was really cool. It was called 'Th

In [12]:
# Print every sample generated with the neuron selection applied, each
# followed by a separator line.
# The loop variable is deliberately not named `output`: that name holds the
# model's forward-pass result, which other cells read via `output.logits`, and
# rebinding it to a token tensor here would break them on re-run.
for sequence in masked_outputs:
    text = tokenizer.decode(sequence, skip_special_tokens=True)
    print(text)
    print(143*'=')

I watched a new movie yesterday. I thought it was called "Sneaky Peppers" and it is really good. It is, of course
I watched a new movie yesterday. I thought it was awesome. The characters were good. It had a cute little romantic subplot, that kind of
I watched a new movie yesterday. I thought it was funny. I thought it was interesting. I thought it was cool."

After watching two
I watched a new movie yesterday. I thought it was amazing. Now I've lost an entire week to the same movie. And you can't say
I watched a new movie yesterday. I thought it was quite good.'


There is some kind of agreement between my mother, my mother's sister
I watched a new movie yesterday. I thought it was great. I loved it.

"My opinion is that in order to win if you
I watched a new movie yesterday. I thought it was all over. This was like that movie, but this scene…this is really cool. Can
I watched a new movie yesterday. I thought it was so cool.

I listened to a new song. I thought it was so cool.
I watch

# Wasserstein Distance of Outputs

In [1]:
from scipy.stats import wasserstein_distance
from scipy.special import kl_div

In [41]:
# Prompt used for the output-distribution experiments in this section.
line = "The tiger licked its lips menacingly as it approached me. I felt"

# Trim surrounding whitespace, then encode the prompt as a PyTorch tensor of
# token ids.
prompt_text = line.strip()
input = tokenizer.encode(prompt_text, return_tensors="pt")

In [42]:
# Forward pass on the prompt encoded above, with gradient tracking disabled.
# The result is kept in `output`; the next cells read `output.logits`.
with torch.no_grad():
    output = model(input)

In [43]:
# Display the shape of the encoded prompt tensor (a quick sanity check).
input.shape

torch.Size([1, 15])

In [44]:
# Turn the logits into a probability distribution over the vocabulary at every
# position. The softmax has to run over the last axis: the logits are
# (batch, position, vocab) with a batch of one, so normalising over dim=0 (the
# batch axis) would turn every single entry into exactly 1.0.
logits_probs = torch.nn.functional.softmax(output.logits, dim=-1)

In [23]:
# Score a small preview of the negative training sentences and collect the
# model's per-position output distributions as NumPy arrays.
filename = "data/train_data_binary.csv"

df = pd.read_csv(filename)
neg_df = df[df.label == 0]
pos_df = df[df.label == 1]

# Only negative sentences among the first MAX_ROW_INDEX rows of the CSV are
# scored. Filtering on the index up front replaces the former
# `if i == 6: break`, which compared against an index *label*: whenever row 6
# was not a negative example the break never fired and the whole set was run
# through the model.
MAX_ROW_INDEX = 6
neg_preview = neg_df[neg_df.index < MAX_ROW_INDEX]

neg_log_probs = []

for i, row in neg_preview.iterrows():
    print(i)
    input = tokenizer.encode(row.sentence.strip(), return_tensors='pt')
    with torch.no_grad():
        output = model(input)
    # Normalise over the vocabulary (last) axis. dim=0 is the size-1 batch
    # axis, and a softmax over it yields 1.0 everywhere.
    # NOTE(review): despite the `log_probs` / `neg_log_probs` names these are
    # plain probabilities (softmax, not log_softmax) — confirm which one the
    # downstream analysis expects.
    log_probs = torch.nn.functional.softmax(output.logits, dim=-1)
    neg_log_probs.append(log_probs.detach().cpu().numpy())


1
2


In [24]:
# Show the shape of every collected array, one per line.
for shape in map(np.shape, neg_log_probs):
    print(shape)

(1, 15, 50257)
(1, 42, 50257)


# Likelihood

In [25]:
# Persist the collected arrays into a single .npz archive. They are passed
# positionally, so NumPy stores them under the keys arr_0, arr_1, ... in list
# order. An archive is used because the arrays have different lengths along
# axis 1 (see the shapes printed above).
np.savez("test.npz", *neg_log_probs)

In [26]:
# Read the arrays back from the archive. `np.load` on an .npz file returns a
# lazily-loading NpzFile that keeps the file open, so it is used as a context
# manager; every array is materialised inside the block and the file handle is
# released when the block exits.
with np.load('test.npz') as test:
    data = [test[key] for key in test]

In [28]:
# Show the shape of every array read back from the archive, one per line, to
# check that the save/load round trip preserved them.
for shape in map(np.shape, data):
    print(shape)

(1, 15, 50257)
(1, 42, 50257)
