In [32]:
# The `gpt2` and `neuron_selection` modules (presumably project-local — confirm)
# are reloaded right after being imported, so that re-running this cell picks up
# edits to them without restarting the kernel.
from importlib import reload
import gpt2
import neuron_selection
reload(gpt2)
reload(neuron_selection)
# These names are imported only after the reloads above, so they come from the
# freshly re-executed module code.
from neuron_selection import select_neurons_per_layer
from gpt2 import GPT2LMHeadModel
from transformers import GPT2Tokenizer
import numpy as np
import pandas as pd
import json
import torch
from tqdm import tqdm

# Load the model

In [33]:
# Tokenizer and model are both loaded from the same "gpt2-xl" checkpoint name.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2-xl")
model = GPT2LMHeadModel.from_pretrained("gpt2-xl")
# The notebook only runs inference, so switch the model to evaluation mode.
model.eval()
print("") # End the cell on a print so the notebook does not echo the repr of the whole model




In [10]:
# Decide which neurons to mask: read the saved per-layer selection from disk.
# JSON object keys are always strings, so they are turned back into integers
# while the mapping is rebuilt.
with open('middle/neurons_per_layer.json') as fh:
    neurons_per_layer = {
        int(layer): neurons for layer, neurons in json.load(fh).items()
    }

In [27]:
# Rebinds `neurons_per_layer`: a value previously loaded from
# 'middle/neurons_per_layer.json' is replaced by this fresh selection.
# NOTE(review): presumably only one of the two sources is meant to be used per
# run — confirm which one the downstream cells expect.
neurons_per_layer = select_neurons_per_layer(n_neurons=1000, method="correlation")

# Qualitative Assessment

In [42]:
# Prompt used for the qualitative comparison of generated continuations.
line = "I watched a new movie yesterday. I thought it was"

# Encode the prompt as a PyTorch tensor (return_tensors="pt").
# NOTE(review): `input` shadows the Python builtin of the same name; it is kept
# because the following cells read the encoded prompt through this name.
input = tokenizer.encode(line.strip(), return_tensors="pt")

In [38]:
# Forward pass with `neurons_per_layer` handed to the model, with gradient
# tracking disabled. The return value is discarded, so this cell only checks
# that the call runs.
# presumably `neurons_per_layer` selects the neurons the model masks — confirm in the gpt2 module
with torch.no_grad():
    model(input, neurons_per_layer=neurons_per_layer)

In [22]:
# Display the shape of the encoded prompt tensor.
input.shape

torch.Size([1, 3])

In [23]:
# Plain forward pass (no `neurons_per_layer` argument) with gradient tracking
# disabled; `output.logits` is read by the following cells.
with torch.no_grad():
    output = model(input)

In [111]:
# Normalise the logits over their last axis (index 2 of the three dimensions)
# to obtain probabilities and, separately, log-probabilities.
probs = torch.nn.functional.softmax(output.logits, dim=-1)
log_probs = torch.nn.functional.log_softmax(output.logits, dim=-1)

In [112]:
# Log-likelihood of each prompt token given the tokens before it.
# The logits at position i score the token at position i + 1, hence the targets
# are the prompt shifted by one (the first token has no prediction).
# The single sequence is selected with `input[0, 1:]` rather than
# `input.squeeze()[1:]`: squeezing a one-token prompt produces a 0-dim tensor
# that cannot be sliced, whereas this form simply yields an empty list.
likelihood = [
    log_probs[:, i, token_index].item()
    for i, token_index in enumerate(input[0, 1:])
]

In [43]:
# Sampling settings shared by both runs, so that the two calls differ only in
# the `neurons_per_layer` argument.
# Other decoding knobs, currently disabled:
#   no_repeat_ngram_size=2, repetition_penalty=1.5, top_p=0.92,
#   temperature=.85, top_k=125
generation_kwargs = dict(
    max_length=30,
    do_sample=True,
    num_return_sequences=10,
    early_stopping=True,
)

# Sampling is random; both runs start from the same seed so the cell is
# reproducible and the two sets of samples draw on the same random stream.
GENERATION_SEED = 0

# Baseline continuations: no neurons passed to the model.
torch.manual_seed(GENERATION_SEED)
normal_outputs = model.generate(
    input,
    neurons_per_layer=None,
    **generation_kwargs,
)

# Continuations for the same prompt with the selected neurons passed in.
torch.manual_seed(GENERATION_SEED)
masked_outputs = model.generate(
    input,
    neurons_per_layer=neurons_per_layer,
    **generation_kwargs,
)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [44]:
# Print each baseline continuation followed by a separator line.
# The loop variable is deliberately not named `output`: that name holds the
# model's forward-pass result read by other cells (`output.logits`), and
# reusing it here would silently replace it with a token tensor.
for sequence in normal_outputs:
    text = tokenizer.decode(sequence, skip_special_tokens=True)
    print(text)
    print(143*'=')

I watched a new movie yesterday. I thought it was funny, but most of it was extremely offensive. For example, this is how I made my
I watched a new movie yesterday. I thought it was a decent way to use "crowdsourced" money. Can anyone give me a better
I watched a new movie yesterday. I thought it was pretty good. You have to understand he's never won an Oscar before, never even really heard
I watched a new movie yesterday. I thought it was OK. I thought it was OK on many dimensions."

The movie is about a boy
I watched a new movie yesterday. I thought it was hilarious. I'm so glad I got to live in the 1800s and that my ancestors weren
I watched a new movie yesterday. I thought it was "The Hobbit."


When I was little, my dad would often sit in the room
I watched a new movie yesterday. I thought it was one of the best movies I'd seen all year, but I would have known that within a
I watched a new movie yesterday. I thought it was an outstanding production, much better than the James Bond

In [45]:
# Print each masked-run continuation followed by a separator line.
# The loop variable is deliberately not named `output`: that name holds the
# model's forward-pass result read by other cells (`output.logits`), and
# reusing it here would silently replace it with a token tensor.
for sequence in masked_outputs:
    text = tokenizer.decode(sequence, skip_special_tokens=True)
    print(text)
    print(143*'=')

I watched a new movie yesterday. I thought it was a great little tale called 'Good Night, and Good Luck', written by Alan Trilling.
I watched a new movie yesterday. I thought it was a disaster. At the end, the story came back to me.

It came back
I watched a new movie yesterday. I thought it was going to be great but it was awful so stupid and I thought it was so terrible that it
I watched a new movie yesterday. I thought it was good! I'm so sick of TV though. It was my first time watching my TV and
I watched a new movie yesterday. I thought it was an insult to her in my mind to take any moment away from her. I don't think
I watched a new movie yesterday. I thought it was going to be some kind of Disney princess flick from a girl, but instead it was a movie
I watched a new movie yesterday. I thought it was the best I have seen in many years. My review to this day is that it is the
I watched a new movie yesterday. I thought it was so-so. I haven't watched it yet," he replied.

"It is
I 

# Wasserstein Distance of Outputs

In [1]:
from scipy.stats import wasserstein_distance
from scipy.special import kl_div

In [41]:
# Prompt used in this section of the notebook.
line = "The tiger licked its lips menacingly as it approached me. I felt"

# Encode the prompt as a PyTorch tensor (return_tensors="pt").
# NOTE(review): `input` shadows the Python builtin of the same name; it is kept
# because the following cells read the encoded prompt through this name.
input = tokenizer.encode(line.strip(), return_tensors="pt")

In [42]:
# Forward pass on the encoded prompt with gradient tracking disabled;
# `output.logits` is read by the cells below.
with torch.no_grad():
    output = model(input)

In [43]:
# Display the shape of the encoded prompt tensor.
input.shape

torch.Size([1, 15])

In [44]:
# Turn the logits into probabilities. The softmax must run over the last axis
# (the vocabulary); taken over dim=0 — the leading axis, which has size 1 for a
# single prompt — every entry would simply come out as 1.0.
logits_probs = torch.nn.functional.softmax(output.logits, dim=-1)

In [23]:
filename = "data/train_data_binary.csv"

# Split the training sentences by their binary label.
df = pd.read_csv(filename)
neg_df = df[df.label == 0]
pos_df = df[df.label == 1]

# One array of model outputs per processed label-0 sentence.
neg_log_probs = []

for i, row in neg_df.iterrows():
    # `i` is the row's index label from the full frame, not a loop counter.
    # Stop at the first label >= 6: an equality test would never fire if the
    # row labelled 6 is not in `neg_df`, and the loop would then run over the
    # entire dataset.
    if i >= 6:
        break
    print(i)
    input = tokenizer.encode(row.sentence.strip(), return_tensors='pt')
    with torch.no_grad():
        output = model(input)
    # Normalise over the last axis (the vocabulary). Over dim=0 — the leading
    # axis of size 1 — every value would become 1.0.
    # NOTE(review): despite the variable names these are probabilities
    # (softmax), not log-probabilities — confirm which one is wanted downstream.
    log_probs = torch.nn.functional.softmax(output.logits, dim=-1)
    neg_log_probs.append(log_probs.detach().cpu().numpy())


1
2


In [24]:
# Show the shape of every array collected in `neg_log_probs`, one per line.
for arr in neg_log_probs:
    print(arr.shape)

(1, 15, 50257)
(1, 42, 50257)


# Likelihood

In [25]:
# Write all collected arrays into a single .npz archive. They are passed
# positionally, so NumPy assigns the archive keys itself (arr_0, arr_1, ...).
np.savez("test.npz", *neg_log_probs)

In [26]:
# Read every array back from the archive. Indexing the archive loads the array
# into memory, so the file can be closed as soon as the list is built; the
# context manager does that instead of leaving the handle open.
# `test` stays bound afterwards but refers to a closed archive — use `data`.
with np.load('test.npz') as test:
    data = [test[key] for key in test]

In [28]:
# Show the shape of every array read back into `data`, one per line.
for loaded in data:
    print(loaded.shape)

(1, 15, 50257)
(1, 42, 50257)
