In [77]:
from importlib import reload
import gpt2
reload(gpt2)
from gpt2 import GPT2LMHeadModel
from transformers import GPT2Tokenizer
import numpy as np
import pandas as pd
import json
import torch
from tqdm import tqdm

# Load the model

In [78]:
# Pretrained GPT-2 XL tokenizer plus the project's GPT2LMHeadModel (imported
# from the local `gpt2` module), switched to evaluation mode on load.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2-xl")
model = GPT2LMHeadModel.from_pretrained("gpt2-xl").eval()
# Printing an empty string keeps the cell from echoing the whole model repr.
print("")




In [79]:
# This is where we decide which neurons to mask.
# JSON object keys are always strings, so they are converted to int while the
# file is read (presumably these are layer indices -- confirm against the
# model code that consumes `neurons_per_layer`).
with open('middle/neurons_per_layer.json') as file:
    neurons_per_layer = {
        int(layer): neurons for layer, neurons in json.load(file).items()
    }

# Qualitative Assessment

In [80]:
# Prompt whose sampled continuations are compared with and without neuron masking.
line = "I watched a new movie yesterday. I thought it was"

# Encode the prompt as a PyTorch tensor of token ids.
# NOTE: `input` shadows the Python builtin of the same name; it is kept because
# the following cells read this global.
input = tokenizer.encode(line.strip(), return_tensors="pt")

In [81]:
# Forward pass with `neurons_per_layer` supplied and gradients disabled.
# The result is discarded, so this cell only checks that the call runs.
with torch.no_grad():
    model(input, neurons_per_layer=neurons_per_layer)

In [82]:
# Display the shape of the encoded prompt tensor.
input.shape

torch.Size([1, 11])

In [106]:
# Forward pass without the `neurons_per_layer` argument, gradients disabled.
# `output.logits` is consumed by the next cell.
with torch.no_grad():
    output = model(input)

In [111]:
# Normalise the logits over their last axis (index 2 of the 3-D logits tensor)
# to get per-position log-probabilities and probabilities.
log_probs = output.logits.log_softmax(dim=-1)
probs = output.logits.softmax(dim=-1)

In [112]:
# Log-probability the model assigned to each actual next token: the
# distribution at position p is read at the token found at position p + 1 of
# the prompt (hence the `[1:]` shift). Values are plain Python floats.
likelihood = [
    log_probs[:, position, next_token].item()
    for position, next_token in enumerate(input.squeeze()[1:])
]

In [49]:
# Sample continuations of the prompt twice with identical decoding settings:
# once with no neuron masking and once with `neurons_per_layer` applied.

# Sampling is stochastic. Re-seeding with the same value before each call makes
# the cell reproducible and gives both runs the same random stream, so that
# differences between them come from the masking rather than sampling noise.
GENERATION_SEED = 0

# Decoding options shared by both runs, defined once so they cannot drift apart.
generation_kwargs = dict(
    max_length=30,
    do_sample=True,
    num_return_sequences=10,
    # Alternative decoding settings, currently disabled:
    # no_repeat_ngram_size=2,
    # repetition_penalty=1.5,
    # top_p=0.92,
    # temperature=.85,
    # top_k=125,
    early_stopping=True,
    # Same value `generate` would otherwise fall back to; passing it explicitly
    # silences the "Setting `pad_token_id` to `eos_token_id`" warning.
    pad_token_id=tokenizer.eos_token_id,
)

torch.manual_seed(GENERATION_SEED)
normal_outputs = model.generate(
    input,
    neurons_per_layer=None,
    **generation_kwargs,
)

torch.manual_seed(GENERATION_SEED)
masked_outputs = model.generate(
    input,
    neurons_per_layer=neurons_per_layer,
    **generation_kwargs,
)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [50]:
# Decode and print every sample generated without masking, each followed by a
# separator rule.
# The loop variable is deliberately not called `output`: that name holds the
# model's forward-pass result in other cells, and overwriting it here would
# break them when cells are re-run out of order.
for sequence in normal_outputs:
    text = tokenizer.decode(sequence, skip_special_tokens=True)
    print(text)
    print(143 * '=')

I watched a new movie yesterday. I thought it was "I Am Sam" (which I did not see). This movie, based on the novels
I watched a new movie yesterday. I thought it was interesting, but it wasn't my cup of tea. In fact, I find this movie irritating
I watched a new movie yesterday. I thought it was a pretty good movie. I had the same reaction when I saw the scene where Michael has sex
I watched a new movie yesterday. I thought it was pretty good, but I found it rather dull. I enjoyed the story a lot, but I
I watched a new movie yesterday. I thought it was terrific because people were actually watching that movie again. It has been a year since I've seen
I watched a new movie yesterday. I thought it was great. After watching that movie I feel so happy that I wasn't offended by one thing in
I watched a new movie yesterday. I thought it was just a normal movie until I started to laugh when a scene where two men were arguing over whether
I watched a new movie yesterday. I thought it was kind o

In [51]:
# Decode and print every sample generated with the neuron mask applied, each
# followed by a separator rule.
# The loop variable is deliberately not called `output`: that name holds the
# model's forward-pass result in other cells, and overwriting it here would
# break them when cells are re-run out of order.
for sequence in masked_outputs:
    text = tokenizer.decode(sequence, skip_special_tokens=True)
    print(text)
    print(143 * '=')

I watched a new movie yesterday. I thought it was fantastic. Then I read on the back of the release that it was rated R. I'm
I watched a new movie yesterday. I thought it was funny and that it would be like a funny story. But as I continued through it, I
I watched a new movie yesterday. I thought it was great. It was a totally non-hypefest.

When did that happen?
I watched a new movie yesterday. I thought it was boring. I'm not even the type of person who likes horror films. I like "The
I watched a new movie yesterday. I thought it was cool. But today, I felt a slight change happening. It was almost like I was experiencing
I watched a new movie yesterday. I thought it was great, but the first minute is the best part: It's the guy picking the movie off
I watched a new movie yesterday. I thought it was okay.

I guess I'm not as crazy about the new Game of Thrones as people
I watched a new movie yesterday. I thought it was boring. And now, I'm thinking it's really boring. And then I thoug

# Wasserstein Distance of Outputs

In [1]:
from scipy.stats import wasserstein_distance
from scipy.special import kl_div

In [41]:
# Second prompt, used for the output-distribution cells below.
line = "The tiger licked its lips menacingly as it approached me. I felt"

# Encode the prompt as a PyTorch tensor of token ids.
# NOTE: this overwrites the `line` / `input` globals set for the first prompt,
# and `input` shadows the Python builtin of the same name.
input = tokenizer.encode(line.strip(), return_tensors="pt")

In [42]:
# Forward pass on the current prompt with gradients disabled and without the
# `neurons_per_layer` argument.
with torch.no_grad():
    output = model(input)

In [43]:
# Display the shape of the encoded prompt tensor.
input.shape

torch.Size([1, 15])

In [44]:
# Turn the logits into a probability distribution over the vocabulary at each
# position. The softmax must run over the last (vocabulary) axis: with dim=0 it
# normalises over the batch axis, which has size 1 here, so every entry would
# come out as exactly 1.0.
logits_probs = torch.nn.functional.softmax(output.logits, dim=-1)

In [23]:
filename = "data/train_data_binary.csv"

# Split the data set by its binary `label` column (presumably 0 = negative and
# 1 = positive, going by the variable names -- confirm against the data).
df = pd.read_csv(filename)
neg_df = df[df.label == 0]
pos_df = df[df.label == 1]

# Only label-0 rows whose index label is below this bound are scored. The
# filter replaces an `if i == 6: break` inside the loop, which only stopped if
# the row labelled 6 happened to have label 0 and otherwise ran over the whole
# data set. `read_csv` without `index_col` yields an increasing integer index,
# so the filter selects the same leading rows.
MAX_ROW_INDEX = 6

neg_log_probs = []

# Loop-local names are prefixed with `sentence_` so this cell does not
# overwrite the `input`, `output` and `log_probs` globals used by other cells.
for i, row in neg_df[neg_df.index < MAX_ROW_INDEX].iterrows():
    print(i)
    sentence_ids = tokenizer.encode(row.sentence.strip(), return_tensors='pt')
    with torch.no_grad():
        sentence_output = model(sentence_ids)
    # Log-probabilities over the vocabulary (last axis). Normalising over
    # dim=0, the size-1 batch axis, would produce a constant tensor.
    sentence_log_probs = torch.nn.functional.log_softmax(
        sentence_output.logits, dim=-1
    )
    neg_log_probs.append(sentence_log_probs.detach().cpu().numpy())


1
2


In [24]:
# Sanity check: show the shape of every array collected in `neg_log_probs`.
for collected in neg_log_probs:
    print(collected.shape)

(1, 15, 50257)
(1, 42, 50257)


# Likelihood

In [25]:
# Store the arrays in one .npz archive. The arrays have different shapes, so
# each is saved under its own key; the names arr_0, arr_1, ... are spelled out
# here and are the same ones `np.savez` gives to positional arguments.
np.savez(
    "test.npz",
    **{f"arr_{position}": array for position, array in enumerate(neg_log_probs)},
)

In [26]:
# Read every array back from the archive in the order it was stored.
# `np.load` on an .npz file returns a lazy NpzFile that keeps the file open and
# only reads an array when its key is accessed, so the arrays are pulled out
# inside a `with` block that closes the file afterwards.
with np.load('test.npz') as test:
    data = [test[key] for key in test.files]

In [28]:
# Round-trip check: the reloaded arrays should have the same shapes as the
# ones that were saved.
for reloaded in data:
    print(reloaded.shape)

(1, 15, 50257)
(1, 42, 50257)
