In [None]:
!pip install transformers



In [None]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch

In [None]:
# here, we we load a pre-trained GPT-2 model and tokenizer
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [None]:
# let's create some example data to run tests on
obs1 = "The room is messy."
obs2 = "There are toys everywhere."
hyp1 = "A child was playing here."
hyp2 = "The window was left open and the wind messed up the room."

In [None]:
# now, we can format the input for GPT-2
input_text_hyp1 = f"Observation 1: {obs1} Observation 2: {obs2} Hypothesis: {hyp1}"
input_text_hyp2 = f"Observation 1: {obs1} Observation 2: {obs2} Hypothesis: {hyp2}"

In [None]:
# now, let's get some outputs
inputs_hyp1 = tokenizer.encode(input_text_hyp1, return_tensors="pt")
inputs_hyp2 = tokenizer.encode(input_text_hyp2, return_tensors="pt")

with torch.no_grad():
    outputs_hyp1 = model(inputs_hyp1, labels=inputs_hyp1)
    outputs_hyp2 = model(inputs_hyp2, labels=inputs_hyp2)

# the following will calculate a log-likelihood of the hypotheses being a continuation of the observations
log_likelihood_hyp1 = -outputs_hyp1.loss.item()
log_likelihood_hyp2 = -outputs_hyp2.loss.item()

# which hypothesis is more likely?
more_likely_hypothesis = "Hypothesis 1" if log_likelihood_hyp1 > log_likelihood_hyp2 else "Hypothesis 2"

print(f"More likely hypothesis: {more_likely_hypothesis} (Hyp1: {log_likelihood_hyp1}, Hyp2: {log_likelihood_hyp2})")

More likely hypothesis: Hypothesis 2 (Hyp1: -3.610180616378784, Hyp2: -3.486727476119995)
