In [3]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [4]:
def generate_text(prompt, max_length=50, num_return_sequences=3):
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        inputs["input_ids"],
        max_length=max_length,
        num_return_sequences=num_return_sequences,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id
    )
    return [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]

prompts = {
    "Direct": "Write a motivational quote about overcoming fear.",
    "Scenario": "Imagine you're helping a friend who failed a test. Write something encouraging.",
    "Persona": "As a wise monk, write a quote about inner strength.",
    "Keyword": "Using the words 'growth', 'struggle', and 'hope', write something inspiring.",
    "Conversational": "User: I feel like giving up. GPT-2: Here's a quote for you:"
}

all_outputs = {}
for prompt_type, prompt_text in prompts.items():
    print(f"\n--- {prompt_type} Prompt ---")
    outputs = generate_text(prompt_text)
    all_outputs[prompt_type] = outputs
    for i, out in enumerate(outputs, 1):
        print(f"Output {i}:\n{out}\n")


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.



--- Direct Prompt ---
Output 1:
Write a motivational quote about overcoming fear.

"It's an amazing feeling when you're feeling scared, because your brain feels the pressure, and it's hard to do anything about it. But I think it's the most comforting thing about it

Output 2:
Write a motivational quote about overcoming fear.

7. Don't just read the book.

I don't have any great ideas for how to build a list of my favorite books, but I do know that I've written some great ones

Output 3:
Write a motivational quote about overcoming fear.

Step 6: Write a motivational quote about overcoming fear.

Step 7: Write a motivational quote about overcoming fear.

Step 8: Write a motivational quote about overcoming fear.

Step


--- Scenario Prompt ---
Output 1:
Imagine you're helping a friend who failed a test. Write something encouraging.

I'm trying to read you the book in English, but I can't. You're my friend. I want you to get involved.

You're reading

Output 2:
Imagine you're helping a fr

In [None]:
## from: His Last Bow (Sherlock Holmes) by Arthur Conan Doyle
human_reference = "Education never ends, Watson. It is a series of lessons, with the greatest for the last."
## reference_url : "https://www.goodreads.com/quotes/349646-education-never-ends-watson-it-is-a-series-of-lessons" ##

In [None]:
from bert_score import score

generated_texts = []
prompt_labels = []

for prompt_type, outputs in all_outputs.items():
    for i, text in enumerate(outputs, 1):
        generated_texts.append(text)
        prompt_labels.append((prompt_type, i))

P, R, F1 = score(generated_texts, [human_reference]*len(generated_texts), lang="en", verbose=True)

import pandas as pd

results = pd.DataFrame({
    "Prompt Type": [label[0] for label in prompt_labels],
    "Output": [label[1] for label in prompt_labels],
    "BERTScore F1": F1.tolist()
})

results


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:01<00:00,  1.84s/it]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 52.48it/s]

done in 1.86 seconds, 8.06 sentences/sec





Unnamed: 0,Prompt Type,Output #,BERTScore F1
0,Direct,1,0.841998
1,Direct,2,0.833071
2,Direct,3,0.828434
3,Scenario,1,0.832381
4,Scenario,2,0.83381
5,Scenario,3,0.829599
6,Persona,1,0.834555
7,Persona,2,0.84328
8,Persona,3,0.833714
9,Keyword,1,0.837111
