In [34]:
!pip install transformers torch bert-score pandas



In [41]:
# Import necessary libraries
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from bert_score import score
import pandas as pd

In [42]:
# Part 1: Load GPT-2 Model and Tokenizer
# Both objects come from the pretrained 'gpt2' checkpoint and are used as
# module-level globals by generate_quote further down.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
# Put the model in evaluation mode; as the cell's last expression this also
# displays the model's module tree.
model.eval()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [43]:
# Set pad_token_id explicitly to eos_token_id
# The tokenizer is later called with padding=True and generate() is passed a
# pad_token_id, so both the tokenizer and the model config reuse the
# end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = model.config.eos_token_id

In [44]:
# Function to generate text with GPT-2
def generate_quote(prompt, max_length=50, num_outputs=3):
    """Sample ``num_outputs`` continuations of ``prompt`` from the global GPT-2 model.

    Relies on the module-level ``tokenizer`` and ``model`` objects.

    Args:
        prompt: Text the model should continue.
        max_length: Forwarded to ``model.generate`` as ``max_length``; per the
            Transformers documentation this bound includes the prompt tokens.
        num_outputs: Number of independent samples to draw.

    Returns:
        A list with ``num_outputs`` strings, each holding only the newly
        generated text (prompt removed, surrounding whitespace stripped).
    """
    # Tokenization does not depend on the sample index, so do it once.
    inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True)
    input_ids = inputs['input_ids']
    attention_mask = inputs['attention_mask']
    prompt_token_count = input_ids.shape[1]

    outputs = []
    for _ in range(num_outputs):
        # Generate text with explicit attention_mask and pad_token_id
        outputs_gen = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_length=max_length,  # Use the function parameter
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            pad_token_id=tokenizer.eos_token_id
        )
        # Remove the prompt at the token level rather than by character count:
        # the decoded text is not guaranteed to reproduce the prompt string
        # exactly (decoding may normalise spacing around punctuation), so
        # slicing by len(prompt) could cut into, or leak part of, the prompt.
        new_tokens = outputs_gen[0][prompt_token_count:]
        generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
        outputs.append(generated_text)
    return outputs

In [45]:
# Part 2: Design 5 Distinct Prompts
# Maps a prompt-style label to the prompt text passed to generate_quote; the
# labels later become the 'Prompt Type' values in the results table.
prompts = {
    "Direct": "Write a motivational quote about overcoming fear.",
    "Scenario-based": "Imagine you’re helping a friend who failed a test. Write something encouraging.",
    "Persona-based": "As a wise monk, write a quote about inner strength.",
    "Keyword-based": "Using the words 'growth', 'struggle', and 'hope', write something inspiring.",
    "Conversational": "User: I feel like giving up. GPT-2: Here's a quote for you:"
}

In [46]:
# Generate 3 outputs for each prompt
# generate_quote samples (do_sample=True), so seed PyTorch's RNG first; without
# it every Restart & Run All yields different quotes and different scores.
torch.manual_seed(42)

# results maps each prompt-style label to its list of generated continuations.
results = {}
for prompt_type, prompt_text in prompts.items():
    generated_outputs = generate_quote(prompt_text, max_length=50, num_outputs=3)
    results[prompt_type] = generated_outputs

In [47]:
# Part 3: Human-Written Reference
# Single reference quote that every generated output is scored against.
human_reference = (
    "The greatest glory in living lies not in never falling, but in rising every time we fall."
)
# Web page recorded as the source of the reference quote (printed with the results).
reference_source = "https://www.brainyquote.com/quotes/nelson_mandela_378967"

# Part 4: Evaluate Outputs Using BERTScore
def compute_bertscore(generated_texts, reference_text):
    """Return the BERTScore F1 of each generated text against one reference.

    Args:
        generated_texts: Iterable of candidate strings to evaluate.
        reference_text: The single reference string every candidate is
            compared with.

    Returns:
        A list of F1 values, one per candidate. An empty input yields an
        empty list without invoking the scorer.
    """
    candidates = list(generated_texts)
    if not candidates:
        # Nothing to score: skip the scorer entirely instead of handing it
        # empty batches.
        return []

    # The same reference is paired with every candidate.
    references = [reference_text] * len(candidates)
    # Only F1 is reported; precision and recall are discarded.
    _, _, f1 = score(candidates, references, lang='en', verbose=True)
    return f1.tolist()

In [48]:
# Flatten the per-prompt outputs into (label, 1-based index, text) triples,
# split them into the three parallel lists used by the results table, and
# score every generated text against the human reference.
flat_outputs = [
    (label, number, text)
    for label, texts in results.items()
    for number, text in enumerate(texts, start=1)
]

all_generated_texts = [text for _, _, text in flat_outputs]
prompt_types = [label for label, _, _ in flat_outputs]
output_numbers = [number for _, number, _ in flat_outputs]

bert_scores = compute_bertscore(all_generated_texts, human_reference)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/1 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/1 [00:00<?, ?it/s]

done in 3.24 seconds, 4.63 sentences/sec


In [49]:
# Part 5: Results Table
# One row per generated text; each column comes from one of the parallel
# lists built while flattening the results.
table_columns = {
    'Prompt Type': prompt_types,
    'Output #': output_numbers,
    'Generated Text': all_generated_texts,
    'BERTScore F1': bert_scores,
}
results_table = pd.DataFrame(table_columns)

In [50]:
# Report: prompts, generated outputs, the reference quote, and the score table.
print("\n=== Prompts ===")
for prompt_type, prompt_text in prompts.items():
    print(f"{prompt_type}: {prompt_text}")

# Outputs are listed per prompt type, numbered from 1.
print("\n=== Generated Outputs ===")
for prompt_type, outputs in results.items():
    print(f"\n{prompt_type}:")
    for i, output in enumerate(outputs, start=1):
        print(f"Output {i}: {output}")

print("\n=== Human-Written Reference ===")
print(f"Quote: {human_reference}")
print(f"Source: {reference_source}")

# The printed table omits the generated text column; the CSV below keeps it.
print("\n=== BERTScore Results Table ===")
summary_columns = ['Prompt Type', 'Output #', 'BERTScore F1']
print(results_table[summary_columns])

# Persist the full table (all columns, no index) next to the notebook.
results_table.to_csv('motivational_quotes_results.csv', index=False)
print("\nResults saved to 'motivational_quotes_results.csv'")


=== Prompts ===
Direct: Write a motivational quote about overcoming fear.
Scenario-based: Imagine you’re helping a friend who failed a test. Write something encouraging.
Persona-based: As a wise monk, write a quote about inner strength.
Keyword-based: Using the words 'growth', 'struggle', and 'hope', write something inspiring.
Conversational: User: I feel like giving up. GPT-2: Here's a quote for you:

=== Generated Outputs ===

Direct:
Output 1: 3. Take a mental step toward self-help
"I want you to be a human being. Do you think you can do that, if you have to spend time with your friends and family
Output 2: Take a step away from the big scary words and make the point that the fear is real. It is easy to forget about the scary things that you have to deal with in order to feel good at something.
Output 3: You can use this tool to ask your partner questions about the things you'd like to discuss in your life. Ask them about specific topics that you have been thinking about or trying 