In [2]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load pre-trained GPT-2 model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Define the pad token for GPT-2
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = model.config.eos_token_id

# Function to refine caption based on detected emotion
def refine_caption_with_emotion(base_caption, emotion_label):
    # More expressive emotion-based prompts
    emotion_prompts = {
        "happy": "Rewrite the following caption to reflect a joyful and bright expression, capturing a face beaming with happiness and a positive aura: ",
        "sad": "Rewrite the following caption to convey a somber and pensive expression, with downcast eyes and a face that reflects a sense of sadness and longing: "
    }
    
    # Select prompt based on emotion
    prompt = f"{emotion_prompts.get(emotion_label, '')}{base_caption}"
    
    # Encode prompt for GPT-2
    inputs = tokenizer.encode(prompt, return_tensors="pt")
    
    # Generate refined caption with adjusted settings for more variety
    output = model.generate(
        inputs,
        max_length=len(inputs[0])+50,
        do_sample=True,           # Enable sampling for variety
        top_k=50,                 # Limits to top 50 choices, encouraging creativity
        top_p=0.9,                # Nucleus sampling to include top probabilities up to 90%
        temperature=1.2,          # Slightly higher temperature for more diversity
        no_repeat_ngram_size=2,   # Avoid repetitive phrases
        pad_token_id=tokenizer.pad_token_id
    )
    
    # Decode the output back to text
    refined_caption = tokenizer.decode(output[0], skip_special_tokens=True)
    return refined_caption

# Example usage
base_caption = "A baby is playing."
emotion_label = "happy"

# Generate refined caption
refined_caption = refine_caption_with_emotion(base_caption, emotion_label)
print("Base Caption:", base_caption)
print("Emotion Label:", emotion_label)
print("Refined Caption:", refined_caption)




Base Caption: A baby is playing.
Emotion Label: happy
Refined Caption: Rewrite the following caption to reflect a joyful and bright expression, capturing a face beaming with happiness and a positive aura: A baby is playing. That's right – a baby that's looking up at me smiling so heartily is a great moment! We all know what you're up to now; what time we're going to be here or have a good meal, why we've missed you
