In [13]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from rouge import Rouge
import torch

In [2]:
model_name = 'microsoft/DialoGPT-large'

tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side='left')
model = AutoModelForCausalLM.from_pretrained(model_name)

In [17]:
def get_response_and_scores(prompt, reference):
    # Encode
    inputs = tokenizer.encode(prompt + tokenizer.eos_token, return_tensors="pt")
    outputs = model.generate(inputs, max_length=150, num_return_sequences=1, temperature=0.7)
    # Decode
    response = tokenizer.decode(outputs[0], skip_special_tokens=False)

    input_ids = tokenizer.encode(response, return_tensors='pt')
    with torch.no_grad():
        outputs = model(input_ids, labels=input_ids)
    loss = outputs.loss
    perplexity = torch.exp(loss)

    # Compute ROUGE scores
    rouge = Rouge()
    scores = rouge.get_scores(response, reference)

    return response, perplexity.item(), scores

In [18]:
new_prompt = 'Imagine you are an expert on AI. Explain the impact of AI.'
reference_text = 'AI has a significant impact on various fields, including healthcare, transportation, and finance. It can automate tasks, improve decision-making, and provide new insights from data.'

response, perplexity, rouge_scores = get_response_and_scores(new_prompt, reference_text)
print('\nResponse:', response)
print('Perplexity:', perplexity)
print('ROUGE scores:', rouge_scores)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.



Response: Imagine you are an expert on AI. Explain the impact of AI.<|endoftext|>I'm not an expert on AI, but I can tell you that it's not a good idea to use AI to make money.<|endoftext|>
Perplexity: 31.15668296813965
ROUGE scores: [{'rouge-1': {'r': 0.20833333333333334, 'p': 0.17857142857142858, 'f': 0.19230768733727824}, 'rouge-2': {'r': 0.0, 'p': 0.0, 'f': 0.0}, 'rouge-l': {'r': 0.125, 'p': 0.10714285714285714, 'f': 0.1153846104142014}}]
