In [None]:
from prometheus_eval.vllm import VLLM
from prometheus_eval import PrometheusEval
from prometheus_eval.prompts import ABSOLUTE_PROMPT, SCORE_RUBRIC_TEMPLATE
from transformers import AutoModelForCausalLM, AutoTokenizer

In [None]:
# --- Evaluation sample -------------------------------------------------------
# `instruction` is the task given to the translator, `response` is the candidate
# translation to be graded, and `reference_answer` is the gold translation.
#
# `instruction` must be a plain string: a trailing comma after the literal
# would turn it into a one-element tuple, and the tuple's repr
# ("('The user ...',)") would then be formatted into the judge prompt.
instruction = "The user needs to translate sentences from archaic italian to modern italian. He provides this sentence to translate: crudele, e di tutte le colpe pigli vendetta, come dice la legge, ed a neuno cavaliere perdoni che pecchi"
response = "Crudele, e di tutte le colpe prendi vendetta, come dice la legge, e a nessun cavaliere perdona che pecchi."
# Kept as an explicit one-element tuple for compatibility with code that reads
# `reference_answer[0]`; turning it into a bare string would make that index
# expression return only the first character.
reference_answer = (
    "Crudele, e punisci tutte le colpe, come dice la legge, e non perdonare a nessun cavaliere che commetta peccato.",
)

# --- Scoring rubric ----------------------------------------------------------
# One evaluation criterion plus a description for each score from 1 to 5.
# The keys are the placeholders filled into SCORE_RUBRIC_TEMPLATE below.
rubric_data = {
  "criteria":"Is the model proficient in translating archaic Italian sentences into modern Italian?",
  "score1_description":"The model isn't capable of translating archaic Italian sentences into modern Italian, producing responses that are either completely incorrect or irrelevant.",
  "score2_description":"The model is capable of translating some archaic Italian sentences into modern Italian, but the translations are often inaccurate or incomplete, failing to capture the essence of the original text.",
  "score3_description":"The model typically translates archaic Italian sentences into modern Italian with moderate accuracy, but may struggle with complex phrases or idiomatic expressions, leading to occasional inaccuracies.",
  "score4_description":"The model consistently translates archaic Italian sentences into modern Italian with high accuracy, capturing the essence of the original text and handling most idiomatic expressions effectively.",
  "score5_description":"The model excels in translating archaic Italian sentences into modern Italian, demonstrating a deep understanding of both the source and target languages, and consistently producing translations that are not only accurate but also stylistically appropriate."
}

# Render the rubric dictionary into the text block embedded in the judge prompt.
score_rubric = SCORE_RUBRIC_TEMPLATE.format(**rubric_data)


In [None]:
# Single source of truth for the checkpoint, so the model and the tokenizer
# can never drift apart.
_CHECKPOINT = "prometheus-eval/prometheus-7b-v2.0"

device = "cuda"  # device the model and its inputs are moved onto later

# The two loads are independent of each other; the tokenizer is fetched first
# simply because it is the cheaper of the two.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(_CHECKPOINT)

In [None]:
def _as_text(value):
    """Return ``value`` as a plain string suitable for prompt formatting.

    A string literal followed by a stray trailing comma becomes a one-element
    tuple; formatting that tuple directly would put its repr
    (``('text',)``) into the prompt. Tuples and lists are therefore joined
    into a single string, anything else is returned unchanged.

    Args:
        value: A string, or a tuple/list of string fragments.

    Returns:
        The string itself, or the fragments joined with a single space
        (for a one-element sequence this is exactly that element).
    """
    if isinstance(value, (tuple, list)):
        return " ".join(str(part) for part in value)
    return value


ABS_SYSTEM_PROMPT = "You are a helpful assistant that grades the quality of responses to user instructions. You will be given an instruction, a response, and a rubric. Your task is to provide feedback on the response and assign a score based on the rubric."
# NOTE: this intentionally overrides the ABSOLUTE_PROMPT imported from
# prometheus_eval.prompts with a custom template. It is a plain format string
# (no f-prefix needed) whose placeholders are filled by .format() below.
ABSOLUTE_PROMPT = "Instruction: {instruction}\n\nResponse: {response}\n\nRubric: {rubric}\n\nReference Answer: {reference_answer}\n\nFeedback:"

# The system prompt is prepended to the single user turn instead of being sent
# as a separate chat message.
user_content = ABS_SYSTEM_PROMPT + "\n\n" + ABSOLUTE_PROMPT.format(
    instruction=_as_text(instruction),
    response=_as_text(response),
    rubric=score_rubric,
    reference_answer=_as_text(reference_answer),
)

messages = [
    {"role": "user", "content": user_content},
]

# Apply the tokenizer's chat template to the conversation and tokenize it.
encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")

# Inputs and model must live on the same device before generation.
model_inputs = encodeds.to(device)
model.to(device)

# do_sample=True makes decoding stochastic, so the feedback text (and possibly
# the score) can differ between runs.
generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
decoded = tokenizer.batch_decode(generated_ids)
print(decoded[0])