In [None]:
# !pip install transformers
# !pip install torch

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import random
from transformers import StoppingCriteria, StoppingCriteriaList
import torch

In [None]:
# Hub id of the checkpoint; the tokenizer and the model weights are both
# loaded from this one repository.
JUDGE_CHECKPOINT = "RoyVoy/GRPO-as-a-humour-Judge2"

tokenizer = AutoTokenizer.from_pretrained(JUDGE_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(JUDGE_CHECKPOINT)

In [None]:

# System message sent with every comparison. It is a plain (non-f) string on
# purpose: the text has no placeholders, and an f-string would start treating
# any literal brace added later as an expression.
SYSTEM_PROMPT = """
REQUEST: You are an expert academic editor comparing two potential titles for a research abstract.
You must judge which of the two titles, Sample A or Sample B, is Funnier, considering the provided abstract.

Respond with an exactly 128 tokens tag labeled <notes> that contains your notes, and then <judgement> which is just the letter that you are picking.

For example:

JUDGE: <notes>
Sample A is superior to Sample B... (example notes)
</notes>
<judgement>A</judgement>

Now, it is your turn."""


In [None]:
def format_title_AB_prompt(example, rng=None):
    """Build a two-message chat prompt that shows both titles as Sample A / B.

    The two titles are placed in slots A and B in a random order (one
    ``random()`` draw, original first when the draw is below 0.5).

    Args:
        example: dict with the keys ``'original_title'``, ``'created_title'``,
            ``'target_label'`` and ``'abstract'``. It is modified in place.
        rng: optional object exposing ``random()`` (e.g. ``random.Random(0)``)
            used for the A/B draw, so the ordering can be made reproducible.
            When ``None`` the module-level ``random`` generator is used.

    Returns:
        The same ``example`` dict, with three keys added:
        ``'prompt'`` (list of system + user messages), ``'assignment'``
        (maps ``'A'``/``'B'`` to ``'original_title'``/``'created_title'``) and
        ``'target_choice'`` (the letter holding the title named by
        ``target_label``).
    """
    target_label = example['target_label']

    # Exactly one draw decides which title goes into which slot.
    draw = random.random() if rng is None else rng.random()
    if draw < 0.5:
        slot_order = ('original_title', 'created_title')
    else:
        slot_order = ('created_title', 'original_title')

    assignment = {'A': slot_order[0], 'B': slot_order[1]}
    sample_a_content = example[assignment['A']]
    sample_b_content = example[assignment['B']]

    # Any label other than 'created_title' is treated as pointing at the
    # original title.
    target_key = 'created_title' if target_label == 'created_title' else 'original_title'
    target_choice = 'A' if assignment['A'] == target_key else 'B'

    user_content = f"""Abstract:
{example['abstract']}

[Sample A]:
{sample_a_content}

[Sample B]:
{sample_b_content}

JUDGE:"""

    example['prompt'] = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_content},
    ]
    example['assignment'] = assignment  # Record which title is A vs B
    example['target_choice'] = target_choice  # Record the 'correct' letter (A or B)

    return example


In [None]:



class StopOnTokens(StoppingCriteria):
    """Stopping criterion that fires when the output ends with a stop sequence.

    Only the first row of ``input_ids`` is inspected, so the result is only
    meaningful for a batch of one sequence.
    """

    def __init__(self, stop_token_ids):
        """Store the stop sequences as 1-D long tensors.

        Args:
            stop_token_ids: iterable of stop sequences. Each item may be a
                tensor, a list of ids, or a single id; a 2-D tensor is read
                as one sequence per row.
        """
        # Normalise once so __call__ can rely on every entry having a length
        # and an integer dtype, whatever form the caller passed in.
        self.stop_token_ids = [
            torch.as_tensor(seq, dtype=torch.long).reshape(-1)
            for seq in stop_token_ids
        ]

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True if row 0 of ``input_ids`` ends with any stop sequence."""
        row = input_ids[0]
        for stop_ids in self.stop_token_ids:
            width = stop_ids.shape[0]
            # An empty sequence can never be matched; a sequence longer than
            # the row cannot have been produced yet.
            if width == 0 or row.shape[0] < width:
                continue
            # Compare on the row's device so stop ids created elsewhere
            # (e.g. on CPU) do not raise a device-mismatch error.
            if torch.eq(row[-width:], stop_ids.to(row.device)).all().item():
                return True
        return False

# Hand-written example used to smoke-test the judge end to end.
test_example_raw = {
    'abstract': "Despite decades of research, the aerodynamic properties of common breakfast pastries remain poorly understood. This study utilizes computational fluid dynamics (CFD) to model airflow around a glazed donut at various subsonic speeds. Results indicate significant flow separation and unexpected vortex shedding, suggesting non-trivial lift generation potential under specific icing conditions.",
    'original_title': "CFD Analysis of Airflow Around a Toroidal Pastry",
    'created_title': "Soaring Donuts: An Aerodynamic Investigation",
    'humor_category': "Absurdity",
    'target_label': 'created_title'
}

# Build the chat messages, then render them to the text the model will see.
formatted_test_data = format_title_AB_prompt(test_example_raw)
inference_prompt_chat = formatted_test_data['prompt']
inference_prompt_text = tokenizer.apply_chat_template(inference_prompt_chat, tokenize=False, add_generation_prompt=True)
test_assignment = formatted_test_data['assignment']
test_target_choice = formatted_test_data['target_choice']


print("\n--- Test Prompt ---")
print(inference_prompt_text)
print(f"Assignment: {test_assignment}")
print(f"Correct Choice Should Be: {test_target_choice}")
print("--- Generating Response ---")


# Generation is cut off once the closing judgement tag has been produced.
stop_string = "</judgement>"
stop_token_ids = tokenizer.encode(stop_string, add_special_tokens=False, return_tensors='pt').to(model.device)


# NOTE(review): not used anywhere in this cell; kept in case a later cell reads it.
eos_token_id_tensor = torch.tensor([[tokenizer.eos_token_id]], device=model.device) # Wrap in list and tensor


stop_criteria = StoppingCriteriaList([StopOnTokens(stop_token_ids)])

# The chat template has already written the special tokens into the text, so
# the tokenizer must not add its own on top of them.
inputs = tokenizer(
    inference_prompt_text,
    return_tensors="pt",
    truncation=True,
    max_length=1010,
    add_special_tokens=False,
).to(model.device)

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=173,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
        stopping_criteria=stop_criteria,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )


# Decode only the newly generated tokens (everything after the prompt).
generated_text = tokenizer.decode(outputs[0, inputs.input_ids.shape[1]:], skip_special_tokens=True)

# --- Rest of your analysis code ---
print("\n--- Generated Response ---")

# Cut at the first closing tag. This also covers the case where the tag is
# followed by extra text, which a plain suffix check would leave in place.
generated_text = generated_text.partition(stop_string)[0]
print(generated_text)





--- Test Prompt ---
<|im_start|>system

REQUEST: You are an expert academic editor comparing two potential titles for a research abstract.
You must judge which of the two titles, Sample A or Sample B, is Funnier, considering the provided abstract.

Respond with an exactly 128 tokens tag labeled <notes> that contains your notes, and then <judgement> which is just the letter that you are picking.

For example:

JUDGE: <notes>
Sample A is superior to Sample B... (example notes)
</notes>
<judgement>A</judgement>

Now, it is your turn.<|im_end|>
<|im_start|>user
Abstract:
Despite decades of research, the aerodynamic properties of common breakfast pastries remain poorly understood. This study utilizes computational fluid dynamics (CFD) to model airflow around a glazed donut at various subsonic speeds. Results indicate significant flow separation and unexpected vortex shedding, suggesting non-trivial lift generation potential under specific icing conditions.

[Sample A]:
CFD Analysis of Airf