In [None]:
!pip install openai



In [None]:
import argparse
from openai import OpenAI
import pandas as pd
import time

def get_prediction(claim, reference, model_name, client=None):
    """Ask a chat model for per-category probabilities relating a claim to a reference.

    The prompt defines eight categories (entailment, opposite meaning,
    misrepresentation, related but unverifiable, entity error, unrelated and
    unverifiable, numeric error, missing information) and asks the model to
    answer only with a JSON-shaped object of probabilities.

    Args:
        claim: Claim text; interpolated into the prompt unchanged.
        reference: Reference text; interpolated into the prompt unchanged.
        model_name: Model identifier passed to ``chat.completions.create``.
        client: Optional object exposing ``chat.completions.create``. Pass one
            to reuse a single client across many calls. When omitted, a new
            ``OpenAI()`` client is created, which takes its key from the
            ``OPENAI_API_KEY`` environment variable instead of a key embedded
            in the notebook.

    Returns:
        The message content of the first choice, returned as raw text (it is
        not parsed here), or ``None`` if the request raised an exception.

    Raises:
        Any error raised while constructing the default client (for example
        when no API key is configured). This happens outside the ``try`` so a
        misconfigured environment fails immediately instead of producing
        ``None`` for every row.
    """
    prompt_prefix = """
    You are an annotator concerned that the claim may not align with the reference.
    Your task is to determine whether the reference entail, is unrelated and unverifiable, is related but unverifiable, misinterpret, omit critical information, contain a numeric error, contain an opposite meaning, or contain an entity error to the claim.
    You will be given two inputs: claim, reference.
    Entailment occurs when reference information directly supports a claim's accuracy with no conflicting information. The logical connection between reference and claim is strong enough that the claim's truth follows naturally from the reference content.
    Opposite meaning identifies claims that directly contradict references by stating the contrary position. This happens when a claim negates key parts of a reference or substitutes terms with their antonyms, fundamentally reversing the reference's meaning.
    Misrepresentation labels claims that present logical fallacies or flawed reasoning relative to references. This includes over-claiming, under-claiming, introducing ambiguity, creating inconsistency, or drawing conclusions that don't logically follow from the reference material.
    Related but unverifiable describes claims that connect to references through shared subjects or entities but cannot be verified because the reference lacks specific information to confirm or deny the claim's accuracy.
    Entity error identifies claims that incorrectly name entities (people, organizations, places) compared to reference information. Even if other claim elements are accurate, entity misidentification compromises the claim's overall accuracy.
    Unrelated and unverifiable applies to claims discussing topics or information entirely absent from references, providing no basis for accuracy assessment.
    Numeric error identifies claims presenting incorrect numerical values (quantities, percentages, dates) compared to reference figures.
    Missing information flags claims that omit critical reference details, significantly altering the original meaning or intent of the referenced information.
    Input:
"""

    if client is None:
        # Never hard-code credentials: OpenAI() picks up OPENAI_API_KEY itself.
        client = OpenAI()

    # Doubled braces are literal braces in the f-string; only claim, reference
    # and the prefix are interpolated.
    full_prompt = f"""
    {prompt_prefix}
    claim: {claim}
    reference: {reference}
    Give the probabilities rounded to 3 decimal places for each category. The total probability is 1. Answer only output format:
    {{
        "Opposite_meaning_probability": Probability to predict class Opposite meaning (0.0 to 1.0),
        "Misrepresentation_probability": Probability to predict class Misrepresentation (0.0 to 1.0),
        "Related_but_unverifiable_probability": Probability to predict class Related but unverifiable (0.0 to 1.0),
        "Entailment_probability": Probability to predict class Entailment (0.0 to 1.0),
        "Entity_error_probability": Probability to predict class Entity error (0.0 to 1.0),
        "Unrelated_and_unverifiable_probability": Probability to predict class Unrelated and unverifiable (0.0 to 1.0),
        "Numeric_error_probability": Probability to predict class Numeric error (0.0 to 1.0),
        "Missing_information_probability": Probability to predict class Missing information (0.0 to 1.0)
    }}
    """
    try:
        response = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": "You are an annotator concerned that the claim may not align with the reference."},
                {"role": "user", "content": full_prompt}
            ],
            reasoning_effort="high",
            seed=13
        )
        return response.choices[0].message.content
    except Exception as e:
        # Best-effort: report the failure and return None so a caller looping
        # over many rows can carry on with the next one.
        print(f"Error: {e}")
        return None

def main(output_path, sleep_time, model_name, input_path='/content/prompt_0.csv'):
    """Run get_prediction over every row of a CSV and save the predictions.

    Args:
        output_path: Destination CSV path; written without the index column.
        sleep_time: Seconds to pause after each request.
        model_name: Model identifier forwarded to get_prediction.
        input_path: CSV to read. It must contain the columns 'claim_clean' and
            'reference_clean'. Defaults to the path this notebook originally
            had hard-coded, so existing calls behave as before.

    Returns:
        None. The input rows plus a new 'predict' column (raw model text, or
        None where the request failed) are written to ``output_path``.
    """
    df = pd.read_csv(input_path)

    predictions = []

    # Count processed rows with enumerate rather than the index label, so the
    # progress message does not depend on the frame having a 0..n-1 index.
    for processed, (_, row) in enumerate(df.iterrows(), start=1):
        prediction = get_prediction(row['claim_clean'], row['reference_clean'], model_name)
        predictions.append(prediction)

        if processed % 10 == 0:
            print(f"Processed {processed}...")

        # Simple pacing between consecutive API requests.
        time.sleep(sleep_time)

    df['predict'] = predictions

    df.to_csv(output_path, index=False)

In [2]:
# Batch run with o3-mini, pausing 0.07 s after each request.
# NOTE(review): the input read by main() is '/content/prompt_0.csv' while this
# output file is named "...prompt_1.csv" - confirm the naming is intended.
main(
    output_path="o3-mini-high-prompt_1.csv",
    sleep_time=0.07,
    model_name="o3-mini",
)

Processed 10...
Processed 20...
Processed 30...
Processed 40...
Processed 50...
Processed 60...
Processed 70...
Processed 80...
Processed 90...
Processed 100...
Processed 110...
Processed 120...


In [None]:
# Ad-hoc setup for sending one hand-written claim/reference pair.
# Credentials are not embedded in the notebook: OpenAI() reads the key from the
# OPENAI_API_KEY environment variable.
client = OpenAI()

# Category definitions shown to the model ahead of the inputs.
prompt_prefix = """
    You are an annotator concerned that the claim may not align with the reference.
    Your task is to determine whether the reference entail, is unrelated and unverifiable, is related but unverifiable, misinterpret, omit critical information, contain a numeric error, contain an opposite meaning, or contain an entity error to the claim.
    You will be given two inputs: claim, reference.
    Entailment occurs when reference information directly supports a claim's accuracy with no conflicting information. The logical connection between reference and claim is strong enough that the claim's truth follows naturally from the reference content.
    Opposite meaning identifies claims that directly contradict references by stating the contrary position. This happens when a claim negates key parts of a reference or substitutes terms with their antonyms, fundamentally reversing the reference's meaning.
    Misrepresentation labels claims that present logical fallacies or flawed reasoning relative to references. This includes over-claiming, under-claiming, introducing ambiguity, creating inconsistency, or drawing conclusions that don't logically follow from the reference material.
    Related but unverifiable describes claims that connect to references through shared subjects or entities but cannot be verified because the reference lacks specific information to confirm or deny the claim's accuracy.
    Entity error identifies claims that incorrectly name entities (people, organizations, places) compared to reference information. Even if other claim elements are accurate, entity misidentification compromises the claim's overall accuracy.
    Unrelated and unverifiable applies to claims discussing topics or information entirely absent from references, providing no basis for accuracy assessment.
    Numeric error identifies claims presenting incorrect numerical values (quantities, percentages, dates) compared to reference figures.
    Missing information flags claims that omit critical reference details, significantly altering the original meaning or intent of the referenced information.
    Input:
    """
# Example pair: the claim generalises the reference's list of tested beverages
# to "all types of liquids without limitations".
claim = "Microstructured Waveguide Biosensors: Applications: Analysis of various beverages like water, tea, coffee, wine, and strong drinks, which suggests they could be used for all types of liquids without limitations [9]."
reference = "[9]: The microstructured waveguide biosensor is described. The biosensor was tested in experiments for analysis of water, tea, coffee, wine and strong drinks. The biosensor has a high sensitivity to the optical properties of a medium, filling up the waveguide's core. The small size, good integration ability and compatibility for use in industrial settings make such biosensor very promising for various applications, including food industry."
# Doubled braces are literal braces in the f-string.
full_prompt = f"""
    {prompt_prefix}
    claim: {claim}
    reference: {reference}
    Give the probabilities rounded to 3 decimal places for each category. The total probability is 1. Answer only output format:
    {{
        "Opposite_meaning_probability": Probability to predict class Opposite meaning (0.0 to 1.0),
        "Misrepresentation_probability": Probability to predict class Misrepresentation (0.0 to 1.0),
        "Related_but_unverifiable_probability": Probability to predict class Related but unverifiable (0.0 to 1.0),
        "Entailment_probability": Probability to predict class Entailment (0.0 to 1.0),
        "Entity_error_probability": Probability to predict class Entity error (0.0 to 1.0),
        "Unrelated_and_unverifiable_probability": Probability to predict class Unrelated and unverifiable (0.0 to 1.0),
        "Numeric_error_probability": Probability to predict class Numeric error (0.0 to 1.0),
        "Missing_information_probability": Probability to predict class Missing information (0.0 to 1.0)
    }}
"""

In [25]:
# One-off request for the hand-built prompt, sent to o3-mini with high
# reasoning effort and a fixed seed.
response = client.chat.completions.create(
    model="o3-mini",
    reasoning_effort="high",
    seed=13,
    messages=[
        {
            "role": "system",
            "content": "You are an annotator concerned that the claim may not align with the reference.",
        },
        {"role": "user", "content": full_prompt},
    ],
)

In [26]:
# Raw text of the first choice; as the cell's last expression it is displayed
# as the cell output.
response.choices[0].message.content

'{\n    "Opposite_meaning_probability": 0.000,\n    "Misrepresentation_probability": 0.800,\n    "Related_but_unverifiable_probability": 0.000,\n    "Entailment_probability": 0.150,\n    "Entity_error_probability": 0.000,\n    "Unrelated_and_unverifiable_probability": 0.000,\n    "Numeric_error_probability": 0.000,\n    "Missing_information_probability": 0.050\n}'