In [None]:
!pip install -q -U google-genai

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/196.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m196.3/196.3 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import argparse
from google import genai
from google.genai import types
import pandas as pd
import time
from sklearn.metrics import f1_score
import re
import json
import numpy as np
import matplotlib.pyplot as plt
# Lift the column-width limit so long text cells (claims, references,
# predictions) are shown in full when a DataFrame is displayed.
pd.set_option('display.max_colwidth', None)
def get_prediction(claim, reference, model_name):
    """Ask a Gemini model to label how ``claim`` relates to ``reference``.

    The request asks for a JSON object holding the chosen label (``answer``)
    and one ``<Label>_probability`` number for each of the eight labels.

    Args:
        claim: Claim text inserted into the prompt.
        reference: Reference text inserted into the prompt.
        model_name: Model identifier passed to ``generate_content``.

    Returns:
        The response text with surrounding whitespace stripped, or ``None``
        when the request (or reading its text) raises; the error is printed.
    """
    instructions = """
    You are an annotator concerned that the claim may not align with the reference.
    Your task is to determine whether the reference entail, is unrelated and unverifiable, is related but unverifiable, misrepresentation, missing information, contain a numeric error, contain an opposite meaning, or contain an entity error to the claim.
    You will be given two inputs: claim, reference.
    Entailment occurs when reference information directly supports a claim's accuracy with no conflicting information. The logical connection between reference and claim is strong enough that the claim's truth follows naturally from the reference content.
    Opposite meaning identifies claims that directly contradict references by stating the contrary position. This happens when a claim negates key parts of a reference or substitutes terms with their antonyms, fundamentally reversing the reference's meaning.
    Misrepresentation labels claims that present logical fallacies or flawed reasoning relative to references. This includes over-claiming, under-claiming, introducing ambiguity, creating inconsistency, or drawing conclusions that don't logically follow from the reference material.
    Related but unverifiable describes claims that connect to references through shared subjects or entities but cannot be verified because the reference lacks specific information to confirm or deny the claim's accuracy.
    Entity error identifies claims that incorrectly name entities (people, organizations, places) compared to reference information. Even if other claim elements are accurate, entity misidentification compromises the claim's overall accuracy.
    Unrelated and unverifiable applies to claims discussing topics or information entirely absent from references, providing no basis for accuracy assessment.
    Numeric error identifies claims presenting incorrect numerical values (quantities, percentages, dates) compared to reference figures.
    Missing information flags claims that omit critical reference details, significantly altering the original meaning or intent of the referenced information.
    Input:
    """

    # Single source of truth for the label set; its order fixes the order of
    # the enum values, the probability properties and the required list.
    class_labels = [
        "Opposite meaning",
        "Misrepresentation",
        "Related but unverifiable",
        "Entailment",
        "Entity error",
        "Unrelated and unverifiable",
        "Numeric error",
        "Missing information",
    ]

    # One numeric "<Label_with_underscores>_probability" property per label.
    probability_fields = {}
    for label in class_labels:
        field_name = label.replace(" ", "_") + "_probability"
        probability_fields[field_name] = {
            "type": "number",
            "description": f"Probability to predict class {label} (0.0 to 1.0)",
        }

    answer_schema = {
        "type": "object",
        "properties": {
            "answer": {
                "type": "string",
                "enum": class_labels,
                "description": "Your final answer",
            },
            **probability_fields,
        },
        "required": ["answer", *probability_fields],
    }

    client = genai.Client(api_key="")

    config = types.GenerateContentConfig(
        temperature=2.0,
        seed=13,
        response_mime_type="application/json",
        response_schema=answer_schema,
    )

    full_prompt = f"""
    {instructions}
    claim: {claim}
    reference: {reference}
    answer:
    """

    # Any failure (request error, or a response without text) is reported and
    # turned into None so the caller can continue with the remaining rows.
    try:
        reply = client.models.generate_content(
            model=model_name,
            contents=full_prompt,
            config=config,
        )
        return reply.text.strip()
    except Exception as err:
        print(f"Error: {err}")
        return None

def main(output_path, sleep_time, model_name, input_path='/content/prompt.csv'):
    """Label every claim/reference pair in a CSV and save the predictions.

    Reads ``input_path``, calls ``get_prediction`` once per row using the
    ``claim_clean`` and ``reference_clean`` columns, stores the results in a
    new ``predict`` column, and writes the frame to ``output_path`` without
    the index.

    Args:
        output_path: Destination CSV path.
        sleep_time: Seconds to pause after each request.
        model_name: Model identifier forwarded to ``get_prediction``.
        input_path: Source CSV path; defaults to the previously hard-coded
            ``/content/prompt.csv`` so existing calls behave the same.
    """
    df = pd.read_csv(input_path)

    predictions = []
    pairs = zip(df['claim_clean'], df['reference_clean'])

    # Count rows positionally so the progress message does not depend on the
    # DataFrame's index labels being 0-based integers.
    for processed, (claim, reference) in enumerate(pairs, start=1):
        predictions.append(get_prediction(claim, reference, model_name))

        if processed % 10 == 0:
            print(f"Processed {processed}...")

        # Pause between consecutive requests.
        time.sleep(sleep_time)

    df['predict'] = predictions

    df.to_csv(output_path, index=False)

In [16]:
# Run the full labelling pass with Gemini 2.5 Flash (preview), pausing
# 0.07 s after each request, and save the results to gem25fl_prompt.csv.
main(
    output_path="gem25fl_prompt.csv",
    sleep_time=0.07,
    model_name='gemini-2.5-flash-preview-05-20',
)

Processed 10...
Processed 20...
Processed 30...
Processed 40...
Processed 50...
Processed 60...
Processed 70...
Processed 80...
Processed 90...
Processed 100...
Processed 110...
Processed 120...
Processed 130...
Processed 140...
Processed 150...
Processed 160...
Processed 170...
Processed 180...
Processed 190...
Processed 200...
Processed 210...
Processed 220...
Processed 230...
Processed 240...
Processed 250...
Processed 260...
Processed 270...
Processed 280...
Processed 290...
Processed 300...
Processed 310...
Processed 320...
Processed 330...
Processed 340...
Processed 350...
Processed 360...
Processed 370...


In [8]:
# Scratch cell: build the client, generation config and prompt for a single
# hand-picked claim/reference pair so one request can be inspected manually.
client = genai.Client(api_key="")

# Label set; its order fixes the order of the enum values, the probability
# properties and the required list in the response schema.
_class_labels = [
    "Opposite meaning",
    "Misrepresentation",
    "Related but unverifiable",
    "Entailment",
    "Entity error",
    "Unrelated and unverifiable",
    "Numeric error",
    "Missing information",
]

# One numeric "<Label_with_underscores>_probability" property per label.
_probability_fields = {
    _label.replace(" ", "_") + "_probability": {
        "type": "number",
        "description": f"Probability to predict class {_label} (0.0 to 1.0)",
    }
    for _label in _class_labels
}

config = types.GenerateContentConfig(
    temperature=2.0,
    seed=13,
    response_mime_type="application/json",
    response_schema={
        "type": "object",
        "properties": {
            "answer": {
                "type": "string",
                "enum": _class_labels,
                "description": "Your final answer",
            },
            **_probability_fields,
        },
        "required": ["answer", *_probability_fields],
    },
)

# Task instructions plus one definition per label; the claim and reference
# are appended below.
prompt_prefix = """
    You are an annotator concerned that the claim may not align with the reference.
    Your task is to determine whether the reference entail, is unrelated and unverifiable, is related but unverifiable, misinterpret, omit critical information, contain a numeric error, contain an opposite meaning, or contain an entity error to the claim.
    You will be given two inputs: claim, reference.
    Entailment occurs when reference information directly supports a claim's accuracy with no conflicting information. The logical connection between reference and claim is strong enough that the claim's truth follows naturally from the reference content.
    Opposite meaning identifies claims that directly contradict references by stating the contrary position. This happens when a claim negates key parts of a reference or substitutes terms with their antonyms, fundamentally reversing the reference's meaning.
    Misrepresentation labels claims that present logical fallacies or flawed reasoning relative to references. This includes over-claiming, under-claiming, introducing ambiguity, creating inconsistency, or drawing conclusions that don't logically follow from the reference material.
    Related but unverifiable describes claims that connect to references through shared subjects or entities but cannot be verified because the reference lacks specific information to confirm or deny the claim's accuracy.
    Entity error identifies claims that incorrectly name entities (people, organizations, places) compared to reference information. Even if other claim elements are accurate, entity misidentification compromises the claim's overall accuracy.
    Unrelated and unverifiable applies to claims discussing topics or information entirely absent from references, providing no basis for accuracy assessment.
    Numeric error identifies claims presenting incorrect numerical values (quantities, percentages, dates) compared to reference figures.
    Missing information flags claims that omit critical reference details, significantly altering the original meaning or intent of the referenced information.
    Input:
    """

# Sample pair used for the manual check.
claim = "Microstructured Waveguide Biosensors: Applications: Analysis of various beverages like water, tea, coffee, wine, and strong drinks, which suggests they could be used for all types of liquids without limitations [9]."
reference = "[9]: The microstructured waveguide biosensor is described. The biosensor was tested in experiments for analysis of water, tea, coffee, wine and strong drinks. The biosensor has a high sensitivity to the optical properties of a medium, filling up the waveguide's core. The small size, good integration ability and compatibility for use in industrial settings make such biosensor very promising for various applications, including food industry."

full_prompt = f"""{prompt_prefix}
    claim: {claim}
    reference: {reference}
    answer:
    """

In [11]:
# Send the sample prompt with the config built in the previous cell and keep
# the full response object around for inspection.
response = client.models.generate_content(
    model="gemini-2.5-flash-preview-05-20",
    contents=full_prompt,
    config=config,
)

# Last expression of the cell: the response text, whitespace-trimmed.
response.text.strip()

GenerateContentResponse(candidates=[Candidate(content=Content(parts=[Part(video_metadata=None, thought=None, inline_data=None, code_execution_result=None, executable_code=None, file_data=None, function_call=None, function_response=None, text='{"answer": "Misrepresentation", "Opposite_meaning_probability": 0.05, "Misrepresentation_probability": 0.85, "Related_but_unverifiable_probability": 0.02, "Entailment_probability": 0.01, "Entity_error_probability": 0.01, "Unrelated_and_unverifiable_probability": 0.01, "Numeric_error_probability": 0.01, "Missing_information_probability": 0.04}')], role='model'), citation_metadata=None, finish_message=None, token_count=None, finish_reason=<FinishReason.STOP: 'STOP'>, url_context_metadata=None, avg_logprobs=None, grounding_metadata=None, index=0, logprobs_result=None, safety_ratings=None)], create_time=None, response_id=None, model_version='models/gemini-2.5-flash-preview-05-20', prompt_feedback=None, usage_metadata=GenerateContentResponseUsageMetada

In [13]:
# Display the text of the stored response with surrounding whitespace removed;
# with the JSON response MIME type requested above this is a JSON string.
response.text.strip()

'{"answer": "Misrepresentation", "Opposite_meaning_probability": 0.05, "Misrepresentation_probability": 0.85, "Related_but_unverifiable_probability": 0.02, "Entailment_probability": 0.01, "Entity_error_probability": 0.01, "Unrelated_and_unverifiable_probability": 0.01, "Numeric_error_probability": 0.01, "Missing_information_probability": 0.04}'