# Romance

In [1]:
import pandas as pd
import openai
import json
import os
import re
from dotenv import load_dotenv

# Read variables from a local .env file into the process environment
load_dotenv()

# Hand the API key to the OpenAI client (os.getenv yields None when the variable is unset)
openai.api_key = os.getenv('OPENAI_API_KEY')

# Experiment configuration: language family, prompting method, and model name
language_family = "romance"
method = "self_translation"
model = "gpt-4o-mini"

# Input dataset path, plus the results CSV and summary JSON paths derived from the configuration
input_file_path = f'../../JSON Files/{language_family}.json'
output_file_path = f"{language_family}_{model}_{method}.csv"
summary_file_path = f"{language_family}_{model}_{method}_summary.json"

# Guard clause: stop immediately when the dataset file is missing
if not os.path.exists(input_file_path):
    raise FileNotFoundError(f"File not found: {input_file_path}")
with open(input_file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# One list per field; every record is indexed by all four keys
claims = [record['claim'] for record in data]
labels = [record['label'] for record in data]
languages = [record['language'] for record in data]
sites = [record['site'] for record in data]

# Working table with one row per claim
df = pd.DataFrame(dict(claim=claims, label=labels, language=languages, site=sites))

def get_translation(claim, model=model):
    """Ask the chat model to translate ``claim`` into English.

    Args:
        claim: Claim text to translate.
        model: Chat model name; defaults to the module-level ``model``.

    Returns:
        The message content of the first completion choice, unparsed. The
        prompt asks for a 'Translated Claim:' prefix, which callers extract.
    """
    prompt_text = f"Translate the following claim into English: '{claim}'. You must always make sure your final response is prefixed with 'Translated Claim:' followed by the translated claim."
    chat_messages = [{"role": "user", "content": prompt_text}]

    # Single-turn request sent with temperature fixed at 0
    completion = openai.ChatCompletion.create(
        model=model,
        messages=chat_messages,
        temperature=0,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']

def get_gpt_response(translated_claim, model=model):
    """Ask the chat model to rate the veracity of an English claim.

    Args:
        translated_claim: Claim text to be judged.
        model: Chat model name; defaults to the module-level ``model``.

    Returns:
        The message content of the first completion choice, unparsed. The
        prompt asks for a 'Final Answer:' prefix followed by one of the five
        verdict labels.
    """
    prompt_text = f"'{translated_claim}' Is this claim 'True', 'Mostly True', 'Half True', 'Mostly False', or 'False'? You must always make sure your final response is prefixed with 'Final Answer:' followed by either 'True', 'Mostly True', 'Half True', 'Mostly False', or 'False'."
    chat_messages = [{"role": "user", "content": prompt_text}]

    # Single-turn request sent with temperature fixed at 0
    completion = openai.ChatCompletion.create(
        model=model,
        messages=chat_messages,
        temperature=0,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']

# Resume support: start with no records unless a results CSV from an earlier run exists
if not os.path.exists(output_file_path):
    outputs = []
else:
    output_df = pd.read_csv(output_file_path)
    outputs = output_df.to_dict('records')

# Same for the tallies: zeroed counters unless a saved summary JSON exists
if not os.path.exists(summary_file_path):
    summary = dict(correct=0, wrong=0, inconclusive=0, total=0, languages={})
else:
    with open(summary_file_path, 'r', encoding='utf-8') as file:
        summary = json.load(file)

# Function to clean the output text
def clean_output(output):
    """Extract the verdict that follows 'Final Answer:' in a model reply.

    Matching is case-insensitive and uses the first occurrence. Whitespace,
    markdown emphasis (``*``, ``_``) and quote characters between the prefix
    and the verdict are skipped, so a reply such as ``Final Answer: **True**``
    is recognised instead of being reported as missing.

    Args:
        output: Raw reply text. ``None`` or an empty string is accepted.

    Returns:
        The lower-cased verdict ('true', 'mostly true', 'half true',
        'mostly false' or 'false'), or ``None`` when no verdict is found.
    """
    if not output:
        return None
    final_answer_match = re.search(
        r'Final Answer:[\s*_"\']*(True|Mostly True|Half True|Mostly False|False)',
        output,
        re.IGNORECASE,
    )
    return final_answer_match.group(1).strip().lower() if final_answer_match else None

# Process claims and update files iteratively
# Claims already present in `outputs` (for example loaded from an earlier run) are skipped.
# A set gives a constant-time membership test instead of rescanning every record per row.
processed_claims = {output['claim'] for output in outputs}
# Compiled once; captures the rest of the line after the 'Translated Claim:' prefix
translation_pattern = re.compile(r'Translated Claim:\s*(.*)', re.IGNORECASE)

for index, row in df.iterrows():
    claim = row['claim']
    if claim in processed_claims:
        continue  # Skip already processed claims

    label = row['label']
    language = row['language']

    # Translate the claim to English
    translated_claim_raw = get_translation(claim)
    translated_claim_match = translation_pattern.search(translated_claim_raw)
    translated_claim = translated_claim_match.group(1).strip() if translated_claim_match else None
    print(f"Original Claim: {claim}")
    print(f"Model Translated Claim: {translated_claim}")

    # A missing prefix or an empty capture both mean there is nothing to evaluate.
    # Nothing is recorded, so the claim is retried on the next run.
    if not translated_claim:
        print("Error: Translated claim not found.")
        continue

    # Evaluate the translated claim
    output_raw = get_gpt_response(translated_claim)
    print(f"Model Output: {output_raw}")

    # Extract final answer from the cleaned output
    final_answer = clean_output(output_raw)

    # Classify once; the same outcome drives the prints, the record and both tallies
    is_inconclusive = final_answer is None
    is_correct = (not is_inconclusive) and final_answer == label.lower()
    if is_inconclusive:
        outcome = 'inconclusive'
        print("Inconclusive response\n\n")
    else:
        print(f"Final Answer: {final_answer.capitalize()}, Actual Answer: {label.capitalize()}")
        outcome = 'correct' if is_correct else 'wrong'
        print("Correct response\n\n" if is_correct else "Wrong response\n\n")

    # Save outputs
    output_record = {
        'claim': claim,
        'label': label,
        'language': language,
        'translated_claim': translated_claim,
        'output': output_raw,
        'final_answer': final_answer,
        'correct': is_correct,
        'inconclusive': is_inconclusive
    }
    outputs.append(output_record)
    processed_claims.add(claim)

    # Update the overall and per-language tallies
    language_summary = summary['languages'].setdefault(
        language, {'correct': 0, 'wrong': 0, 'inconclusive': 0, 'total': 0}
    )
    for tally in (summary, language_summary):
        tally[outcome] += 1
        tally['total'] += 1

    # Save results to CSV iteratively
    pd.DataFrame(outputs).to_csv(output_file_path, index=False, encoding='utf-8')

    # Save summary to JSON iteratively
    with open(summary_file_path, 'w', encoding='utf-8') as file:
        json.dump(summary, file, ensure_ascii=False, indent=4)

print(f"Results saved to {output_file_path} and {summary_file_path}")
print(f"Summary: {summary}")


  from pandas.core import (


Original Claim: Le Nigeria dépense 7 fois moins en soins de santé par habitant que l'Afrique du Sud.
Model Translated Claim: Nigeria spends 7 times less on healthcare per capita than South Africa.
Model Output: To determine the accuracy of the claim that "Nigeria spends 7 times less on healthcare per capita than South Africa," we need to look at the most recent and reliable data on healthcare expenditure per capita for both countries.

According to the World Bank data for 2020:
- Nigeria's healthcare expenditure per capita was approximately $82.
- South Africa's healthcare expenditure per capita was approximately $526.

To find out how many times less Nigeria spends compared to South Africa, we can perform the following calculation:
\[ \frac{526}{82} \approx 6.41 \]

This means that Nigeria spends about 6.41 times less on healthcare per capita than South Africa, which is close to the claim of 7 times but not exactly 7 times.

Given this information, the claim is not entirely accurate b

# Indo-Aryan

In [2]:
import pandas as pd
import openai
import json
import os
import re
from dotenv import load_dotenv

# Read variables from a local .env file into the process environment
load_dotenv()

# Hand the API key to the OpenAI client (os.getenv yields None when the variable is unset)
openai.api_key = os.getenv('OPENAI_API_KEY')

# Experiment configuration: language family, prompting method, and model name
language_family = "indo_aryan"
method = "self_translation"
model = "gpt-4o-mini"

# Input dataset path, plus the results CSV and summary JSON paths derived from the configuration
input_file_path = f'../../JSON Files/{language_family}.json'
output_file_path = f"{language_family}_{model}_{method}.csv"
summary_file_path = f"{language_family}_{model}_{method}_summary.json"

# Guard clause: stop immediately when the dataset file is missing
if not os.path.exists(input_file_path):
    raise FileNotFoundError(f"File not found: {input_file_path}")
with open(input_file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# One list per field; every record is indexed by all four keys
claims = [record['claim'] for record in data]
labels = [record['label'] for record in data]
languages = [record['language'] for record in data]
sites = [record['site'] for record in data]

# Working table with one row per claim
df = pd.DataFrame(dict(claim=claims, label=labels, language=languages, site=sites))

def get_translation(claim, model=model):
    """Ask the chat model to translate ``claim`` into English.

    Args:
        claim: Claim text to translate.
        model: Chat model name; defaults to the module-level ``model``.

    Returns:
        The message content of the first completion choice, unparsed. The
        prompt asks for a 'Translated Claim:' prefix, which callers extract.
    """
    prompt_text = f"Translate the following claim into English: '{claim}'. You must always make sure your final response is prefixed with 'Translated Claim:' followed by the translated claim."
    chat_messages = [{"role": "user", "content": prompt_text}]

    # Single-turn request sent with temperature fixed at 0
    completion = openai.ChatCompletion.create(
        model=model,
        messages=chat_messages,
        temperature=0,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']

def get_gpt_response(translated_claim, model=model):
    """Ask the chat model to rate the veracity of an English claim.

    Args:
        translated_claim: Claim text to be judged.
        model: Chat model name; defaults to the module-level ``model``.

    Returns:
        The message content of the first completion choice, unparsed. The
        prompt asks for a 'Final Answer:' prefix followed by one of the five
        verdict labels.
    """
    prompt_text = f"'{translated_claim}' Is this claim 'True', 'Mostly True', 'Half True', 'Mostly False', or 'False'? You must always make sure your final response is prefixed with 'Final Answer:' followed by either 'True', 'Mostly True', 'Half True', 'Mostly False', or 'False'."
    chat_messages = [{"role": "user", "content": prompt_text}]

    # Single-turn request sent with temperature fixed at 0
    completion = openai.ChatCompletion.create(
        model=model,
        messages=chat_messages,
        temperature=0,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']

# Resume support: start with no records unless a results CSV from an earlier run exists
if not os.path.exists(output_file_path):
    outputs = []
else:
    output_df = pd.read_csv(output_file_path)
    outputs = output_df.to_dict('records')

# Same for the tallies: zeroed counters unless a saved summary JSON exists
if not os.path.exists(summary_file_path):
    summary = dict(correct=0, wrong=0, inconclusive=0, total=0, languages={})
else:
    with open(summary_file_path, 'r', encoding='utf-8') as file:
        summary = json.load(file)

# Function to clean the output text
def clean_output(output):
    """Extract the verdict that follows 'Final Answer:' in a model reply.

    Matching is case-insensitive and uses the first occurrence. Whitespace,
    markdown emphasis (``*``, ``_``) and quote characters between the prefix
    and the verdict are skipped, so a reply such as ``Final Answer: **True**``
    is recognised instead of being reported as missing.

    Args:
        output: Raw reply text. ``None`` or an empty string is accepted.

    Returns:
        The lower-cased verdict ('true', 'mostly true', 'half true',
        'mostly false' or 'false'), or ``None`` when no verdict is found.
    """
    if not output:
        return None
    final_answer_match = re.search(
        r'Final Answer:[\s*_"\']*(True|Mostly True|Half True|Mostly False|False)',
        output,
        re.IGNORECASE,
    )
    return final_answer_match.group(1).strip().lower() if final_answer_match else None

# Process claims and update files iteratively
# Claims already present in `outputs` (for example loaded from an earlier run) are skipped.
# A set gives a constant-time membership test instead of rescanning every record per row.
processed_claims = {output['claim'] for output in outputs}
# Compiled once; captures the rest of the line after the 'Translated Claim:' prefix
translation_pattern = re.compile(r'Translated Claim:\s*(.*)', re.IGNORECASE)

for index, row in df.iterrows():
    claim = row['claim']
    if claim in processed_claims:
        continue  # Skip already processed claims

    label = row['label']
    language = row['language']

    # Translate the claim to English
    translated_claim_raw = get_translation(claim)
    translated_claim_match = translation_pattern.search(translated_claim_raw)
    translated_claim = translated_claim_match.group(1).strip() if translated_claim_match else None
    print(f"Original Claim: {claim}")
    print(f"Model Translated Claim: {translated_claim}")

    # A missing prefix or an empty capture both mean there is nothing to evaluate.
    # Nothing is recorded, so the claim is retried on the next run.
    if not translated_claim:
        print("Error: Translated claim not found.")
        continue

    # Evaluate the translated claim
    output_raw = get_gpt_response(translated_claim)
    print(f"Model Output: {output_raw}")

    # Extract final answer from the cleaned output
    final_answer = clean_output(output_raw)

    # Classify once; the same outcome drives the prints, the record and both tallies
    is_inconclusive = final_answer is None
    is_correct = (not is_inconclusive) and final_answer == label.lower()
    if is_inconclusive:
        outcome = 'inconclusive'
        print("Inconclusive response\n\n")
    else:
        print(f"Final Answer: {final_answer.capitalize()}, Actual Answer: {label.capitalize()}")
        outcome = 'correct' if is_correct else 'wrong'
        print("Correct response\n\n" if is_correct else "Wrong response\n\n")

    # Save outputs
    output_record = {
        'claim': claim,
        'label': label,
        'language': language,
        'translated_claim': translated_claim,
        'output': output_raw,
        'final_answer': final_answer,
        'correct': is_correct,
        'inconclusive': is_inconclusive
    }
    outputs.append(output_record)
    processed_claims.add(claim)

    # Update the overall and per-language tallies
    language_summary = summary['languages'].setdefault(
        language, {'correct': 0, 'wrong': 0, 'inconclusive': 0, 'total': 0}
    )
    for tally in (summary, language_summary):
        tally[outcome] += 1
        tally['total'] += 1

    # Save results to CSV iteratively
    pd.DataFrame(outputs).to_csv(output_file_path, index=False, encoding='utf-8')

    # Save summary to JSON iteratively
    with open(summary_file_path, 'w', encoding='utf-8') as file:
        json.dump(summary, file, ensure_ascii=False, indent=4)

print(f"Results saved to {output_file_path} and {summary_file_path}")
print(f"Summary: {summary}")


Original Claim: ছবিটি সম্প্রতি আমেরিকার মিনিয়াপোলিসে পুলিশের অত্যাচারে মৃত্যু হওয়া জর্জ ফ্লয়েডের
Model Translated Claim: The picture is of George Floyd, who recently died due to police brutality in Minneapolis, America.
Model Output: To determine the accuracy of the claim, let's break it down:

1. "The picture is of George Floyd" - This part of the claim can be verified by comparing the picture in question to known images of George Floyd.

2. "who recently died due to police brutality in Minneapolis, America" - George Floyd did indeed die on May 25, 2020, in Minneapolis, Minnesota, and his death was widely reported as a result of police brutality.

Given that both parts of the claim are accurate:

Final Answer: True
Final Answer: True, Actual Answer: Mostly false
Wrong response


Original Claim: इस वीडियो में निर्माता-निर्देशक महेश भट्ट फिल्म ‘सड़क 2’ के ट्रेलर को बड़े पैमाने पर डिस्लाइक किए जाने की वजह से नाराजगी जता रहे हैं.
Model Translated Claim: In this video, producer-director Ma

# Kartvelian

In [4]:
import pandas as pd
import openai
import json
import os
import re
from dotenv import load_dotenv

# Read variables from a local .env file into the process environment
load_dotenv()

# Hand the API key to the OpenAI client (os.getenv yields None when the variable is unset)
openai.api_key = os.getenv('OPENAI_API_KEY')

# Experiment configuration: language family, prompting method, and model name
language_family = "kartvelian"
method = "self_translation"
model = "gpt-4o-mini"

# Input dataset path, plus the results CSV and summary JSON paths derived from the configuration
input_file_path = f'../../JSON Files/{language_family}.json'
output_file_path = f"{language_family}_{model}_{method}.csv"
summary_file_path = f"{language_family}_{model}_{method}_summary.json"

# Guard clause: stop immediately when the dataset file is missing
if not os.path.exists(input_file_path):
    raise FileNotFoundError(f"File not found: {input_file_path}")
with open(input_file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# One list per field; every record is indexed by all four keys
claims = [record['claim'] for record in data]
labels = [record['label'] for record in data]
languages = [record['language'] for record in data]
sites = [record['site'] for record in data]

# Working table with one row per claim
df = pd.DataFrame(dict(claim=claims, label=labels, language=languages, site=sites))

def get_translation(claim, model=model):
    """Ask the chat model to translate ``claim`` into English.

    Args:
        claim: Claim text to translate.
        model: Chat model name; defaults to the module-level ``model``.

    Returns:
        The message content of the first completion choice, unparsed. The
        prompt asks for a 'Translated Claim:' prefix, which callers extract.
    """
    prompt_text = f"Translate the following claim into English: '{claim}'. You must always make sure your final response is prefixed with 'Translated Claim:' followed by the translated claim."
    chat_messages = [{"role": "user", "content": prompt_text}]

    # Single-turn request sent with temperature fixed at 0
    completion = openai.ChatCompletion.create(
        model=model,
        messages=chat_messages,
        temperature=0,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']

def get_gpt_response(translated_claim, model=model):
    """Ask the chat model to rate the veracity of an English claim.

    Args:
        translated_claim: Claim text to be judged.
        model: Chat model name; defaults to the module-level ``model``.

    Returns:
        The message content of the first completion choice, unparsed. The
        prompt asks for a 'Final Answer:' prefix followed by one of the five
        verdict labels.
    """
    prompt_text = f"'{translated_claim}' Is this claim 'True', 'Mostly True', 'Half True', 'Mostly False', or 'False'? You must always make sure your final response is prefixed with 'Final Answer:' followed by either 'True', 'Mostly True', 'Half True', 'Mostly False', or 'False'."
    chat_messages = [{"role": "user", "content": prompt_text}]

    # Single-turn request sent with temperature fixed at 0
    completion = openai.ChatCompletion.create(
        model=model,
        messages=chat_messages,
        temperature=0,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']

# Resume support: start with no records unless a results CSV from an earlier run exists
if not os.path.exists(output_file_path):
    outputs = []
else:
    output_df = pd.read_csv(output_file_path)
    outputs = output_df.to_dict('records')

# Same for the tallies: zeroed counters unless a saved summary JSON exists
if not os.path.exists(summary_file_path):
    summary = dict(correct=0, wrong=0, inconclusive=0, total=0, languages={})
else:
    with open(summary_file_path, 'r', encoding='utf-8') as file:
        summary = json.load(file)

# Function to clean the output text
def clean_output(output):
    """Extract the verdict that follows 'Final Answer:' in a model reply.

    Matching is case-insensitive and uses the first occurrence. Whitespace,
    markdown emphasis (``*``, ``_``) and quote characters between the prefix
    and the verdict are skipped, so a reply such as ``Final Answer: **True**``
    is recognised instead of being reported as missing.

    Args:
        output: Raw reply text. ``None`` or an empty string is accepted.

    Returns:
        The lower-cased verdict ('true', 'mostly true', 'half true',
        'mostly false' or 'false'), or ``None`` when no verdict is found.
    """
    if not output:
        return None
    final_answer_match = re.search(
        r'Final Answer:[\s*_"\']*(True|Mostly True|Half True|Mostly False|False)',
        output,
        re.IGNORECASE,
    )
    return final_answer_match.group(1).strip().lower() if final_answer_match else None

# Process claims and update files iteratively
# Claims already present in `outputs` (for example loaded from an earlier run) are skipped.
# A set gives a constant-time membership test instead of rescanning every record per row.
processed_claims = {output['claim'] for output in outputs}
# Compiled once; captures the rest of the line after the 'Translated Claim:' prefix
translation_pattern = re.compile(r'Translated Claim:\s*(.*)', re.IGNORECASE)

for index, row in df.iterrows():
    claim = row['claim']
    if claim in processed_claims:
        continue  # Skip already processed claims

    label = row['label']
    language = row['language']

    # Translate the claim to English
    translated_claim_raw = get_translation(claim)
    translated_claim_match = translation_pattern.search(translated_claim_raw)
    translated_claim = translated_claim_match.group(1).strip() if translated_claim_match else None
    print(f"Original Claim: {claim}")
    print(f"Model Translated Claim: {translated_claim}")

    # A missing prefix or an empty capture both mean there is nothing to evaluate.
    # Nothing is recorded, so the claim is retried on the next run.
    if not translated_claim:
        print("Error: Translated claim not found.")
        continue

    # Evaluate the translated claim
    output_raw = get_gpt_response(translated_claim)
    print(f"Model Output: {output_raw}")

    # Extract final answer from the cleaned output
    final_answer = clean_output(output_raw)

    # Classify once; the same outcome drives the prints, the record and both tallies
    is_inconclusive = final_answer is None
    is_correct = (not is_inconclusive) and final_answer == label.lower()
    if is_inconclusive:
        outcome = 'inconclusive'
        print("Inconclusive response\n\n")
    else:
        print(f"Final Answer: {final_answer.capitalize()}, Actual Answer: {label.capitalize()}")
        outcome = 'correct' if is_correct else 'wrong'
        print("Correct response\n\n" if is_correct else "Wrong response\n\n")

    # Save outputs
    output_record = {
        'claim': claim,
        'label': label,
        'language': language,
        'translated_claim': translated_claim,
        'output': output_raw,
        'final_answer': final_answer,
        'correct': is_correct,
        'inconclusive': is_inconclusive
    }
    outputs.append(output_record)
    processed_claims.add(claim)

    # Update the overall and per-language tallies
    language_summary = summary['languages'].setdefault(
        language, {'correct': 0, 'wrong': 0, 'inconclusive': 0, 'total': 0}
    )
    for tally in (summary, language_summary):
        tally[outcome] += 1
        tally['total'] += 1

    # Save results to CSV iteratively
    pd.DataFrame(outputs).to_csv(output_file_path, index=False, encoding='utf-8')

    # Save summary to JSON iteratively
    with open(summary_file_path, 'w', encoding='utf-8') as file:
        json.dump(summary, file, ensure_ascii=False, indent=4)

print(f"Results saved to {output_file_path} and {summary_file_path}")
print(f"Summary: {summary}")


Original Claim: „ეს სისტემა [ვეტერანების საქმეთა დეპარტამენტი] დაუბრუნდა მთავრობას. უკვე რამდენიმე თვეა გასული და არ...
Model Translated Claim: "This system [Department of Veterans Affairs] has returned to the government. Several months have already passed and it does not..."
Model Output: To determine the accuracy of the claim that the Department of Veterans Affairs (VA) "has returned to the government," we need to understand the context and the current status of the VA.

The Department of Veterans Affairs is a federal agency that has always been part of the U.S. government. It was established as a Cabinet-level department in 1989, succeeding the Veterans Administration, which was created in 1930. The VA has consistently been a government entity responsible for providing various services to veterans, including healthcare, benefits, and memorial services.

Given this information, the claim that the VA "has returned to the government" is misleading because it implies that the VA was at 

# Slavic

In [6]:
import pandas as pd
import openai
import json
import os
import re
from dotenv import load_dotenv

# Read variables from a local .env file into the process environment
load_dotenv()

# Hand the API key to the OpenAI client (os.getenv yields None when the variable is unset)
openai.api_key = os.getenv('OPENAI_API_KEY')

# Experiment configuration: language family, prompting method, and model name
language_family = "slavic"
method = "self_translation"
model = "gpt-4o-mini"

# Input dataset path, plus the results CSV and summary JSON paths derived from the configuration
input_file_path = f'../../JSON Files/{language_family}.json'
output_file_path = f"{language_family}_{model}_{method}.csv"
summary_file_path = f"{language_family}_{model}_{method}_summary.json"

# Guard clause: stop immediately when the dataset file is missing
if not os.path.exists(input_file_path):
    raise FileNotFoundError(f"File not found: {input_file_path}")
with open(input_file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# One list per field; every record is indexed by all four keys
claims = [record['claim'] for record in data]
labels = [record['label'] for record in data]
languages = [record['language'] for record in data]
sites = [record['site'] for record in data]

# Working table with one row per claim
df = pd.DataFrame(dict(claim=claims, label=labels, language=languages, site=sites))

def get_translation(claim, model=model):
    """Ask the chat model to translate ``claim`` into English.

    Args:
        claim: Claim text to translate.
        model: Chat model name; defaults to the module-level ``model``.

    Returns:
        The message content of the first completion choice, unparsed. The
        prompt asks for a 'Translated Claim:' prefix, which callers extract.
    """
    prompt_text = f"Translate the following claim into English: '{claim}'. You must always make sure your final response is prefixed with 'Translated Claim:' followed by the translated claim."
    chat_messages = [{"role": "user", "content": prompt_text}]

    # Single-turn request sent with temperature fixed at 0
    completion = openai.ChatCompletion.create(
        model=model,
        messages=chat_messages,
        temperature=0,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']

def get_gpt_response(translated_claim, model=model):
    """Ask the chat model to rate the veracity of an English claim.

    Args:
        translated_claim: Claim text to be judged.
        model: Chat model name; defaults to the module-level ``model``.

    Returns:
        The message content of the first completion choice, unparsed. The
        prompt asks for a 'Final Answer:' prefix followed by one of the five
        verdict labels.
    """
    prompt_text = f"'{translated_claim}' Is this claim 'True', 'Mostly True', 'Half True', 'Mostly False', or 'False'? You must always make sure your final response is prefixed with 'Final Answer:' followed by either 'True', 'Mostly True', 'Half True', 'Mostly False', or 'False'."
    chat_messages = [{"role": "user", "content": prompt_text}]

    # Single-turn request sent with temperature fixed at 0
    completion = openai.ChatCompletion.create(
        model=model,
        messages=chat_messages,
        temperature=0,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']

# Resume support: start with no records unless a results CSV from an earlier run exists
if not os.path.exists(output_file_path):
    outputs = []
else:
    output_df = pd.read_csv(output_file_path)
    outputs = output_df.to_dict('records')

# Same for the tallies: zeroed counters unless a saved summary JSON exists
if not os.path.exists(summary_file_path):
    summary = dict(correct=0, wrong=0, inconclusive=0, total=0, languages={})
else:
    with open(summary_file_path, 'r', encoding='utf-8') as file:
        summary = json.load(file)

# Function to clean the output text
def clean_output(output):
    """Extract the verdict that follows 'Final Answer:' in a model reply.

    Matching is case-insensitive and uses the first occurrence. Whitespace,
    markdown emphasis (``*``, ``_``) and quote characters between the prefix
    and the verdict are skipped, so a reply such as ``Final Answer: **True**``
    is recognised instead of being reported as missing.

    Args:
        output: Raw reply text. ``None`` or an empty string is accepted.

    Returns:
        The lower-cased verdict ('true', 'mostly true', 'half true',
        'mostly false' or 'false'), or ``None`` when no verdict is found.
    """
    if not output:
        return None
    final_answer_match = re.search(
        r'Final Answer:[\s*_"\']*(True|Mostly True|Half True|Mostly False|False)',
        output,
        re.IGNORECASE,
    )
    return final_answer_match.group(1).strip().lower() if final_answer_match else None

# Process claims and update files iteratively
# Claims already present in `outputs` (for example loaded from an earlier run) are skipped.
# A set gives a constant-time membership test instead of rescanning every record per row.
processed_claims = {output['claim'] for output in outputs}
# Compiled once; captures the rest of the line after the 'Translated Claim:' prefix
translation_pattern = re.compile(r'Translated Claim:\s*(.*)', re.IGNORECASE)

for index, row in df.iterrows():
    claim = row['claim']
    if claim in processed_claims:
        continue  # Skip already processed claims

    label = row['label']
    language = row['language']

    # Translate the claim to English
    translated_claim_raw = get_translation(claim)
    translated_claim_match = translation_pattern.search(translated_claim_raw)
    translated_claim = translated_claim_match.group(1).strip() if translated_claim_match else None
    print(f"Original Claim: {claim}")
    print(f"Model Translated Claim: {translated_claim}")

    # A missing prefix or an empty capture both mean there is nothing to evaluate.
    # Nothing is recorded, so the claim is retried on the next run.
    if not translated_claim:
        print("Error: Translated claim not found.")
        continue

    # Evaluate the translated claim
    output_raw = get_gpt_response(translated_claim)
    print(f"Model Output: {output_raw}")

    # Extract final answer from the cleaned output
    final_answer = clean_output(output_raw)

    # Classify once; the same outcome drives the prints, the record and both tallies
    is_inconclusive = final_answer is None
    is_correct = (not is_inconclusive) and final_answer == label.lower()
    if is_inconclusive:
        outcome = 'inconclusive'
        print("Inconclusive response\n\n")
    else:
        print(f"Final Answer: {final_answer.capitalize()}, Actual Answer: {label.capitalize()}")
        outcome = 'correct' if is_correct else 'wrong'
        print("Correct response\n\n" if is_correct else "Wrong response\n\n")

    # Save outputs
    output_record = {
        'claim': claim,
        'label': label,
        'language': language,
        'translated_claim': translated_claim,
        'output': output_raw,
        'final_answer': final_answer,
        'correct': is_correct,
        'inconclusive': is_inconclusive
    }
    outputs.append(output_record)
    processed_claims.add(claim)

    # Update the overall and per-language tallies
    language_summary = summary['languages'].setdefault(
        language, {'correct': 0, 'wrong': 0, 'inconclusive': 0, 'total': 0}
    )
    for tally in (summary, language_summary):
        tally[outcome] += 1
        tally['total'] += 1

    # Save results to CSV iteratively
    pd.DataFrame(outputs).to_csv(output_file_path, index=False, encoding='utf-8')

    # Save summary to JSON iteratively
    with open(summary_file_path, 'w', encoding='utf-8') as file:
        json.dump(summary, file, ensure_ascii=False, indent=4)

print(f"Results saved to {output_file_path} and {summary_file_path}")
print(f"Summary: {summary}")


Original Claim: Obecność polską [w Syrii] wycofał jeden z moich poprzedników. Polska była na wzgórzach Golan, mieliśmy kilkuset naszych żołnierzy którzy uczestniczyli w operacji pokojowej ONZ.
Model Translated Claim: 'The Polish presence [in Syria] was withdrawn by one of my predecessors. Poland was on the Golan Heights, we had several hundred of our soldiers who participated in the UN peacekeeping operation.'
Model Output: To determine the accuracy of the claim, we need to verify the historical involvement of Polish troops in the Golan Heights and their subsequent withdrawal.

1. **Polish Presence in the Golan Heights**: Poland did indeed participate in the United Nations Disengagement Observer Force (UNDOF) on the Golan Heights. Polish troops were part of this peacekeeping mission, which was established in 1974 to monitor the ceasefire between Israel and Syria.

2. **Withdrawal of Polish Troops**: Poland withdrew its troops from the Golan Heights in 2009. This decision was made by th

# Turkic

In [7]:
import pandas as pd
import openai
import json
import os
import re
from dotenv import load_dotenv

# Read variables from a local .env file into the process environment
load_dotenv()

# Hand the API key to the OpenAI client (os.getenv yields None when the variable is unset)
openai.api_key = os.getenv('OPENAI_API_KEY')

# Experiment configuration: language family, prompting method, and model name
language_family = "turkic"
method = "self_translation"
model = "gpt-4o-mini"

# Input dataset path, plus the results CSV and summary JSON paths derived from the configuration
input_file_path = f'../../JSON Files/{language_family}.json'
output_file_path = f"{language_family}_{model}_{method}.csv"
summary_file_path = f"{language_family}_{model}_{method}_summary.json"

# Guard clause: stop immediately when the dataset file is missing
if not os.path.exists(input_file_path):
    raise FileNotFoundError(f"File not found: {input_file_path}")
with open(input_file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# One list per field; every record is indexed by all four keys
claims = [record['claim'] for record in data]
labels = [record['label'] for record in data]
languages = [record['language'] for record in data]
sites = [record['site'] for record in data]

# Working table with one row per claim
df = pd.DataFrame(dict(claim=claims, label=labels, language=languages, site=sites))

def get_translation(claim, model=model):
    """Ask the chat model to translate ``claim`` into English.

    Args:
        claim: Claim text to translate.
        model: Chat model name; defaults to the module-level ``model``.

    Returns:
        The message content of the first completion choice, unparsed. The
        prompt asks for a 'Translated Claim:' prefix, which callers extract.
    """
    prompt_text = f"Translate the following claim into English: '{claim}'. You must always make sure your final response is prefixed with 'Translated Claim:' followed by the translated claim."
    chat_messages = [{"role": "user", "content": prompt_text}]

    # Single-turn request sent with temperature fixed at 0
    completion = openai.ChatCompletion.create(
        model=model,
        messages=chat_messages,
        temperature=0,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']

def get_gpt_response(translated_claim, model=model):
    """Ask the chat model to rate the veracity of an English claim.

    Args:
        translated_claim: Claim text to be judged.
        model: Chat model name; defaults to the module-level ``model``.

    Returns:
        The message content of the first completion choice, unparsed. The
        prompt asks for a 'Final Answer:' prefix followed by one of the five
        verdict labels.
    """
    prompt_text = f"'{translated_claim}' Is this claim 'True', 'Mostly True', 'Half True', 'Mostly False', or 'False'? You must always make sure your final response is prefixed with 'Final Answer:' followed by either 'True', 'Mostly True', 'Half True', 'Mostly False', or 'False'."
    chat_messages = [{"role": "user", "content": prompt_text}]

    # Single-turn request sent with temperature fixed at 0
    completion = openai.ChatCompletion.create(
        model=model,
        messages=chat_messages,
        temperature=0,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']

# Resume support: start with no records unless a results CSV from an earlier run exists
if not os.path.exists(output_file_path):
    outputs = []
else:
    output_df = pd.read_csv(output_file_path)
    outputs = output_df.to_dict('records')

# Same for the tallies: zeroed counters unless a saved summary JSON exists
if not os.path.exists(summary_file_path):
    summary = dict(correct=0, wrong=0, inconclusive=0, total=0, languages={})
else:
    with open(summary_file_path, 'r', encoding='utf-8') as file:
        summary = json.load(file)

# Function to clean the output text
def clean_output(output):
    """Extract the verdict that follows 'Final Answer:' in a model reply.

    Matching is case-insensitive and uses the first occurrence. Whitespace,
    markdown emphasis (``*``, ``_``) and quote characters between the prefix
    and the verdict are skipped, so a reply such as ``Final Answer: **True**``
    is recognised instead of being reported as missing.

    Args:
        output: Raw reply text. ``None`` or an empty string is accepted.

    Returns:
        The lower-cased verdict ('true', 'mostly true', 'half true',
        'mostly false' or 'false'), or ``None`` when no verdict is found.
    """
    if not output:
        return None
    final_answer_match = re.search(
        r'Final Answer:[\s*_"\']*(True|Mostly True|Half True|Mostly False|False)',
        output,
        re.IGNORECASE,
    )
    return final_answer_match.group(1).strip().lower() if final_answer_match else None

# Process claims and update files iteratively
# Claims already present in `outputs` (for example loaded from an earlier run) are skipped.
# A set gives a constant-time membership test instead of rescanning every record per row.
processed_claims = {output['claim'] for output in outputs}
# Compiled once; captures the rest of the line after the 'Translated Claim:' prefix
translation_pattern = re.compile(r'Translated Claim:\s*(.*)', re.IGNORECASE)

for index, row in df.iterrows():
    claim = row['claim']
    if claim in processed_claims:
        continue  # Skip already processed claims

    label = row['label']
    language = row['language']

    # Translate the claim to English
    translated_claim_raw = get_translation(claim)
    translated_claim_match = translation_pattern.search(translated_claim_raw)
    translated_claim = translated_claim_match.group(1).strip() if translated_claim_match else None
    print(f"Original Claim: {claim}")
    print(f"Model Translated Claim: {translated_claim}")

    # A missing prefix or an empty capture both mean there is nothing to evaluate.
    # Nothing is recorded, so the claim is retried on the next run.
    if not translated_claim:
        print("Error: Translated claim not found.")
        continue

    # Evaluate the translated claim
    output_raw = get_gpt_response(translated_claim)
    print(f"Model Output: {output_raw}")

    # Extract final answer from the cleaned output
    final_answer = clean_output(output_raw)

    # Classify once; the same outcome drives the prints, the record and both tallies
    is_inconclusive = final_answer is None
    is_correct = (not is_inconclusive) and final_answer == label.lower()
    if is_inconclusive:
        outcome = 'inconclusive'
        print("Inconclusive response\n\n")
    else:
        print(f"Final Answer: {final_answer.capitalize()}, Actual Answer: {label.capitalize()}")
        outcome = 'correct' if is_correct else 'wrong'
        print("Correct response\n\n" if is_correct else "Wrong response\n\n")

    # Save outputs
    output_record = {
        'claim': claim,
        'label': label,
        'language': language,
        'translated_claim': translated_claim,
        'output': output_raw,
        'final_answer': final_answer,
        'correct': is_correct,
        'inconclusive': is_inconclusive
    }
    outputs.append(output_record)
    processed_claims.add(claim)

    # Update the overall and per-language tallies
    language_summary = summary['languages'].setdefault(
        language, {'correct': 0, 'wrong': 0, 'inconclusive': 0, 'total': 0}
    )
    for tally in (summary, language_summary):
        tally[outcome] += 1
        tally['total'] += 1

    # Save results to CSV iteratively
    pd.DataFrame(outputs).to_csv(output_file_path, index=False, encoding='utf-8')

    # Save summary to JSON iteratively
    with open(summary_file_path, 'w', encoding='utf-8') as file:
        json.dump(summary, file, ensure_ascii=False, indent=4)

print(f"Results saved to {output_file_path} and {summary_file_path}")
print(f"Summary: {summary}")


Original Claim: biz Avrupalılardan 42 dolar daha fazla para veriyoruz doğalgaza
Model Translated Claim: We pay 42 dollars more for natural gas than Europeans.
Model Output: To determine the accuracy of the claim "We pay 42 dollars more for natural gas than Europeans," we need to consider several factors, including the time frame, the specific countries being compared, and the units of measurement (e.g., per cubic meter, per MMBtu, etc.).

1. **Time Frame**: Natural gas prices fluctuate over time due to market conditions, geopolitical events, and seasonal demand. The claim might be accurate at a specific point in time but not universally true.

2. **Countries**: Europe is not a single market; different countries within Europe have different natural gas prices. For example, prices in Germany might differ from those in Spain or the UK.

3. **Units of Measurement**: Natural gas prices can be quoted in various units such as per cubic meter, per MMBtu (Million British Thermal Units), or per 