# Romance

In [1]:
import os
import time
from mistralai import Mistral, SDKError
import pandas as pd
import json
import re
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Retrieve the Mistral API key from the environment variable
api_key = os.getenv('MISTRAL_API_KEY')
model = "mistral-large-latest"

client = Mistral(api_key=api_key)

# Define the prompting method and language family
language_family = "romance_translated"
method = "pre-translate"

# Load data
input_file_path = f'../../Translated JSON Files/{language_family}.json'
output_file_path = f"{language_family}_{model}_{method}.csv"
summary_file_path = f"{language_family}_{model}_{method}_summary.json"

if os.path.exists(input_file_path):
    with open(input_file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
else:
    raise FileNotFoundError(f"File not found: {input_file_path}")

claims = [item['claim'] for item in data]
labels = [item['label'] for item in data]
languages = [item['language'] for item in data]
sites = [item['site'] for item in data]

df = pd.DataFrame({
    'claim': claims,
    'label': labels,
    'language': languages,
    'site': sites
})

# Define the prompt
prompt_template = "'{claim}' Is this claim 'True', 'Mostly True', 'Half True', 'Mostly False', or 'False'? You must always make sure your final response is prefixed with 'Final Answer:' followed by either 'True', 'Mostly True', 'Half True', 'Mostly False', or 'False'."

final_answer_mappings = {
    "true": "true", "mostly true": "mostly true", "half true": "half true",
    "mostly false": "mostly false", "false": "false"
}

def get_mistral_response(claim, model=model):
    user_prompt = prompt_template.format(claim=claim)
    retries = 0
    while True:
        try:
            response = client.chat.complete(
                model=model,
                messages=[
                    {"role": "user", "content": user_prompt}
                ]
            )
            return response.choices[0].message.content
        except SDKError as e:
            if e.status_code == 429:
                retries += 1
                wait_time = 4
                print(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
            else:
                raise e

def clean_output(output):
    cleaned_output = re.sub(r'[^a-zA-Z\s:]', '', output)
    return cleaned_output.lower()

def extract_final_answer(cleaned_output):
    try:
        final_answer = re.search(r'final answer:\s*([\w\s]+)', cleaned_output)
        if final_answer:
            response = final_answer.group(1).strip()
            return final_answer_mappings.get(response, None)
    except Exception as e:
        print(f"Error extracting final answer: {e}")
    return None

# Initialize or load existing outputs and summary
if os.path.exists(output_file_path):
    output_df = pd.read_csv(output_file_path)
    outputs = output_df.to_dict('records')
else:
    outputs = []

if os.path.exists(summary_file_path):
    with open(summary_file_path, 'r', encoding='utf-8') as file:
        summary = json.load(file)
else:
    summary = {
        'correct': 0,
        'wrong': 0,
        'inconclusive': 0,
        'total': 0,
        'languages': {}
    }

# Process claims and update files iteratively
for index, row in df.iterrows():
    if any(output['claim'] == row['claim'] for output in outputs):
        continue  # Skip already processed claims

    claim = row['claim']
    label = row['label']
    language = row['language']
    
    output = get_mistral_response(claim)
    
    print(f"Model Output: {output}")
    
    cleaned_output = clean_output(output)
    final_answer = extract_final_answer(cleaned_output)
    
    if final_answer is None:
        print("Inconclusive response")
        summary['inconclusive'] += 1
    else:
        print(f"Final Answer: {final_answer.capitalize()}, Actual Answer: {label.capitalize()}")
        if final_answer == label.lower():
            print("Correct response")
            summary['correct'] += 1
        else:
            print("Wrong response")
            summary['wrong'] += 1
    
    # Save outputs
    output_record = {
        'claim': claim,
        'label': label,
        'language': language,
        'output': output,
        'final_answer': final_answer,
        'correct': final_answer == label.lower() if final_answer else False,
        'inconclusive': final_answer is None
    }
    outputs.append(output_record)
    
    # Update language summary
    if language not in summary['languages']:
        summary['languages'][language] = {'correct': 0, 'wrong': 0, 'inconclusive': 0, 'total': 0}
    summary['languages'][language]['total'] += 1
    summary['total'] += 1
    if final_answer is None:
        summary['languages'][language]['inconclusive'] += 1
    elif final_answer == label.lower():
        summary['languages'][language]['correct'] += 1
    else:
        summary['languages'][language]['wrong'] += 1

    # Save results to CSV iteratively
    pd.DataFrame(outputs).to_csv(output_file_path, index=False, encoding='utf-8')

    # Save summary to JSON iteratively
    with open(summary_file_path, 'w', encoding='utf-8') as file:
        json.dump(summary, file, ensure_ascii=False, indent=4)

print(f"Results saved to {output_file_path} and {summary_file_path}")
print(f"Summary: {summary}")


  from pandas.core import (


Model Output: To evaluate the claim that "Nigeria spends 7 times less in healthcare per capita than South Africa," we need to look at the most recent data available on healthcare expenditures per capita for both countries.

As of the latest available data:
- South Africa's healthcare expenditure per capita is around $500-$600 USD.
- Nigeria's healthcare expenditure per capita is around $80-$100 USD.

Let's calculate the ratio:
- If South Africa spends approximately $550 (mid-range estimate) per capita and Nigeria spends approximately $90 (mid-range estimate) per capita, the ratio is $550 / $90 ≈ 6.11.

This ratio is close to 7 times less, but not exactly 7 times. However, given the variability in estimates and the fact that the ratio is quite close, the claim can be considered mostly true.

Final Answer: Mostly True
Final Answer: Mostly true, Actual Answer: False
Wrong response
Model Output: The claim is about a specific change in the Senegalese constitution regarding the "blocking qua

# Indo-Aryan

In [2]:
import os
import time
from mistralai import Mistral, SDKError
import pandas as pd
import json
import re
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Retrieve the Mistral API key from the environment variable
api_key = os.getenv('MISTRAL_API_KEY')
model = "mistral-large-latest"

client = Mistral(api_key=api_key)

# Define the prompting method and language family
language_family = "indo_aryan_translated"
method = "pre-translate"

# Load data
input_file_path = f'../../Translated JSON Files/{language_family}.json'
output_file_path = f"{language_family}_{model}_{method}.csv"
summary_file_path = f"{language_family}_{model}_{method}_summary.json"

if os.path.exists(input_file_path):
    with open(input_file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
else:
    raise FileNotFoundError(f"File not found: {input_file_path}")

claims = [item['claim'] for item in data]
labels = [item['label'] for item in data]
languages = [item['language'] for item in data]
sites = [item['site'] for item in data]

df = pd.DataFrame({
    'claim': claims,
    'label': labels,
    'language': languages,
    'site': sites
})

# Define the prompt
prompt_template = "'{claim}' Is this claim 'True', 'Mostly True', 'Half True', 'Mostly False', or 'False'? You must always make sure your final response is prefixed with 'Final Answer:' followed by either 'True', 'Mostly True', 'Half True', 'Mostly False', or 'False'."

final_answer_mappings = {
    "true": "true", "mostly true": "mostly true", "half true": "half true",
    "mostly false": "mostly false", "false": "false"
}

def get_mistral_response(claim, model=model):
    user_prompt = prompt_template.format(claim=claim)
    retries = 0
    while True:
        try:
            response = client.chat.complete(
                model=model,
                messages=[
                    {"role": "user", "content": user_prompt}
                ]
            )
            return response.choices[0].message.content
        except SDKError as e:
            if e.status_code == 429:
                retries += 1
                wait_time = 4
                print(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
            else:
                raise e

def clean_output(output):
    cleaned_output = re.sub(r'[^a-zA-Z\s:]', '', output)
    return cleaned_output.lower()

def extract_final_answer(cleaned_output):
    try:
        final_answer = re.search(r'final answer:\s*([\w\s]+)', cleaned_output)
        if final_answer:
            response = final_answer.group(1).strip()
            return final_answer_mappings.get(response, None)
    except Exception as e:
        print(f"Error extracting final answer: {e}")
    return None

# Initialize or load existing outputs and summary
if os.path.exists(output_file_path):
    output_df = pd.read_csv(output_file_path)
    outputs = output_df.to_dict('records')
else:
    outputs = []

if os.path.exists(summary_file_path):
    with open(summary_file_path, 'r', encoding='utf-8') as file:
        summary = json.load(file)
else:
    summary = {
        'correct': 0,
        'wrong': 0,
        'inconclusive': 0,
        'total': 0,
        'languages': {}
    }

# Process claims and update files iteratively
for index, row in df.iterrows():
    if any(output['claim'] == row['claim'] for output in outputs):
        continue  # Skip already processed claims

    claim = row['claim']
    label = row['label']
    language = row['language']
    
    output = get_mistral_response(claim)
    
    print(f"Model Output: {output}")
    
    cleaned_output = clean_output(output)
    final_answer = extract_final_answer(cleaned_output)
    
    if final_answer is None:
        print("Inconclusive response")
        summary['inconclusive'] += 1
    else:
        print(f"Final Answer: {final_answer.capitalize()}, Actual Answer: {label.capitalize()}")
        if final_answer == label.lower():
            print("Correct response")
            summary['correct'] += 1
        else:
            print("Wrong response")
            summary['wrong'] += 1
    
    # Save outputs
    output_record = {
        'claim': claim,
        'label': label,
        'language': language,
        'output': output,
        'final_answer': final_answer,
        'correct': final_answer == label.lower() if final_answer else False,
        'inconclusive': final_answer is None
    }
    outputs.append(output_record)
    
    # Update language summary
    if language not in summary['languages']:
        summary['languages'][language] = {'correct': 0, 'wrong': 0, 'inconclusive': 0, 'total': 0}
    summary['languages'][language]['total'] += 1
    summary['total'] += 1
    if final_answer is None:
        summary['languages'][language]['inconclusive'] += 1
    elif final_answer == label.lower():
        summary['languages'][language]['correct'] += 1
    else:
        summary['languages'][language]['wrong'] += 1

    # Save results to CSV iteratively
    pd.DataFrame(outputs).to_csv(output_file_path, index=False, encoding='utf-8')

    # Save summary to JSON iteratively
    with open(summary_file_path, 'w', encoding='utf-8') as file:
        json.dump(summary, file, ensure_ascii=False, indent=4)

print(f"Results saved to {output_file_path} and {summary_file_path}")
print(f"Summary: {summary}")


Model Output: Final Answer: Mostly False. While it is true that George Floyd died in Minneapolis, USA, and his death was related to police actions, it is not accurate to describe the incident as "police torture." The correct term, based on legal proceedings and widespread understanding, is "police brutality" or "excessive use of force." Additionally, the phrase "Photo by George Floyd" is misleading as it suggests that George Floyd took the photo, which is not relevant to his tragic death.
Inconclusive response
Model Output: Final Answer: Mostly False.

Explanation: Mahesh Bhatt did express concerns about the negative reactions and controversy surrounding the trailer of 'Sadak 2', but he did not specifically express displeasure about the trailer being displayed on a large scale. The displeasure was more about the public reaction and the controversy, not the scale of the trailer's display.
Inconclusive response
Model Output: Final Answer: False. The claim that the Supreme Court has ruled

# Kartvelian

In [3]:
import os
import time
from mistralai import Mistral, SDKError
import pandas as pd
import json
import re
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Retrieve the Mistral API key from the environment variable
api_key = os.getenv('MISTRAL_API_KEY')
model = "mistral-large-latest"

client = Mistral(api_key=api_key)

# Define the prompting method and language family
language_family = "kartvelian_translated"
method = "pre-translate"

# Load data
input_file_path = f'../../Translated JSON Files/{language_family}.json'
output_file_path = f"{language_family}_{model}_{method}.csv"
summary_file_path = f"{language_family}_{model}_{method}_summary.json"

if os.path.exists(input_file_path):
    with open(input_file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
else:
    raise FileNotFoundError(f"File not found: {input_file_path}")

claims = [item['claim'] for item in data]
labels = [item['label'] for item in data]
languages = [item['language'] for item in data]
sites = [item['site'] for item in data]

df = pd.DataFrame({
    'claim': claims,
    'label': labels,
    'language': languages,
    'site': sites
})

# Define the prompt
prompt_template = "'{claim}' Is this claim 'True', 'Mostly True', 'Half True', 'Mostly False', or 'False'? You must always make sure your final response is prefixed with 'Final Answer:' followed by either 'True', 'Mostly True', 'Half True', 'Mostly False', or 'False'."

final_answer_mappings = {
    "true": "true", "mostly true": "mostly true", "half true": "half true",
    "mostly false": "mostly false", "false": "false"
}

def get_mistral_response(claim, model=model):
    user_prompt = prompt_template.format(claim=claim)
    retries = 0
    while True:
        try:
            response = client.chat.complete(
                model=model,
                messages=[
                    {"role": "user", "content": user_prompt}
                ]
            )
            return response.choices[0].message.content
        except SDKError as e:
            if e.status_code == 429:
                retries += 1
                wait_time = 4
                print(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
            else:
                raise e

def clean_output(output):
    cleaned_output = re.sub(r'[^a-zA-Z\s:]', '', output)
    return cleaned_output.lower()

def extract_final_answer(cleaned_output):
    try:
        final_answer = re.search(r'final answer:\s*([\w\s]+)', cleaned_output)
        if final_answer:
            response = final_answer.group(1).strip()
            return final_answer_mappings.get(response, None)
    except Exception as e:
        print(f"Error extracting final answer: {e}")
    return None

# Initialize or load existing outputs and summary
if os.path.exists(output_file_path):
    output_df = pd.read_csv(output_file_path)
    outputs = output_df.to_dict('records')
else:
    outputs = []

if os.path.exists(summary_file_path):
    with open(summary_file_path, 'r', encoding='utf-8') as file:
        summary = json.load(file)
else:
    summary = {
        'correct': 0,
        'wrong': 0,
        'inconclusive': 0,
        'total': 0,
        'languages': {}
    }

# Process claims and update files iteratively
for index, row in df.iterrows():
    if any(output['claim'] == row['claim'] for output in outputs):
        continue  # Skip already processed claims

    claim = row['claim']
    label = row['label']
    language = row['language']
    
    output = get_mistral_response(claim)
    
    print(f"Model Output: {output}")
    
    cleaned_output = clean_output(output)
    final_answer = extract_final_answer(cleaned_output)
    
    if final_answer is None:
        print("Inconclusive response")
        summary['inconclusive'] += 1
    else:
        print(f"Final Answer: {final_answer.capitalize()}, Actual Answer: {label.capitalize()}")
        if final_answer == label.lower():
            print("Correct response")
            summary['correct'] += 1
        else:
            print("Wrong response")
            summary['wrong'] += 1
    
    # Save outputs
    output_record = {
        'claim': claim,
        'label': label,
        'language': language,
        'output': output,
        'final_answer': final_answer,
        'correct': final_answer == label.lower() if final_answer else False,
        'inconclusive': final_answer is None
    }
    outputs.append(output_record)
    
    # Update language summary
    if language not in summary['languages']:
        summary['languages'][language] = {'correct': 0, 'wrong': 0, 'inconclusive': 0, 'total': 0}
    summary['languages'][language]['total'] += 1
    summary['total'] += 1
    if final_answer is None:
        summary['languages'][language]['inconclusive'] += 1
    elif final_answer == label.lower():
        summary['languages'][language]['correct'] += 1
    else:
        summary['languages'][language]['wrong'] += 1

    # Save results to CSV iteratively
    pd.DataFrame(outputs).to_csv(output_file_path, index=False, encoding='utf-8')

    # Save summary to JSON iteratively
    with open(summary_file_path, 'w', encoding='utf-8') as file:
        json.dump(summary, file, ensure_ascii=False, indent=4)

print(f"Results saved to {output_file_path} and {summary_file_path}")
print(f"Summary: {summary}")


Model Output: To evaluate the claim "Part of the state budget spent did not affect the depreciation of the GEL," we need to consider the context and the specific details provided. However, without additional context or data, it is challenging to definitively determine the accuracy of this claim.

Generally, state budget spending can have various effects on a country's currency, depending on numerous factors such as economic policies, international trade, monetary policies, and external economic conditions. Without specific information on the economic context, the amount spent, and the corresponding economic indicators, it is difficult to precisely judge the claim.

Given the lack of specific data, the claim appears to be a general statement that may not fully capture the complexity of the issue. Therefore, the most appropriate categorization would be:

**Final Answer: Half True**
Final Answer: Half true, Actual Answer: Mostly false
Wrong response
Model Output: To accurately assess the 

# Slavic

In [4]:
import os
import time
from mistralai import Mistral, SDKError
import pandas as pd
import json
import re
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Retrieve the Mistral API key from the environment variable
api_key = os.getenv('MISTRAL_API_KEY')
model = "mistral-large-latest"

client = Mistral(api_key=api_key)

# Define the prompting method and language family
language_family = "slavic_translated"
method = "pre-translate"

# Load data
input_file_path = f'../../Translated JSON Files/{language_family}.json'
output_file_path = f"{language_family}_{model}_{method}.csv"
summary_file_path = f"{language_family}_{model}_{method}_summary.json"

if os.path.exists(input_file_path):
    with open(input_file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
else:
    raise FileNotFoundError(f"File not found: {input_file_path}")

claims = [item['claim'] for item in data]
labels = [item['label'] for item in data]
languages = [item['language'] for item in data]
sites = [item['site'] for item in data]

df = pd.DataFrame({
    'claim': claims,
    'label': labels,
    'language': languages,
    'site': sites
})

# Define the prompt
prompt_template = "'{claim}' Is this claim 'True', 'Mostly True', 'Half True', 'Mostly False', or 'False'? You must always make sure your final response is prefixed with 'Final Answer:' followed by either 'True', 'Mostly True', 'Half True', 'Mostly False', or 'False'."

final_answer_mappings = {
    "true": "true", "mostly true": "mostly true", "half true": "half true",
    "mostly false": "mostly false", "false": "false"
}

def get_mistral_response(claim, model=model):
    user_prompt = prompt_template.format(claim=claim)
    retries = 0
    while True:
        try:
            response = client.chat.complete(
                model=model,
                messages=[
                    {"role": "user", "content": user_prompt}
                ]
            )
            return response.choices[0].message.content
        except SDKError as e:
            if e.status_code == 429:
                retries += 1
                wait_time = 4
                print(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
            else:
                raise e

def clean_output(output):
    cleaned_output = re.sub(r'[^a-zA-Z\s:]', '', output)
    return cleaned_output.lower()

def extract_final_answer(cleaned_output):
    try:
        final_answer = re.search(r'final answer:\s*([\w\s]+)', cleaned_output)
        if final_answer:
            response = final_answer.group(1).strip()
            return final_answer_mappings.get(response, None)
    except Exception as e:
        print(f"Error extracting final answer: {e}")
    return None

# Initialize or load existing outputs and summary
if os.path.exists(output_file_path):
    output_df = pd.read_csv(output_file_path)
    outputs = output_df.to_dict('records')
else:
    outputs = []

if os.path.exists(summary_file_path):
    with open(summary_file_path, 'r', encoding='utf-8') as file:
        summary = json.load(file)
else:
    summary = {
        'correct': 0,
        'wrong': 0,
        'inconclusive': 0,
        'total': 0,
        'languages': {}
    }

# Process claims and update files iteratively
for index, row in df.iterrows():
    if any(output['claim'] == row['claim'] for output in outputs):
        continue  # Skip already processed claims

    claim = row['claim']
    label = row['label']
    language = row['language']
    
    output = get_mistral_response(claim)
    
    print(f"Model Output: {output}")
    
    cleaned_output = clean_output(output)
    final_answer = extract_final_answer(cleaned_output)
    
    if final_answer is None:
        print("Inconclusive response")
        summary['inconclusive'] += 1
    else:
        print(f"Final Answer: {final_answer.capitalize()}, Actual Answer: {label.capitalize()}")
        if final_answer == label.lower():
            print("Correct response")
            summary['correct'] += 1
        else:
            print("Wrong response")
            summary['wrong'] += 1
    
    # Save outputs
    output_record = {
        'claim': claim,
        'label': label,
        'language': language,
        'output': output,
        'final_answer': final_answer,
        'correct': final_answer == label.lower() if final_answer else False,
        'inconclusive': final_answer is None
    }
    outputs.append(output_record)
    
    # Update language summary
    if language not in summary['languages']:
        summary['languages'][language] = {'correct': 0, 'wrong': 0, 'inconclusive': 0, 'total': 0}
    summary['languages'][language]['total'] += 1
    summary['total'] += 1
    if final_answer is None:
        summary['languages'][language]['inconclusive'] += 1
    elif final_answer == label.lower():
        summary['languages'][language]['correct'] += 1
    else:
        summary['languages'][language]['wrong'] += 1

    # Save results to CSV iteratively
    pd.DataFrame(outputs).to_csv(output_file_path, index=False, encoding='utf-8')

    # Save summary to JSON iteratively
    with open(summary_file_path, 'w', encoding='utf-8') as file:
        json.dump(summary, file, ensure_ascii=False, indent=4)

print(f"Results saved to {output_file_path} and {summary_file_path}")
print(f"Summary: {summary}")


Model Output: Final Answer: Mostly True. While there are indeed different excise duty rates and structures across European countries for various types of petroleum products, which can reflect differences in quality or other factors, the statement oversimplifies the complexity of excise duty policies and the multitude of factors that influence them.
Inconclusive response
Model Output: To assess the claim, we need to consider the context and the details provided. The claim states that trams could not pass due to gathered citizens and that traffic normalized after the protest ended. Without additional context or verification from other sources, we can only evaluate the claim based on the information given.

If the statement accurately reflects the events as described, with trams being unable to pass due to gathered citizens and traffic normalizing after the protest, then the claim would be considered true or mostly true. However, without independent verification or additional details, it'

# Turkic

In [1]:
import os
import time
from mistralai import Mistral, SDKError
import pandas as pd
import json
import re
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Retrieve the Mistral API key from the environment variable
api_key = os.getenv('MISTRAL_API_KEY')
model = "mistral-large-latest"

client = Mistral(api_key=api_key)

# Define the prompting method and language family
language_family = "turkic_translated"
method = "pre-translate"

# Load data
input_file_path = f'../../Translated JSON Files/{language_family}.json'
output_file_path = f"{language_family}_{model}_{method}.csv"
summary_file_path = f"{language_family}_{model}_{method}_summary.json"

if os.path.exists(input_file_path):
    with open(input_file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
else:
    raise FileNotFoundError(f"File not found: {input_file_path}")

claims = [item['claim'] for item in data]
labels = [item['label'] for item in data]
languages = [item['language'] for item in data]
sites = [item['site'] for item in data]

df = pd.DataFrame({
    'claim': claims,
    'label': labels,
    'language': languages,
    'site': sites
})

# Define the prompt
prompt_template = "'{claim}' Is this claim 'True', 'Mostly True', 'Half True', 'Mostly False', or 'False'? You must always make sure your final response is prefixed with 'Final Answer:' followed by either 'True', 'Mostly True', 'Half True', 'Mostly False', or 'False'."

final_answer_mappings = {
    "true": "true", "mostly true": "mostly true", "half true": "half true",
    "mostly false": "mostly false", "false": "false"
}

def get_mistral_response(claim, model=model):
    user_prompt = prompt_template.format(claim=claim)
    retries = 0
    while True:
        try:
            response = client.chat.complete(
                model=model,
                messages=[
                    {"role": "user", "content": user_prompt}
                ]
            )
            return response.choices[0].message.content
        except SDKError as e:
            if e.status_code == 429:
                retries += 1
                wait_time = 4
                print(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
            else:
                raise e

def clean_output(output):
    cleaned_output = re.sub(r'[^a-zA-Z\s:]', '', output)
    return cleaned_output.lower()

def extract_final_answer(cleaned_output):
    try:
        final_answer = re.search(r'final answer:\s*([\w\s]+)', cleaned_output)
        if final_answer:
            response = final_answer.group(1).strip()
            return final_answer_mappings.get(response, None)
    except Exception as e:
        print(f"Error extracting final answer: {e}")
    return None

# Initialize or load existing outputs and summary
if os.path.exists(output_file_path):
    output_df = pd.read_csv(output_file_path)
    outputs = output_df.to_dict('records')
else:
    outputs = []

if os.path.exists(summary_file_path):
    with open(summary_file_path, 'r', encoding='utf-8') as file:
        summary = json.load(file)
else:
    summary = {
        'correct': 0,
        'wrong': 0,
        'inconclusive': 0,
        'total': 0,
        'languages': {}
    }

# Process claims and update files iteratively
for index, row in df.iterrows():
    if any(output['claim'] == row['claim'] for output in outputs):
        continue  # Skip already processed claims

    claim = row['claim']
    label = row['label']
    language = row['language']
    
    output = get_mistral_response(claim)
    
    print(f"Model Output: {output}")
    
    cleaned_output = clean_output(output)
    final_answer = extract_final_answer(cleaned_output)
    
    if final_answer is None:
        print("Inconclusive response")
        summary['inconclusive'] += 1
    else:
        print(f"Final Answer: {final_answer.capitalize()}, Actual Answer: {label.capitalize()}")
        if final_answer == label.lower():
            print("Correct response")
            summary['correct'] += 1
        else:
            print("Wrong response")
            summary['wrong'] += 1
    
    # Save outputs
    output_record = {
        'claim': claim,
        'label': label,
        'language': language,
        'output': output,
        'final_answer': final_answer,
        'correct': final_answer == label.lower() if final_answer else False,
        'inconclusive': final_answer is None
    }
    outputs.append(output_record)
    
    # Update language summary
    if language not in summary['languages']:
        summary['languages'][language] = {'correct': 0, 'wrong': 0, 'inconclusive': 0, 'total': 0}
    summary['languages'][language]['total'] += 1
    summary['total'] += 1
    if final_answer is None:
        summary['languages'][language]['inconclusive'] += 1
    elif final_answer == label.lower():
        summary['languages'][language]['correct'] += 1
    else:
        summary['languages'][language]['wrong'] += 1

    # Save results to CSV iteratively
    pd.DataFrame(outputs).to_csv(output_file_path, index=False, encoding='utf-8')

    # Save summary to JSON iteratively
    with open(summary_file_path, 'w', encoding='utf-8') as file:
        json.dump(summary, file, ensure_ascii=False, indent=4)

print(f"Results saved to {output_file_path} and {summary_file_path}")
print(f"Summary: {summary}")


  from pandas.core import (


Model Output: To evaluate the claim, we need to consider the global trend in food prices and the specific situation in the United States.

1. **Global Food Prices**: The claim states that food prices decreased by 20 percent globally in one year. This is a specific and measurable assertion.

2. **Food Prices in the U.S.**: The claim then notes a contradiction because food prices in the U.S. are increasing.

To verify the claim, we would need data on global food prices and U.S. food prices for the specified period. Without specific data, we can consider general economic trends and known patterns.

- **Global Food Prices**: Typically, global food prices can be influenced by various factors such as crop yields, trade policies, and economic conditions. A 20 percent decrease globally is significant and would likely be reported by international organizations tracking such data.

- **U.S. Food Prices**: The U.S. food market can be influenced by domestic factors such as labor costs, transportat