# Automatic evaluation part 2

This is demo code for Automatic evaluation part 2.

The input files include the response from strategy 1, model 1 (chatGPT_response.txt) and the MC questions. The outputs are the MC answers.

## Package installation

In [None]:
%pip install python-docx
%pip install anthropic
# !pip install mistralai
%pip install tiktoken

Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Downloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/244.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx
Successfully installed python-docx-1.1.2
Collecting anthropic
  Downloading anthropic-0.49.0-py3-none-any.whl.metadata (24 kB)
Downloading anthropic-0.49.0-py3-none-any.whl (243 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m243.4/243.4 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: anthropic
Successfully installed anthropic-0.49.0
Collecting tiktoken
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x8

In [None]:
import os
import fnmatch
import pandas as pd
from google.colab import drive

from openai import OpenAI
import google.generativeai as genai
import anthropic
# from mistralai import Mistral

import csv
import json
import docx
import openpyxl
import base64
import chardet
from typing import Union, List, Dict
import tiktoken
import time
import numpy as np

## File Reading Functions

In [None]:
def file_to_text(file_path: str) -> str:
    """
    Load a supported file and return its contents as a single text string.

    The reader is picked from the file extension (compared case-insensitively).
    Handled extensions: .docx, .xlsx, .csv, .json, .rmd, .md, .py and .txt.
    Markdown (.md) files go through the same plain-text reader as R Markdown.

    :param file_path: The path to the file to be converted.
    :return: A single string containing the file's textual contents.
    :raises ValueError: If the extension is not one of the handled ones.
    """
    suffix = os.path.splitext(file_path)[1].lower()

    if suffix == ".docx":
        return _docx_to_text(file_path)
    if suffix == ".xlsx":
        return _xlsx_to_text(file_path)
    if suffix == ".csv":
        return _csv_to_text(file_path)
    if suffix == ".json":
        return _json_to_text(file_path)
    if suffix in (".rmd", ".md"):
        return _rmarkdown_to_text(file_path)
    if suffix == ".py":
        return _python_to_text(file_path)
    if suffix == ".txt":
        return _txt_to_text(file_path)

    raise ValueError(f"Unsupported file extension: {suffix}")


def _docx_to_text(file_path: str) -> str:
    """
    Extract the paragraph text of a DOCX file, one paragraph per line.

    Only the entries of ``Document.paragraphs`` are read.
    Requires 'python-docx' (pip install python-docx).

    :param file_path: Path to the .docx file.
    :return: The paragraph texts joined with newlines.
    :raises ImportError: If the ``docx`` module is not available (is None).
    """
    if docx is None:
        raise ImportError("Missing dependency 'python-docx'. Install via `pip install python-docx`.")

    document = docx.Document(file_path)
    return "\n".join(paragraph.text for paragraph in document.paragraphs)


def _xlsx_to_text(file_path: str) -> str:
    """
    Flatten every sheet of an XLSX workbook into tab-separated text.

    Each sheet starts with a ``--- Sheet: <name> ---`` header line, followed by
    one line per row with the cells joined by tabs. Empty cells become empty
    strings. The workbook is opened with ``data_only=True``.
    Requires 'openpyxl' (pip install openpyxl).

    :param file_path: Path to the .xlsx file.
    :return: All sheets rendered as newline-separated text.
    :raises ImportError: If the ``openpyxl`` module is not available (is None).
    """
    if openpyxl is None:
        raise ImportError("Missing dependency 'openpyxl'. Install via `pip install openpyxl`.")

    workbook = openpyxl.load_workbook(file_path, data_only=True)
    lines = []
    for title in workbook.sheetnames:
        lines.append(f"--- Sheet: {title} ---")
        for values in workbook[title].iter_rows(values_only=True):
            lines.append("\t".join("" if value is None else str(value) for value in values))

    return "\n".join(lines)


def _csv_to_text(file_path: str) -> str:
    """
    Parse a UTF-8 CSV file and render it as tab-separated lines.

    :param file_path: Path to the .csv file.
    :return: One line per CSV record, fields joined by tabs, records joined by newlines.
    """
    with open(file_path, newline='', encoding='utf-8') as handle:
        lines = ["\t".join(fields) for fields in csv.reader(handle)]
    return "\n".join(lines)


def _json_to_text(file_path: str) -> str:
    """
    Parse a UTF-8 JSON file and return it re-serialised with 2-space indentation.

    Non-ASCII characters are kept as-is (``ensure_ascii=False``).

    :param file_path: Path to the .json file.
    :return: The pretty-printed JSON document.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        parsed = json.loads(handle.read())
    pretty = json.dumps(parsed, indent=2, ensure_ascii=False)
    return pretty


def _rmarkdown_to_text(file_path: str) -> str:
    """
    Return the raw text of an R Markdown (or Markdown) file.

    The file is treated as plain UTF-8 text; embedded code chunks are not
    interpreted in any way.

    :param file_path: Path to the file.
    :return: The complete file content.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        return handle.read()

def _txt_to_text(file_path: str) -> str:
    """
    Reads a text file and returns its content as a string.

    The encoding is guessed with ``chardet`` from the raw bytes. When chardet
    cannot make a guess (it reports ``None``, e.g. for an empty file) the file
    is read as UTF-8 rather than with the platform's locale-dependent default.

    :param file_path: Path to the .txt file.
    :return: The decoded file content.
    :raises RuntimeError: If the file cannot be read or decoded; the original
        exception is attached as ``__cause__``.
    """
    try:
        # Detect the file encoding from the raw bytes
        with open(file_path, 'rb') as file:
            detected = chardet.detect(file.read())

        # A failed detection yields encoding=None; open() would then fall back
        # to the locale default, so pin a deterministic choice instead.
        encoding = detected['encoding'] or 'utf-8'

        # Read the file with the detected encoding
        with open(file_path, 'r', encoding=encoding) as file:
            return file.read()
    except Exception as e:
        # Chain the cause so the underlying traceback is not lost.
        raise RuntimeError(f"Error reading file {file_path}: {e}") from e

def _python_to_text(file_path: str) -> str:
    """
    Return the source of a Python (.py) file, read as plain UTF-8 text.

    :param file_path: Path to the .py file.
    :return: The complete file content.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        return handle.read()

def encode_image(image_path: str) -> tuple[str, str]:
    """
    Base64-encode an image file and look up its MIME type from the extension.

    Recognised extensions (case-insensitive): .jpg, .jpeg, .png, .gif, .webp.

    :param image_path: Path to the image file.
    :return: ``(base64_data, mime_type)``, the file content as a base64 string
        and the matching MIME type.
    :raises FileNotFoundError: If ``image_path`` does not exist.
    :raises ValueError: If the extension is not one of the recognised ones.
    """
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    # Read the raw bytes, then encode them
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes).decode('utf-8')

    # Map the lower-cased extension to a MIME type
    suffix = os.path.splitext(image_path)[1].lower()
    known_types = {
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.gif': 'image/gif',
        '.webp': 'image/webp'
    }
    if suffix not in known_types:
        raise ValueError(f"Unsupported image format: {suffix}")

    return encoded, known_types[suffix]


## Model functions

In [None]:
def generate_MC_answer_openai(api_key: str,  dir: str, sample_id: str, mc_doc: str, case_id: str, report_name:str, subfolder: str, system_prompt: str = None, model: str = "gpt-4o") -> tuple[str, str]:
    """
    Answers a multiple-choice question from a report and then asks whether the
    report contained the information needed, using OpenAI's chat completions API.

    The report is read from ``<dir>/Report_output/<report_name>``. If that file
    contains only the word "error" (ignoring case and surrounding whitespace),
    the report is read instead from
    ``<dir>/Rmd_word_document/<subfolder>/<case_id>/<report_name with ".txt" replaced by "-updated.txt">``.

    Args:
        api_key (str): Your OpenAI API key.
        dir (str): Project root directory containing "Report_output" and "Rmd_word_document".
        sample_id (str): Unique sample identifier. Currently not used by this function.
        mc_doc (str): The multiple-choice question content.
        case_id (str): Case study identifier (folder name used for the fallback report).
        report_name (str): File name of the report inside "Report_output".
        subfolder (str): Folder under "Rmd_word_document" used for the fallback report.
        system_prompt (str, optional): System instruction for the OpenAI model.
            A built-in prompt is used when None.
        model (str, optional): The OpenAI model to use. Defaults to "gpt-4o".

    Returns:
        tuple: (MCQ answer, Yes/No answer), both stripped of surrounding
        whitespace. ("error", "error") is returned if an API call raises.
    """

    # Initialize OpenAI client
    client = OpenAI(api_key=api_key)

    # Load the report; fall back to the "-updated" report when the primary
    # file is just an "error" marker.
    report_txt = file_to_text(os.path.join(dir, "Report_output", report_name))
    if report_txt.strip().lower() == "error":
        report_name_updated = report_name.replace(".txt", "-updated.txt")
        report_txt = file_to_text(os.path.join(dir, "Rmd_word_document", subfolder, case_id, report_name_updated))

    # Use a default system prompt if none is provided
    if system_prompt is None:
        system_prompt = (
        "You are an expert scientist who has a strong background in both bioinformatics and biology. "
        "I have provided you with a .txt file containing the outputs from my analysis, and in some cases, additional supporting materials such as plots. "
        "These files are the core analytics you will need to understand to read the following multiple choice question. When answering the multiple choice question, please select one option only. "
        "Please be concise without including any additional information. For example if you think option A is the correct answer, then please just output A."
        )

    # Conversation so far: system instruction plus the first question.
    # The blank line after mc_doc keeps the last answer option from running
    # straight into the formatting instruction.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": "Please read the following report and use the information to answer the multiple choice question below.\n"+
                         "Report:\n"+report_txt+"\n\nMultiple Choice Question:\n"+mc_doc+"\n\n"+
                         "Please provide only the letter of the correct option (A, B, C, D, or E). Do not include the answer text, explanations, or any other information."},
    ]

    try:
        # Question 1: Multiple Choice Answer
        response_mcq = client.chat.completions.create(
            model=model,
            messages=messages
        )

        # message.content may be None; normalise to a stripped string
        mcq_answer = (response_mcq.choices[0].message.content or "").strip()

        # Append the response to maintain conversation history
        messages.append({"role": "assistant", "content": mcq_answer})

        # Question 2: Yes/No - Is the MCQ information in the report?
        messages.append({"role": "user", "content": "Does the report contain the information necessary to answer the multiple-choice question? Please provide only 'Yes' or 'No'. Do not include explanations or additional details."})

        response_yes_no = client.chat.completions.create(
            model=model,
            messages=messages  # Now includes Q1, A1, and Q2
        )

        yes_no_answer = (response_yes_no.choices[0].message.content or "").strip()

        return mcq_answer, yes_no_answer

    except Exception as e:
        # Best effort: report the failure and return sentinel values so a
        # batch run can carry on with the next item.
        print(f"Exception occurred: {e}")
        return "error", "error"



In [None]:
def generate_MC_answer_google_gemini(api_key: str,  dir: str, sample_id: str, mc_doc: str, case_id: str, report_name:str, subfolder: str, system_prompt: str = None, model: str = "gemini-2.0-flash-exp") -> tuple[str, str]:
    """
    Answers a multiple-choice question from a report and then asks whether the
    report contained the information needed, using Google's Generative AI API.

    The report is read from ``<dir>/Report_output/<report_name>``. If that file
    contains only the word "error" (ignoring case and surrounding whitespace),
    the report is read instead from
    ``<dir>/Rmd_word_document/<subfolder>/<case_id>/<report_name with ".txt" replaced by "-updated.txt">``.

    Args:
        api_key (str): Your Google Generative AI API key.
        dir (str): Project root directory containing "Report_output" and "Rmd_word_document".
        sample_id (str): Unique sample identifier. Currently not used by this function.
        mc_doc (str): The multiple-choice document content for the question.
        case_id (str): Case study identifier (folder name used for the fallback report).
        report_name (str): File name of the report inside "Report_output".
        subfolder (str): Folder under "Rmd_word_document" used for the fallback report.
        system_prompt (str, optional): The system-level instruction for the AI.
            A built-in prompt is used when None.
        model (str, optional): The Google Generative AI model to use. Defaults to "gemini-2.0-flash-exp".

    Returns:
        tuple: (MCQ answer, Yes/No answer), both stripped of surrounding
        whitespace. ("error", "error") is returned if the chat calls raise.
    """
    # Configure the API key
    genai.configure(api_key=api_key)

    # Load the report; fall back to the "-updated" report when the primary
    # file is just an "error" marker.
    report_txt = file_to_text(os.path.join(dir, "Report_output", report_name))
    if report_txt.strip().lower() == "error":
        report_name_updated = report_name.replace(".txt", "-updated.txt")
        report_txt = file_to_text(os.path.join(dir, "Rmd_word_document", subfolder, case_id, report_name_updated))

    # Use a default system prompt if none is provided
    if system_prompt is None:
        system_prompt = (
        "You are an expert scientist who has a strong background in both bioinformatics and biology. "
        "I have provided you with a .txt file containing the outputs from my analysis, and in some cases, additional supporting materials such as plots. "
        "These files are the core analytics you will need to understand to read the following multiple choice question. When answering the multiple choice question, please select one option only. "
        "Please be concise without including any additional information. For example if you think option A is the correct answer, then please just output A."
        )

    # Keep the `model` argument (a model name) separate from the model object.
    gemini_model = genai.GenerativeModel(
        model_name=model,
        system_instruction=system_prompt,
    )

    # Prepare the input as a structured chat history
    history = [
        {
            "role": "user",
            "parts": [
                "Please read the following report and use the information to answer the multiple choice question below.",
                "Report:",
                report_txt,
                "Multiple choice question:",
                mc_doc
            ]
        }
    ]

    try:
        # Start a chat session
        chat_session = gemini_model.start_chat(history=history)

        # Question 1: Multiple Choice Answer
        response_mcq = chat_session.send_message("Please provide only the letter of the correct option (A, B, C, D, or E). Do not include the answer text, explanations, or any other information.")

        # Question 2: Yes/No Answer - Is the MCQ information in the report?
        response_yes_no = chat_session.send_message("Does the report contain the information necessary to answer the multiple-choice question? Please provide only 'Yes' or 'No'. Do not include explanations or additional details.")

        # Strip the answers (the raw text can end with a newline) so the
        # return values match the Claude variant.
        return response_mcq.text.strip(), response_yes_no.text.strip()

    except Exception as e:
        # Best effort: report the failure and return sentinel values so a
        # batch run can carry on with the next item.
        print(f"Exception occurred: {e}")
        return "error", "error"

In [None]:
def generate_MC_answer_claude(api_key: str,  dir: str, sample_id: str, mc_doc: str, case_id: str, report_name: str, subfolder: str, system_prompt: str = None, model: str = "claude-3-5-sonnet-20241022") -> tuple[str, str]:
    """
    Answers a multiple-choice question from a report and then asks whether the
    report contained the information needed, using Claude's Messages API.

    The report is read from ``<dir>/Report_output/<report_name>``. If that file
    contains only the word "error" (ignoring case and surrounding whitespace),
    the report is read instead from
    ``<dir>/Rmd_word_document/<subfolder>/<case_id>/<report_name with ".txt" replaced by "-updated.txt">``.

    Args:
        api_key (str): Your Claude API key.
        dir (str): Project root directory containing "Report_output" and "Rmd_word_document".
        sample_id (str): Unique sample identifier. Currently not used by this function.
        mc_doc (str): The multiple-choice question content.
        case_id (str): Case study identifier (folder name used for the fallback report).
        report_name (str): File name of the report inside "Report_output".
        subfolder (str): Folder under "Rmd_word_document" used for the fallback report.
        system_prompt (str, optional): System instruction for Claude.
            A built-in prompt is used when None.
        model (str, optional): The Claude model to use. Defaults to "claude-3-5-sonnet-20241022".

    Returns:
        tuple: (MCQ answer, Yes/No answer), both stripped of surrounding
        whitespace. ("error", "error") is returned if an API call raises.
    """
    # Initialize the Claude API client
    client = anthropic.Anthropic(
        api_key=api_key
    )

    # Load the report; fall back to the "-updated" report when the primary
    # file is just an "error" marker.
    report_txt = file_to_text(os.path.join(dir, "Report_output", report_name))
    if report_txt.strip().lower() == "error":
        report_name_updated = report_name.replace(".txt", "-updated.txt")
        report_txt = file_to_text(os.path.join(dir, "Rmd_word_document", subfolder, case_id, report_name_updated))

    # Use a default system prompt if none is provided
    if system_prompt is None:
        system_prompt = (
        "You are an expert scientist who has a strong background in both bioinformatics and biology. "
        "I have provided you with a .txt file containing the outputs from my analysis, and in some cases, additional supporting materials such as plots. "
        "These files are the core analytics you will need to understand to read the following multiple choice question. When answering the multiple choice question, please select one option only. "
        "Please be concise without including any additional information. For example if you think option A is the correct answer, then please just output A."
        )

    # First (and only) user turn for question 1. The formatting instruction is
    # included once, separated from the question by a blank line; it is not
    # repeated as a second consecutive user message.
    messages = [
            {
                "role": "user",
                "content": ("Please read the following report and use the information to answer the multiple choice question below.\n"+
                         "Report:\n"+report_txt+"\n\nMultiple Choice Question:\n"+mc_doc+"\n\n"+
                         "Please provide only the letter of the correct option (A, B, C, D, or E). Do not include the answer text, explanations, or any other information."
                )
            }
        ]

    try:
        # Question 1: Multiple Choice Answer
        response_mcq = client.messages.create(
            model=model,
            max_tokens=1024,
            system=system_prompt,
            messages=messages
        )

        # Extract MCQ answer
        mcq_answer = response_mcq.content[0].text.strip()

        # Append the response to maintain conversation history
        messages.append({"role": "assistant", "content": mcq_answer})

        # Question 2: Yes/No - Is the MCQ information in the report?
        messages.append({"role": "user", "content": "Does the report contain the information necessary to answer the multiple-choice question? Please provide only 'Yes' or 'No'. Do not include explanations or additional details."})

        response_yes_no = client.messages.create(
            model=model,
            max_tokens=1024,
            system=system_prompt,
            messages=messages  # Now includes Q1, A1, and Q2
        )

        # Extract Yes/No answer
        yes_no_answer = response_yes_no.content[0].text.strip()

        return mcq_answer, yes_no_answer

    except Exception as e:
        # Best effort: report the failure and return sentinel values so a
        # batch run can carry on with the next item.
        print(f"Exception occurred: {e}")
        return "error", "error"


## Mount google drive

In [None]:
# Mount Google Drive (google.colab) at /content/drive; the project paths used
# later in this notebook live under that mount point.
drive.mount('/content/drive')

Mounted at /content/drive


## Read Prompt Sheet

In [None]:
## Don't change this url
# Case-study sheet: downloaded as CSV straight from the Google Sheets export
# URL and loaded into a DataFrame (one row per sample, judging by the
# Sample_ID column; confirm against the sheet).
url = 'https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/export?format=csv&gid=XXXXXXXXXXx'
case_df = pd.read_csv(url)
# Quick look at the first two rows to confirm the download worked
print(case_df.head(2))
# `df` is a second name for the same DataFrame (not a copy)
df=case_df

     Authors          Google Folder              CaseStudy_ID  \
0  EXAMPLE 1  Bioinformatics_method  Dataset_method_increment   
1  EXAMPLE 2  Bioinformatics_method  Dataset_method_increment   

                   Sample_ID LLM task updated? Input type RMD_Code_input_ID  \
0  Dataset_method_incrementA                No      Graph               NaN   
1   Dataset_method_increment                No       Data               NaN   

  TXT_input_ID Graphics_input_Folder Data_input_ID  ... Task Difficulty  \
0          NaN                   NaN           NaN  ...          Simple   
1          NaN                   NaN           NaN  ...             NaN   

  Task Difficulty (evaluated by Fei) (# clues given - score by human)   \
0                             Simple                                 0   
1                             Simple                                 1   

  Task (this should match what's in your .rmd file)  MC1 ID (easy)  \
0  What are the pathways that are most associat

In [None]:
## Don't change this url
# Multiple-choice question sheet: downloaded as CSV from the Google Sheets
# export URL and loaded into a DataFrame.
url = 'https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXxxxxx/export?format=csv&gid=XXXXXX'
mc_df = pd.read_csv(url)
# Optional sanity checks (uncomment to inspect the sheet):
# print(mc_df.head(25))
# print(mc_df['Specific Question'][0])
# print(mc_df['Google Folder'][0])
# print(mc_df['CaseStudy_ID'][0])

## Parameters

In [None]:
# API keys. Each key is taken from an environment variable when one is set, so
# real secrets never have to be typed into (and saved with) the notebook.
# The literal fallbacks are the original placeholders: replace them only for a
# throw-away local run and never commit a real key.
OPENAI_api_key = os.environ.get("OPENAI_API_KEY", "sk-")
GEMINI_api_key = os.environ.get("GEMINI_API_KEY", "AI")
CLAUDE_api_key = os.environ.get("ANTHROPIC_API_KEY", "sk-")
## root_dir is the only path that you need to modify by yourself.
## It must point at the shared Proj-LLM-Bioinfo-Interpretation2024 folder itself
## (the code appends "Rmd_word_document", "Report_output", ... to it), e.g.
## /content/drive/MyDrive/Proj-LLM-Bioinfo-Interpretation2024/
root_dir = "/content/drive/MyDrive/Usyd/Proj-LLM-Bioinfo-Interpretation2024/"
# pattern = "MC_*.docx"

In [None]:
#### test
# Sanity check: build the folder path for row 1 of the case sheet from
# root_dir, 'Google Folder' and 'CaseStudy_ID' (the notebook displays the result).
os.path.join(root_dir, df['Google Folder'][1], df['CaseStudy_ID'][1])

'/content/drive/MyDrive/Usyd/Proj-LLM-Bioinfo-Interpretation2024/Bioinformatics_method/Dataset_method_increment'

## Multiple Choice questions

In [None]:
# Preview cell: count how many MC questions belong to the author code "CL".
# NOTE(review): columns_to_check is not used in this cell.
columns_to_check = ['TXT_input_ID', 'Graphics_input_Folder', 'Data_input_ID']

results = []
# Positional indices of the rows in mc_df whose "Authors" value is "CL"
index=np.where(mc_df["Authors"] == "CL")[0]
len(index)

9

In [None]:
# Main evaluation loop.
# For every selected MC question, every sample of the matching case study and
# every report written for that sample (one report per report model), all three
# answering models are asked the question. One result row is recorded per
# (question, sample, report, answering model).
columns_to_check = ['TXT_input_ID', 'Graphics_input_Folder', 'Data_input_ID']

# Models whose reports are evaluated; the report file is "<Sample_ID>.<model>.txt".
report_models = ["gpt-4o", "gemini-2.0-flash", "claude-3-7-sonnet-20250219"]

# Answering models: (label used in the log line, model name, function, API key).
answering_models = [
    ("GPT4o", "gpt-4o", generate_MC_answer_openai, OPENAI_api_key),
    ("Gemini", "gemini-2.0-flash", generate_MC_answer_google_gemini, GEMINI_api_key),
    ("Claude", "claude-3-7-sonnet-20250219", generate_MC_answer_claude, CLAUDE_api_key),
]

# Case-sheet columns copied into every result row.
selected_columns = ["Authors", "Google Folder", "Data", "Task Category"]

results = []
index = np.where(mc_df["Authors"] == "CL")[0]
for idx in index:
    google_folder = mc_df['Google Folder'][idx]
    target_casestudy = mc_df['CaseStudy_ID'][idx]
    # f-string instead of "+" so a blank (NaN) cell is logged rather than
    # raising TypeError before the NaN check below.
    print(f"{google_folder} {idx}")
    print(target_casestudy)
    if pd.isna(google_folder) or pd.isna(target_casestudy):
        continue

    # Directory names on disk use underscores instead of spaces.
    folder_dirname = google_folder.strip().replace(" ", "_")
    case_dirname = target_casestudy.strip().replace(" ", "_")
    subfolder = os.path.join(root_dir, "Rmd_word_document", folder_dirname, case_dirname)
    print(subfolder)
    if not os.path.exists(subfolder):
        continue

    mc_txt = mc_df['Specific Question'][idx]
    print(mc_txt)
    question_label = f"{target_casestudy}_{mc_df['Question_ID'][idx]}"

    # The case sheet is matched on the raw CaseStudy_ID value.
    filtered_df = case_df[case_df['CaseStudy_ID'] == target_casestudy]
    for _, row_entry in filtered_df.iterrows():
        sample_id = row_entry["Sample_ID"]
        for report_model in report_models:
            report_name = f"{sample_id}.{report_model}.txt"
            print(report_name)

            # Ask every answering model about this report. The sanitised
            # directory names are passed on, because they are the ones whose
            # existence was verified above.
            answers = {}
            for _, model_name, ask, key in answering_models:
                answers[model_name] = ask(
                    api_key=key, dir=root_dir, case_id=case_dirname, subfolder=folder_dirname,
                    sample_id=sample_id, report_name=report_name, mc_doc=mc_txt, model=model_name,
                )

            for label, model_name, _, _ in answering_models:
                mcq_answer, info_check = answers[model_name]
                print(f"Sample ID: {sample_id}, Question ID: {question_label}, "
                      f"{label} answer:{mcq_answer} info:{info_check}\n")

            # Pause between reports (presumably to stay under API rate limits).
            time.sleep(10)

            for _, model_name, _, _ in answering_models:
                mcq_answer, info_check = answers[model_name]
                new_row = {
                    "CaseStudy_ID": target_casestudy,
                    "Question_ID": mc_df['Question_ID'][idx],
                    "Answer": mc_df['Answer'][idx],
                    "Sample_ID": sample_id,
                    "Model_name": model_name,
                    "Model_return": mcq_answer,
                    "Info_check": info_check,
                    "Report_name": report_name,
                }
                new_row.update(row_entry[selected_columns].to_dict())
                results.append(new_row)


result_df = pd.DataFrame(results)



SpatialSim 51
benchmark_SpatialSim_1
/content/drive/MyDrive/Usyd/Proj-LLM-Bioinfo-Interpretation2024/Rmd_word_document/SpatialSim/benchmark_SpatialSim_1
Question: Which simulator performed the best based on the overall score?
A: SRTsim.
B: scDesign3.
C: SPARsim.
D: Symsim.
E. None of the above
benchmark_SpatialSim_1A.gpt-4o.txt
Sample ID: benchmark_SpatialSim_1A, Question ID: benchmark_SpatialSim_1_1, GPT4o answer:C info:Yes

Sample ID: benchmark_SpatialSim_1A, Question ID: benchmark_SpatialSim_1_1, Gemini answer:E
 info:Yes


Sample ID: benchmark_SpatialSim_1A, Question ID: benchmark_SpatialSim_1_1, Claude answer:A info:Yes

benchmark_SpatialSim_1A.gemini-2.0-flash.txt
Sample ID: benchmark_SpatialSim_1A, Question ID: benchmark_SpatialSim_1_1, GPT4o answer:A info:Yes

Sample ID: benchmark_SpatialSim_1A, Question ID: benchmark_SpatialSim_1_1, Gemini answer:A
 info:Yes


Sample ID: benchmark_SpatialSim_1A, Question ID: benchmark_SpatialSim_1_1, Claude answer:A info:Yes

benchmark_Spatial

In [None]:
# Normalise the model outputs: trim surrounding whitespace, then drop a single
# trailing period (e.g. "A." -> "A", "Yes.\n" -> "Yes").
for answer_column in ("Model_return", "Info_check"):
    trimmed = result_df[answer_column].str.strip()
    result_df[answer_column] = trimmed.str.replace(r"\.$", "", regex=True)

# Displayed by the notebook: (number of result rows, number of columns)
result_df.shape

(324, 12)

In [None]:
# Save the results under <root_dir>/MCQ_output. The path is derived from
# root_dir so that root_dir stays the only location a user has to adjust.
output_dir = os.path.join(root_dir, "MCQ_output")
os.makedirs(output_dir, exist_ok=True)  # first run: the folder may not exist yet
output_path = os.path.join(output_dir, "APRIL15_MCQ_result_strategy1.csv")
print(output_path)
# Write the header only when starting a new (missing or zero-byte) file.
# NOTE: rows are appended, so re-running this cell adds the same rows again.
file_exists = os.path.isfile(output_path) and os.path.getsize(output_path) > 0
# Save data: Append if file exists, otherwise create a new one
result_df.to_csv(output_path, mode='a', index=False, header=not file_exists)

/content/drive/MyDrive/Usyd/Proj-LLM-Bioinfo-Interpretation2024/MCQ_output/APRIL15_MCQ_result_strategy1.csv
