In [1]:
import os
import json
import csv
import random
from openai import OpenAI
import time


In [2]:


# Initialize the API client once; every processing function shares it.
# The SDK automatically retries transient failures (connection errors, 429,
# >=500) only twice by default. A batch run of this notebook was aborted by a
# transient HTTP 500, so allow more automatic retries before an exception
# discards the rows collected so far for a file.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"), max_retries=5)

# Model identifier sent with every request; it is also embedded in the name of
# each output CSV file written by write_to_csv.
model_name = "gpt-5"

# Input folders. The main loops scan each one for .json evaluation files, and
# the result CSVs are written next to the inputs.
BQA_FOLDER = "LogicBenchFiles/Eval .json Files/BQA"
MCQA_FOLDER = "LogicBenchFiles/Eval .json Files/MCQA"
EASTERN_BQA_FOLDER = "LogicBenchFiles/Eval .json Files/Eastern BQA"
EASTERNDATA_BQA_FOLDER = "LogicBenchFiles/Eval .json Files/EasternData BQA"

# Instruction suffixes appended to every prompt so the requested answer format
# is the same for all questions of a given type. These strings are sent to the
# model verbatim and are also stored in the "Endprompt" output column.
ENDPROMPT_BQA = "Only answer 'yes' or 'no' do not provide any additional characters like punctuation or explanation just yes or no thats it.:"
ENDPROMPT_MCQA = "Do not provide any additional characters like punctuation or explanation just the choice thats it (ex. Choice_1, Choice_2, Choice_3, Choice_4).:"
# Suffix for dataset-backed questions: covers yes/no, count and recall answers
# and spells out the exact format for section lists and name lists.
ENDPROMPT_EasternData = (
    "If the question is a yes or no question, only answer 'yes' or 'no'. "
    "If it is a count or recall question, answer with only the exact number or the exact answer. "
    "Do not include any punctuation, explanations, or extra words — just the direct answer.\n\n"
    "For recall questions that require listing course sections with times and days, format exactly like this:\n"
    "Sec 1, tr, 8:00am to 9:15am, Sec 2, mwf, 8:00am to 8:50am, Sec 3, mwf, 9:00am to 9:50am\n"
    "No extra punctuation, no 'and', no trailing periods.\n\n"
    "For recall questions that require listing names, use the professor's full name exactly as displayed in the dataset, "
    "in the same order they appear. Separate names with a comma and a space, like this:\n"
    "Jane Doe, Jack H. Ryan, Beth Stevens\n"
    "Do not reorder, reformat, or add extra text of any kind."
)

# Dataset file paths. get_dataset_and_text picks one of these from keywords in
# the question context: contact info -> DATACONPath, course times ->
# DATACORPath, faculty office hours -> DATAOHPath.
DATACONPath = "intermediateFiles/test10Spring2025_contactinfo_2025-04-02.csv"
DATACORPath = "intermediateFiles/Test8Spring2025_Instructor_Course_Schedule_2025-05-07.csv"
DATAOHPath = "intermediateFiles/6TestSpring2025_Exclusive_Hours_2025-05-07.csv"




In [None]:

# function to process BQA files
def process_bqa(file_path):
    """Send every yes/no question in a BQA evaluation file to the model.

    Args:
        file_path: Path to a JSON file with a top-level "samples" list. Each
            sample provides a "context" string and a "qa_pairs" list whose
            entries carry "question" and "answer".

    Returns:
        A list with one dict per question, keyed by "Prompt", "Context",
        "Question", "Endprompt", "Correct answer" and "ChatGPT answer". The
        model's answer is lower-cased and stripped of surrounding whitespace.
    """
    # Decode explicitly as UTF-8 (as process_bqaDATA does) so the parsed text
    # does not depend on the platform's default encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    rows = []
    for current_q in data["samples"]:
        for qa1 in current_q["qa_pairs"]:
            prompt = current_q["context"] + "\n" + qa1["question"] + "\n" + ENDPROMPT_BQA

            response = client.responses.create(
                model=model_name,
                input=prompt,
                store=False,
            )
            chat_gpt_answer = response.output_text.lower().strip()

            rows.append({
                "Prompt": prompt,
                "Context": current_q["context"],
                "Question": qa1["question"],
                "Endprompt": ENDPROMPT_BQA,
                "Correct answer": qa1["answer"],
                "ChatGPT answer": chat_gpt_answer,
            })
    return rows


# function to process MCQA files
def process_mcqa(file_path):
    """Send every multiple-choice question in an MCQA evaluation file to the model.

    Args:
        file_path: Path to a JSON file with a top-level "samples" list. Each
            sample provides "context", "question", "answer" and a "choices"
            mapping of choice label to choice text.

    Returns:
        A list with one dict per sample, keyed by "Prompt", "Context",
        "Question", "Endprompt", "Correct answer" and "ChatGPT answer". The
        model's answer is lower-cased and stripped of surrounding whitespace.
    """
    # Decode explicitly as UTF-8 (as process_bqaDATA does) so the parsed text
    # does not depend on the platform's default encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    rows = []
    for current_q in data["samples"]:
        # One "<label>: <text>" line per choice, in the order stored in the file.
        choice_lines = "".join(
            f"{key}: {value}\n" for key, value in current_q["choices"].items()
        )
        prompt = (
            current_q["context"] + "\n" + current_q["question"]
            + " Select the best answer from the choices below:\n\n"
            + choice_lines
            + "\n" + ENDPROMPT_MCQA
        )

        response = client.responses.create(
            model=model_name,
            input=prompt,
            store=False,
        )
        chat_gpt_answer = response.output_text.lower().strip()

        rows.append({
            "Prompt": prompt,
            "Context": current_q["context"],
            "Question": current_q["question"],
            "Endprompt": ENDPROMPT_MCQA,
            "Correct answer": current_q["answer"],
            "ChatGPT answer": chat_gpt_answer,
        })
    return rows



def write_to_csv(file_path, rows):
    """Write result rows to a CSV file next to the evaluation file they came from.

    The output is named "<input name without extension>_<model_name>_completed.csv"
    and is created in the same folder as file_path.

    Args:
        file_path: Path of the evaluation input file (normally "*.json").
        rows: List of dicts keyed by "Prompt", "Context", "Question",
            "Endprompt", "Correct answer" and "ChatGPT answer"; rows may also
            carry "Dataset Used".
    """
    source_name = os.path.basename(file_path)
    # Strip the extension explicitly. A plain str.replace(".json", ...) leaves a
    # name without ".json" unchanged, so the CSV would overwrite the input file.
    if source_name.endswith(".json"):
        stem = source_name[: -len(".json")]
    else:
        stem = os.path.splitext(source_name)[0]
    csv_file = os.path.join(os.path.dirname(file_path), f"{stem}_{model_name}_completed.csv")

    fieldnames = ["Prompt", "Context", "Question", "Endprompt", "Correct answer", "ChatGPT answer"]
    # Rows built for dataset-backed questions include "Dataset Used"; DictWriter
    # raises ValueError on keys missing from fieldnames, so add the column
    # (after "Question") whenever any row carries it.
    if any("Dataset Used" in row for row in rows):
        fieldnames.insert(fieldnames.index("Endprompt"), "Dataset Used")

    with open(csv_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)
    print(f"Saved: {csv_file}")



In [3]:
# --- new helper function to read dataset text ---
def read_csv_as_text(file_path, max_lines=None):
    """Return the raw text of a CSV file wrapped in a labelled block for a prompt.

    Args:
        file_path: Path of the CSV file to read (decoded as UTF-8).
        max_lines: Number of leading lines to include. The default, None,
            includes the whole file; pass a number to cap the prompt size.

    Returns:
        A string holding a "[DATASET PREVIEW FROM <file name>]" header line
        followed by the selected lines. If the file cannot be read, a
        "[Could not load dataset: <error>]" placeholder is returned instead,
        so callers always get text they can embed.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            lines = f.readlines()
            preview = "".join(lines[:max_lines])
            return f"\n\n[DATASET PREVIEW FROM {os.path.basename(file_path)}]\n{preview}\n\n"
    except Exception as e:
        # Deliberate best effort: the placeholder still goes into the prompt,
        # but report the failure so a run without its dataset is noticed
        # instead of silently producing meaningless answers.
        print(f"WARNING: could not load dataset {file_path}: {e}")
        return f"\n\n[Could not load dataset: {e}]\n\n"

# --- logic for dataset choice ---
def get_dataset_and_text(context_text):
    """Choose the dataset that matches a question context and load its text.

    The context is matched case-insensitively against fixed phrases, checked
    in this order: "faculty office hours", "course times", then
    "contact info" / "faculty contact".

    Returns:
        (dataset path, dataset text from read_csv_as_text) for the first
        phrase that matches, or (None, "") when none does.
    """
    haystack = context_text.lower()

    if "faculty office hours" in haystack:
        chosen_path = DATAOHPath
    elif "course times" in haystack:
        chosen_path = DATACORPath
    elif any(phrase in haystack for phrase in ("contact info", "faculty contact")):
        chosen_path = DATACONPath
    else:
        # No known dataset is referenced by this context.
        return None, ""

    return chosen_path, read_csv_as_text(chosen_path)

def process_bqaDATA(file_path, request_delay=15):
    """Send every dataset-backed question in an evaluation file to the model.

    For each sample the matching dataset is chosen by get_dataset_and_text and
    its text is embedded in the prompt ahead of the question.

    Args:
        file_path: Path to a JSON file with a top-level "samples" list. Each
            sample provides a "context" string and a "qa_pairs" list whose
            entries carry "question" and "answer".
        request_delay: Seconds to pause before each sample. Values of 0 or
            less skip the pause.

    Returns:
        A list with one dict per question, keyed by "Prompt", "Context",
        "Question", "Dataset Used", "Endprompt", "Correct answer" and
        "ChatGPT answer". "Prompt" stores a short placeholder where the dataset
        text was inserted rather than the dataset itself.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    # Stands in for the dataset text in the saved "Prompt" column. Kept outside
    # the f-string below: reusing double quotes inside a double-quoted f-string
    # is a SyntaxError before Python 3.12.
    data_placeholder = "' Data is inserted here' "

    rows = []
    for current_q in data["samples"]:
        if request_delay > 0:
            time.sleep(request_delay)
        dataset_path, dataset_text = get_dataset_and_text(current_q["context"])

        for qa1 in current_q["qa_pairs"]:
            # The prompt sent to the model embeds the dataset text directly.
            prompt = (
                f"{current_q['context']}\n{dataset_text}{qa1['question']}\n{ENDPROMPT_EasternData}"
            )

            displayprompt = (
                f"{current_q['context']}\n{data_placeholder}{qa1['question']}\n{ENDPROMPT_EasternData}"
            )

            response = client.responses.create(
                model=model_name,
                input=prompt,
                store=False,
            )

            chat_gpt_answer = response.output_text.lower().strip()

            rows.append({
                "Prompt": displayprompt,
                "Context": current_q["context"],
                "Question": qa1["question"],
                "Dataset Used": dataset_path if dataset_path else "N/A",
                "Endprompt": ENDPROMPT_EasternData,
                "Correct answer": qa1["answer"],
                "ChatGPT answer": chat_gpt_answer,
            })
    return rows

def write_to_csv(file_path, rows):
    """Write result rows to a CSV file next to the evaluation file they came from.

    The output is named "<input name without extension>_<model_name>_completed.csv"
    and is created in the same folder as file_path. The "Dataset Used" column
    is always written; rows that lack the key get an empty cell.

    Args:
        file_path: Path of the evaluation input file (normally "*.json").
        rows: List of dicts keyed by the column names listed in fieldnames.
    """
    source_name = os.path.basename(file_path)
    # Strip the extension explicitly. A plain str.replace(".json", ...) leaves a
    # name without ".json" unchanged, so the CSV would overwrite the input file.
    if source_name.endswith(".json"):
        stem = source_name[: -len(".json")]
    else:
        stem = os.path.splitext(source_name)[0]
    csv_file = os.path.join(os.path.dirname(file_path), f"{stem}_{model_name}_completed.csv")

    fieldnames = ["Prompt", "Context", "Question", "Dataset Used", "Endprompt", "Correct answer", "ChatGPT answer"]
    with open(csv_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)
    print(f"Saved: {csv_file}")

In [None]:

# main loop for Logic Bench
# Each folder is paired with the function that knows its file layout.
for folder, processor in ((BQA_FOLDER, process_bqa), (MCQA_FOLDER, process_mcqa)):
    for filename in os.listdir(folder):
        # Only .json evaluation inputs are processed; anything else in the
        # folder (such as previously written result CSVs) is skipped.
        if not filename.endswith(".json"):
            continue
        filepath = os.path.join(folder, filename)
        print(f"Processing {filepath}...")
        rows = processor(filepath)
        write_to_csv(filepath, rows)

In [None]:
# main loop for Eastern BQA
# Eastern BQA files share the BQA layout, so they reuse process_bqa.
for folder, processor in ((EASTERN_BQA_FOLDER, process_bqa),):
    for filename in os.listdir(folder):
        # Only .json evaluation inputs are processed; anything else in the
        # folder (such as previously written result CSVs) is skipped.
        if not filename.endswith(".json"):
            continue
        filepath = os.path.join(folder, filename)
        print(f"Processing {filepath}...")
        rows = processor(filepath)
        write_to_csv(filepath, rows)

In [4]:
# main loop for Eastern Dataset Questions
# Dataset-backed questions: process_bqaDATA embeds the matching CSV in each prompt.
for folder, processor in ((EASTERNDATA_BQA_FOLDER, process_bqaDATA),):
    for filename in os.listdir(folder):
        # Only .json evaluation inputs are processed; anything else in the
        # folder (such as previously written result CSVs) is skipped.
        if not filename.endswith(".json"):
            continue
        filepath = os.path.join(folder, filename)
        print(f"Processing {filepath}...")
        rows = processor(filepath)
        write_to_csv(filepath, rows)

Processing LogicBenchFiles/Eval .json Files/EasternData BQA\Count.json...


InternalServerError: Error code: 500 - {'error': {'message': 'An error occurred while processing your request. You can retry your request, or contact us through our help center at help.openai.com if the error persists. Please include the request ID req_78aeee4441284622831d3422c39843cc in your message.', 'type': 'server_error', 'param': None, 'code': 'server_error'}}

In [None]:
import os
from openai import OpenAI
import random
import csv
import json

In [None]:
# Connectivity smoke test: send one fixed greeting and print the reply.
# The request always targets "gpt-4o-mini", independent of model_name, and is
# the only call in this notebook made with store=True.
# Note: this rebinds the notebook-level `client`.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

response = client.responses.create(
    model="gpt-4o-mini",
    input="Hello ChatGPT, What model are you?",
    store=True,
)

print(response.output_text)


In [None]:
# Load the EVAL_ModusPones(BQA) evaluation file for interactive inspection.
# Decoded explicitly as UTF-8 so the parsed text does not depend on the
# platform's default encoding, and parsed straight from the file handle so
# `questions` is never bound to the raw text first.
with open('LogicBenchFiles/Eval .json Files/BQA/EVAL_ModusPones(BQA).json', encoding="utf-8") as file:
    questions = json.load(file)

questions

In [None]:
# Load the EVAL_ModusPones(MCQA) evaluation file for interactive inspection.
# Decoded explicitly as UTF-8 so the parsed text does not depend on the
# platform's default encoding, and parsed straight from the file handle so
# `questions2` is never bound to the raw text first.
with open('LogicBenchFiles/Eval .json Files/MCQA/EVAL_ModusPones(MCQA).json', encoding="utf-8") as file:
    questions2 = json.load(file)

questions2

In [None]:
# Inspect the MCQA samples: how many there are, and the fields, answer and
# choices of the first one.
q3 = questions2['samples']
# A bare `len(q3)` in the middle of a cell is evaluated and discarded (only a
# cell's last expression is displayed), so print it explicitly.
print(len(q3))
print(q3[0].keys())

print(q3[0]['answer'])
print(q3[0]['choices'])



In [None]:
# Samples from the BQA file; the cell displays how many there are.
q = questions["samples"]
len(q)

In [None]:
# Show the first sample's fields and context, then display the question/answer
# pairs of the sample at index 5.
print(q[0].keys())
print(q[0]["context"])

q[5]["qa_pairs"]

Each prompt:
- include context ('context')
- include question ('question' in each entry of 'qa_pairs')
- compare to answer ('answer')

In [None]:
# Expected answer of the first question/answer pair in the sample at index 5.
q[5]['qa_pairs'][0]['answer']

In [None]:


# Trial run of the BQA prompt format against "gpt-4o-mini": for every question,
# print the prompt, the model's normalized answer and the expected answer.
#
# The client is built once up front; constructing it inside the loop created a
# new client for every single question. The random placeholder answer left
# over from before the API was wired in was computed but never used, so it has
# been dropped.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

for current_q in q:
    for qa1 in current_q['qa_pairs']:
        prompt = current_q['context'] + '\n' + qa1['question'] + '\n' + "Only answer 'yes' or 'no' do not provide any additonal characters like punctuation or explantation just yes or no thats it.:"

        response = client.responses.create(
            model="gpt-4o-mini",
            input=prompt,
            store=False,
        )
        chat_gpt_answer = response.output_text

        print(prompt)
        print('ChatGPT answer:', chat_gpt_answer.lower().strip())
        print('answer:', qa1['answer'])
        print()



In [None]:
# Trial run of the MCQA prompt format against "gpt-4o-mini": for every sample,
# print the prompt, the model's normalized answer and the expected answer.
#
# The client is built once up front; constructing it inside the loop created a
# new client for every single sample. The random placeholder answer left over
# from before the API was wired in was computed but never used, so it has been
# dropped.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

for current_q2 in q3:
    prompt1 = current_q2['context'] + '\n' + current_q2['question'] + ' Select the best answer from the choices below:\n\n'

    # One "<label>: <text>" line per choice.
    for key, value in current_q2['choices'].items():
        prompt1 += f"{key}: {value}\n"

    prompt1 += "\nDo not provide any additonal characters like punctuation or explantation just the choice thats it(ex. Choice_1, Choice_2, Choice_3, Choice_4).:"

    response = client.responses.create(
        model="gpt-4o-mini",
        input=prompt1,
        store=False,
    )
    chat_gpt_answer2 = response.output_text

    print(prompt1)
    print('ChatGPT answer:', chat_gpt_answer2.lower().strip())
    print('answer:', current_q2['answer'])
    print()