### Imports

In [6]:
import nest_asyncio; nest_asyncio.apply()
import re

import json
import os
from dotenv import load_dotenv
load_dotenv()

from openai import OpenAI
client = OpenAI()

### Function that sends questions to GPT

In [7]:
# Run configuration shared by every request in this notebook.
# Double-check `model` before running: it selects the model that is queried
# and is also used in the name of the results report.
model = "gpt-4o"
temp = 0  # value passed as the `temperature` argument of each API call
# System prompt sent ahead of every question (adjacent literals are joined
# into one string).
system_message = (
    r"You're an expert in answering quiz questions about phonology. "
    r"Only answer with the letter and text of the answer which you think is correct. "
    r"Make sure that the answer is 100% correct because my life depends on it. "
    r"Answer this question:"
)

def pingGPT(prompt, model=model):
    """Send one prompt to the chat-completions endpoint and return the reply text.

    Args:
        prompt: Text sent as the user message.
        model: Model name forwarded to the API. The default is the value the
            module-level ``model`` had when this function was defined.

    Returns:
        The ``content`` of the first choice in the API response.
    """
    # The module-level system_message goes first, followed by the prompt itself.
    system_turn = {"role": "system", "content": system_message}
    user_turn = {"role": "user", "content": prompt}

    completion = client.chat.completions.create(
        model=model,
        messages=[system_turn, user_turn],
        temperature=temp,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content

### Open an itembank, send the questions to raw GPT-4o, obtain and save the responses

In [8]:
# Folder (relative to the notebook's working directory) that is scanned for
# itembank files; only entries ending in ".json" are kept.
quizzes_dir = "JSON_quizzes_only_closed_questions"

# os.listdir returns entries in arbitrary order, so sort the paths to make the
# processing order (and therefore the report) the same on every run/machine.
LIST_OF_QUIZZES = sorted(
    quizzes_dir + "/" + f
    for f in os.listdir("./" + quizzes_dir)
    if f.endswith('.json')
)
print(LIST_OF_QUIZZES)

['JSON_quizzes_only_closed_questions/itembank-2.json', 'JSON_quizzes_only_closed_questions/itembank-3-4.json', 'JSON_quizzes_only_closed_questions/itembank-5-6.json', 'JSON_quizzes_only_closed_questions/itembank-9a.json', 'JSON_quizzes_only_closed_questions/itembank-9b.json']


In [9]:
# Evaluate every itembank 10 times: send each question to the model, mark the
# reply as correct only when it is exactly equal to the stored correct answer,
# save the annotated questions as JSON and append a summary to a text report.

def _percent(n_correct, n_total):
    """Return n_correct / n_total as a percentage rounded to 2 decimals (0.0 if n_total is 0)."""
    return round(n_correct / n_total * 100, 2) if n_total else 0.0


total_nquestions = 0
total_ncorrects = 0
itembank_reports = []  # report lines: run settings first, then one line per itembank per pass

itembank_reports.append(f"Temperature: {temp}")
itembank_reports.append(f"System message: {system_message}\n")

# Create the output folder up front so a missing directory is not discovered
# only after a whole itembank has already been sent to the API.
responses_dir = "./results/raw_GPT_responses"
os.makedirs(responses_dir, exist_ok=True)

# Itembank id taken from the file name, e.g. "2", "3-4", "9a".
# [ab] instead of [a|b] (which would also accept a literal "|"), and \d+ so
# that multi-digit numbers are not cut down to their first digit.
itembank_id_pattern = re.compile(r"\d+[ab]?(?:-\d+)?")

for run_n in range(10):
    print(f"\nPass over all the itembanks number {run_n+1}")
    for doc in LIST_OF_QUIZZES:
        with open(doc, 'r', encoding='UTF-8') as file:
            questions_JSON = json.load(file)

        # Search only the file name so digits in a parent folder cannot match;
        # fall back to the file stem when the name contains no id at all.
        doc_name = os.path.basename(doc)
        id_match = itembank_id_pattern.search(doc_name)
        itembank_number = id_match.group() if id_match else os.path.splitext(doc_name)[0]
        print(f"----------processing itembank-{itembank_number}----------")

        this_ncorrects = 0  # correct answers in this itembank only
        this_nquestions = 0

        # main loop that sends requests and saves answers
        for question in questions_JSON:
            full_prompt = question['question']+'\n' + '\n'.join(question['answers'])
            given_answer = pingGPT(full_prompt)  # Get the response from GPT
            # Exact equality with the stored answer; evaluated once and reused.
            is_correct = given_answer == question["correct answer(s)"]
            question["GPT's response"] = given_answer  # stored next to the question
            question["correct"] = is_correct  # True/False correctness flag

            if is_correct:
                total_ncorrects += 1
                this_ncorrects += 1
            total_nquestions += 1
            this_nquestions += 1

        itembank_reports.append(f"itembank-{itembank_number}:\t\tCorrect answers {this_ncorrects} / total questions {this_nquestions} * 100 = {_percent(this_ncorrects, this_nquestions)}%")

        # Save the annotated questions. The file name does not include the pass
        # number, so each pass overwrites the file written by the previous one.
        with open(f'{responses_dir}/raw_GPT_responses-{itembank_number}.json', 'w', encoding='UTF-8') as outfile:
            json.dump(questions_JSON, outfile, indent=2)
    itembank_reports.append(" ")

    # Running totals accumulate across passes, so this is the cumulative score so far.
    print(f"Overall correct answers {total_ncorrects} / total questions {total_nquestions} * 100 = {_percent(total_ncorrects, total_nquestions)}%\n")

itembank_reports.append(f"\nOverall correct answers {total_ncorrects} / total questions {total_nquestions} * 100 = {_percent(total_ncorrects, total_nquestions)}%\n")
itembank_reports.append("\n------------------------------------------------------------------------\n\n")

# save the prompt and number of correct questions (appended, so earlier reports are kept)
with open(f"./results/raw_{model}_results.txt", 'a', encoding='UTF-8') as report_file:
    report_file.write('\n'.join(itembank_reports))


Pass over all the itembanks number 1
----------processing itembank-2----------
----------processing itembank-3-4----------
----------processing itembank-5-6----------
----------processing itembank-9a----------
----------processing itembank-9b----------
Overall correct answers 206 / total questions 395 * 100 = 52.15%


Pass over all the itembanks number 2
----------processing itembank-2----------
----------processing itembank-3-4----------
----------processing itembank-5-6----------
----------processing itembank-9a----------
----------processing itembank-9b----------
Overall correct answers 409 / total questions 790 * 100 = 51.77%


Pass over all the itembanks number 3
----------processing itembank-2----------
----------processing itembank-3-4----------
----------processing itembank-5-6----------
----------processing itembank-9a----------
----------processing itembank-9b----------
Overall correct answers 613 / total questions 1185 * 100 = 51.73%


Pass over all the itembanks number 4
-

### Looking at results of specific itembanks

The cell below prints the questions that GPT answered incorrectly for a chosen itembank.

In [None]:
# Print every question of one itembank that was marked incorrect, then the
# overall score for that itembank.
# The file name must match what the evaluation loop writes:
# results/raw_GPT_responses/raw_GPT_responses-<itembank>.json
with open("results/raw_GPT_responses/raw_GPT_responses-2.json", 'r', encoding='UTF-8') as file:
    questions_to_look_at = json.load(file)


ncor = 0
ntot = 0
for question in questions_to_look_at:
    if not question["correct"]:
        print("question_number:", question["category"])
        print(question["question"])
        print("correct:", question["correct answer(s)"])
        print("GPT:", question["GPT's response"], "\n")
    else:
        ncor += 1
    ntot += 1

# Guard against an empty file so the summary does not raise ZeroDivisionError.
if ntot:
    print(f"Correct {ncor} / total {ntot} = {ncor/ntot}")
else:
    print("No questions found in the selected file.")
