## JEE with MultiAgentDebate

In [None]:
import os
import json
from tqdm import trange
import time
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)  # for exponential backoff
import openai

# Azure OpenAI connection settings. The endpoint, API version and key are read
# from the environment so that credentials never have to be typed into (and
# saved with) the notebook. An unset variable falls back to the empty string
# that used to be hard-coded here.
openai.api_type = "azure"
openai.api_base = os.environ.get("OPENAI_API_BASE", "")
openai.api_version = os.environ.get("OPENAI_API_VERSION", "")
openai.api_key = os.environ.get("OPENAI_API_KEY", "")

#### MAD core code

In [None]:
#@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def gpt4(text, max_attempts=None, retry_delay=30):
    """Send ``text`` as a single user message to the ``gpt-4-32k`` engine.

    Every call is an independent one-message conversation; nothing from
    earlier calls is sent along. A failed call is printed, followed by a pause
    of ``retry_delay`` seconds and another attempt. Retries run in a loop
    rather than by recursion, so a long outage cannot exhaust the call stack.

    Args:
        text: Prompt to send as the content of the only (user) message.
        max_attempts: Upper bound on the number of API calls. ``None`` (the
            default) keeps retrying until a call succeeds.
        retry_delay: Seconds to sleep between a failed call and the next one.

    Returns:
        The message content of the first choice in the response.

    Raises:
        Exception: Re-raises the error of the last failed call once
            ``max_attempts`` calls have been made (only when a cap is given).
    """
    attempt = 0
    while True:
        attempt += 1
        try:
            response = openai.ChatCompletion.create(
                engine="gpt-4-32k",
                messages=[{"role": "user", "content": text}],
                temperature=0.7,
                max_tokens=6000,
                top_p=1,
                frequency_penalty=0,
                presence_penalty=0,
            )
            # Kept inside the try block: a malformed response is retried too.
            return response.choices[0].message.content
        except Exception as exc:
            print(exc)
            if max_attempts is not None and attempt >= max_attempts:
                raise
            time.sleep(retry_delay)
    

def get_res_from_other_agents(gpt4_res, get_prompt_for_agent):
    """Build the follow-up prompt for one agent from the other agents' replies.

    Args:
        gpt4_res: Reply strings, one per agent.
        get_prompt_for_agent: Index of the agent the prompt is meant for; the
            reply stored at this index is left out.

    Returns:
        A prompt that lists the remaining replies, labelled ``Agent1``,
        ``Agent2``, ... in their original order, and then asks for an updated
        response.
    """
    peer_replies = [
        reply
        for position, reply in enumerate(gpt4_res)
        if position != get_prompt_for_agent
    ]

    sections = ["These are responses from other agents."]
    for label, reply in enumerate(peer_replies, start=1):
        sections.append(f"\n Agent{label} :" + reply)
    sections.append("\n Based on the opinion of other agents , give an updated response")

    return "".join(sections)


def output(init_prompt, no_of_agents, rounds=5):
    """Run a multi-agent debate on ``init_prompt`` and log agent 0's answers.

    Every agent first answers ``init_prompt`` independently. In each later
    round every agent receives the prompt built by
    ``get_res_from_other_agents`` from the other agents' latest replies and
    answers again. Those follow-up prompts are the only text sent to the
    model; ``init_prompt`` is not passed to the model again after the opening
    round.

    The initial prompt and the first agent's reply after every round are
    written to ``<no_of_agents>_Model_multi_agent_output/gpt4_1.txt``. The
    file is overwritten on every call.

    Args:
        init_prompt: Prompt given to every agent in the opening round.
        no_of_agents: Number of agents taking part; must be at least 1.
        rounds: Number of debate rounds after the opening answers.

    Returns:
        A tuple ``(final_reply, log_text)``: the first agent's reply after the
        last round and the full content of the log file.

    Raises:
        ValueError: If ``no_of_agents`` is smaller than 1.
    """
    if no_of_agents < 1:
        raise ValueError("no_of_agents must be at least 1")

    agent_replies = [gpt4(init_prompt) for _ in range(no_of_agents)]

    folder_name = f"{no_of_agents}_Model_multi_agent_output"
    os.makedirs(folder_name, exist_ok=True)
    log_path = os.path.join(folder_name, "gpt4_1.txt")

    # Explicit UTF-8: model output may contain symbols that the platform's
    # default encoding cannot represent.
    with open(log_path, 'w', encoding='utf-8') as file:
        file.write("Initial prompt >>>>>>>> " + init_prompt)
        file.write('*' * 500)
        file.write("\n\n")

    for round_no in trange(1, rounds + 1):
        debate_prompts = [
            get_res_from_other_agents(agent_replies, agent_idx)
            for agent_idx in range(no_of_agents)
        ]
        agent_replies = [gpt4(prompt) for prompt in debate_prompts]

        # Only the first agent's reply is recorded for each round.
        with open(log_path, 'a', encoding='utf-8') as file:
            file.write("After iteration >>>>>>>>>>>>>>>> " + str(round_no))
            file.write("\n")
            file.write(agent_replies[0])
            file.write("\n")
            file.write('~' * 500)
            file.write("\n")

    with open(log_path, 'r', encoding='utf-8') as file:
        return_text = file.read()

    return agent_replies[0], return_text

### Iterating over dataset

In [None]:
import json
import pickle
from tqdm import trange
import time
import pandas as pd

# Load the question records. Later code indexes `l` by position and reads the
# keys 'description', 'index', 'subject', 'type', 'question' and 'gold' from
# each entry. The encoding is stated explicitly so that the file is decoded
# the same way regardless of the platform's default locale.
with open("./data/dataset.json", 'r', encoding="utf-8") as f:
    l = json.load(f)

# Instruction that opens every prompt, keyed by the question's 'type'.
# Each entry is a type-specific lead sentence followed by the same closing
# request for a detailed solution.
# NOTE(review): the wording (including "the final will be" and "upto") is kept
# exactly as it was; it is part of what is sent to the model.
prompts_dic = {
    kind: lead + " Give a detailed solution and end the solution with the final answer."
    for kind, lead in (
        ("MCQ", "In this problem, only one option will be correct."),
        ("MCQ(multiple)", "In this problem, multiple options can be correct."),
        ("Integer", "In this problem, the final answer will be a non-negative integer."),
        ("Numeric", "In this problem, the final will be a numeric value. Give the numerical answer correct upto the 2nd decimal digit."),
    )
}

# Description of the expected final-answer format, keyed by question 'type'.
ans_dic = dict([
    ("MCQ", "A or B or C or D."),
    ("MCQ(multiple)", "if correct options are A, B and D give ABD alphabetically."),
    ("Integer", "Int number only."),
    ("Numeric", "Float number only."),
])

def get_prompt(question: dict):
    """Assemble the full prompt for one question record.

    Args:
        question: Record providing the keys 'type', 'question' and 'gold'.
            'type' must be a key of both ``prompts_dic`` and ``ans_dic``.

    Returns:
        A tuple ``(prompt, gold)``. The prompt consists of the type-specific
        instruction, the problem text, the requested final-answer format and a
        closing step-by-step cue; ``gold`` is ``question['gold']`` unchanged.
    """
    instruction = prompts_dic[question['type']]
    answer_format = "\n\nGive final answer as Final_answer: {}".format(ans_dic[question['type']])

    prompt = "".join((
        instruction,
        "\n\nProblem: ",
        question['question'],
        answer_format,
        "\n\nLet’s think step by step",
    ))
    return prompt, question['gold']


# Empty results table; the processing loop appends one row per question.
df = pd.DataFrame({
    column: []
    for column in (
        'description',
        'index',
        'subject',
        'type',
        'question',
        'gold',
        'pred',
        "all_iter",
    )
})


# Run the debate for every question, collecting one result row per question.
no_of_agents = 6
checkpoint_every = 1  # write the intermediate workbook after this many questions
checkpoint_path = 'Jee_MAD.xlsx'
final_path = './output/Jee_MAD_output.xlsx'

# Create the output folder up front so the final save cannot fail on a missing
# directory after every question has already been processed.
os.makedirs(os.path.dirname(final_path), exist_ok=True)

failed_indices = []
for i in trange(len(l)):

    try:
        # `a` is the gold answer returned by get_prompt; it is not used here
        # because the row below reads l[i]['gold'] directly.
        q, a = get_prompt(l[i])
        res, all_res = output(q, no_of_agents)
        l[i]['pred'] = res

        df.loc[len(df)] = (l[i]['description'], l[i]['index'], l[i]['subject'], l[i]['type'], l[i]['question'], l[i]['gold'], l[i]['pred'], all_res)

        if i % checkpoint_every == 0:
            df.to_excel(checkpoint_path, index=False)

    except Exception as exc:
        # Best effort: a failing question is reported and skipped so that one
        # error does not abort the whole run.
        print(f"Exception {exc} at {i}")
        failed_indices.append(i)

# Summarise skipped questions so they are not lost in the progress output.
if failed_indices:
    print(f"{len(failed_indices)} question(s) failed: {failed_indices}")

df.to_excel(final_path, index=False)

### Scoring the Outputs.

Started with MAD after COT

Working with 6 agents and 6 rounds of debate; this takes around 20-25 min. (Should I increase the agent count to 20?)

Also, I observe that the first-round results are mostly carried forward to subsequent rounds (even when the solution is wrong).

Should we include the question as well in each round?
Currently, for every round except the first, we pass only the agents' outputs.

## JEEBench using Multi Agent Debate.

1. Run MAD_run.py to generate the GPT output for all questions and save it in **Jee_MAD.xlsx**.
2. Run Jee_MAD_Scoring.py to manually score the GPT-4 generations against GOLD using the paper's evaluation scheme and generate **Jee_MAD_Scores_paperScoring.xlsx**.
3. Run Jee_MAD_Scoring.py again to manually score the GPT-4 generations against GOLD using the original JEE exam scoring and generate **Jee_MAD_Scores+4.xlsx**.
4. Run this notebook to see the scores for each Subject and Category.

In [4]:
import pandas as pd

# Load the paper-scoring results workbook and show the mean 'Score' per subject.
df = pd.read_excel("./JEE/output/Jee_MAD_Scores_paperScoring.xlsx")
df.groupby('subject')['Score'].mean()

subject
chem    0.433453
math    0.281863
phy     0.391509
Name: Score, dtype: float64

In [5]:
# Mean 'Score' per question type, from the frame loaded in the previous cell.
df.groupby('type')['Score'].mean()

type
Integer          0.327586
MCQ              0.475610
MCQ(multiple)    0.370739
Numeric          0.270677
Name: Score, dtype: float64

In [6]:
# Mean 'Score' over all rows of the loaded frame.
df['Score'].mean()

0.35467706013363026

In [8]:
# Switch to the "+4" results workbook: print the summed 'Score' over all rows,
# then show the summed 'Score' per subject.
df = pd.read_excel("./JEE/output/Jee_MAD_Scores+4.xlsx")
print("Total: ", sum(df['Score']))
df.groupby('subject')['Score'].sum()

Total:  387


subject
chem    167
math    115
phy     105
Name: Score, dtype: int64