## JEE with Self-Critic

In [2]:
import json
import pandas as pd
from tqdm import trange, tqdm
import os
import openai
import logging
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)  # for exponential backoff

# Azure OpenAI connection settings for the module-level configuration of the
# `openai` package. Endpoint, API version and key are taken from the
# environment so that credentials never have to be typed into (or committed
# with) the notebook; an unset variable falls back to "" as before.
openai.api_type = "azure"
openai.api_base = os.environ.get("OPENAI_API_BASE", "")
openai.api_version = os.environ.get("OPENAI_API_VERSION", "")
openai.api_key = os.environ.get("OPENAI_API_KEY", "")

In [3]:
# Load the question list. The encoding is pinned to UTF-8 so that non-ASCII
# text in the questions is decoded the same way on every platform instead of
# depending on the locale's default encoding.
with open("./data/dataset.json", 'r', encoding="utf-8") as f:
    l = json.load(f)

In [None]:
# Closing instruction shared by every question type.
_SOLUTION_SUFFIX = "Give a detailed solution and end the solution with the final answer."

# Opening sentence(s) specific to each question type, keyed by the value of a
# question's "type" field.
_TYPE_INSTRUCTIONS = (
    ("MCQ", "In this problem, only one option will be correct."),
    ("MCQ(multiple)", "In this problem, multiple options can be correct."),
    ("Integer", "In this problem, the final answer will be a non-negative integer."),
    (
        "Numeric",
        "In this problem, the final will be a numeric value. "
        "Give the numerical answer correct upto the 2nd decimal digit.",
    ),
)

# Full instruction text per question type: type-specific lead + shared suffix.
prompts_dic = {
    question_type: lead + " " + _SOLUTION_SUFFIX
    for question_type, lead in _TYPE_INSTRUCTIONS
}


@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def gpt4(system, user_prompt):
    """Run one system + user exchange against the "gpt-4-32k" engine.

    The call is wrapped in a retry policy: randomized exponential back-off
    between 1 and 60 seconds, giving up after 6 attempts.

    Args:
        system: Text sent as the "system" message.
        user_prompt: Text sent as the "user" message.

    Returns:
        The content of the first choice in the completion response.
    """
    conversation = [
        {"role": "system", "content": system},
        {"role": "user", "content": user_prompt},
    ]

    completion = openai.ChatCompletion.create(
        engine="gpt-4-32k",
        messages=conversation,
        temperature=0,  # deterministic-leaning decoding
        max_tokens=4000,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )

    return completion.choices[0].message.content
    

def reflection_prompt(input_text, logger):
    """Ask the model to answer, critique its answer, then give a final one.

    Args:
        input_text: The question text, embedded into the user message.
        logger: Accepted for signature compatibility; not used here.

    Returns:
        The raw model reply as returned by ``gpt4``.
    """
    # NOTE(review): the wording below talks about an "MCQ" and a "correct
    # option" regardless of the question type being asked - confirm intended.
    critic_instructions = '''Act like a MIT professor having 20 years of teaching experience. You will first answer the question. Then you will criticize your answer and then finally you will give the correct response.
    
    Instructions :
     You have to only give the correct option as the response
     Give only final response in strictly JSON format only, with one key as "explanation" and the  explanation and the second key as "answer" and the answer.
    '''
    question_message = f'''
    Solve the given MCQ: {input_text}
    '''
    return gpt4(critic_instructions, question_message)
    

def get_json(result, logger, count=1):
    """Extract the final ``{"explanation", "answer"}`` JSON object from a reply.

    The raw reply is sent back to the model with an instruction to return only
    the final JSON, and the response is parsed with ``json.loads``. A response
    that cannot be obtained or parsed, or that is not an object carrying both
    expected keys, counts as a failed attempt and is retried.

    Args:
        result: Raw model reply to extract the JSON from.
        logger: Optional logger (``logging.Logger``-like); when not ``None``
            each failed attempt is reported through ``logger.warning``.
        count: 1-based number of the first attempt. Attempts are numbered up
            to 5; at least one attempt is always made.

    Returns:
        The parsed dict, or the placeholder
        ``{"explanation": "Something went wrong", "answer": "Wrong"}`` once
        all attempts have failed.
    """
    max_attempts = 5
    required_keys = ("explanation", "answer")

    # getting only final Json.
    system = '''
        Give response in strictly JSON format only, with one key as "explanation" and the  explanation and the second key as "answer" and the answer.
        '''
    user_prompt = f'''
        Extract Final JSON from:

        {result}
        '''

    # Same attempt budget as counting from `count` up to 5, but never zero.
    attempts_left = max(1, max_attempts - count + 1)

    for attempt in range(count, count + attempts_left):
        try:
            parsed = json.loads(gpt4(system, user_prompt))
        except Exception as exc:
            # `Exception` rather than a bare except, so KeyboardInterrupt and
            # SystemExit still stop a long run.
            if logger is not None:
                logger.warning("get_json attempt %d failed: %s", attempt, exc)
            continue

        if isinstance(parsed, dict) and all(key in parsed for key in required_keys):
            return parsed

        if logger is not None:
            logger.warning(
                "get_json attempt %d returned JSON without the expected keys", attempt
            )

    return {"explanation": "Something went wrong", "answer": "Wrong"}


def get_prompt(question: dict):
    """Build the prompt text for one dataset entry.

    Args:
        question: Entry providing the 'type', 'question' and 'gold' keys.

    Returns:
        A ``(prompt, gold)`` tuple: the type-specific instruction from
        ``prompts_dic`` followed by a blank line and the question text, and
        the entry's 'gold' value.
    """
    instruction = prompts_dic[question['type']]
    prompt_text = "\n\n".join((instruction, question['question']))
    return prompt_text, question['gold']


# Column layout of the results sheet, in output order.
_RESULT_COLUMNS = (
    "Index",
    "Question",
    "Gold",
    "first_answer",
    "explanation",
    "Answer",
    "subject",
    "type",
)

# Start with an empty frame; one row is appended per processed question.
df = pd.DataFrame({column: [] for column in _RESULT_COLUMNS})

# The helpers below are handed a `logger`. Reuse one that was set up earlier
# in the session if it exists; otherwise create it here so the loop does not
# fail with a NameError on every question.
if "logger" not in globals():
    logger = logging.getLogger(__name__)

OUTPUT_PATH = "./output/Jee_Self_Critic_output.xlsx"
# Make sure the target directory exists before the first checkpoint is written.
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)

for i, item in enumerate(tqdm(l)):
    print("Execution: ", i)

    try:
        q, _ = get_prompt(item)

        # Raw self-critic reply, then the final JSON extracted from it.
        x = reflection_prompt(q, logger)
        t = get_json(x, logger, 1)

        df.loc[len(df)] = (
            item['index'],
            item['question'],
            item['gold'],
            x,
            t["explanation"],
            t['answer'],
            item['subject'],
            item['type'],
        )

        # Checkpoint after every question so partial results survive a crash.
        df.to_excel(OUTPUT_PATH, index=False)

    except Exception:
        # Best effort: skip this question and continue, but record why it
        # failed. KeyboardInterrupt is deliberately not caught, so the run can
        # still be stopped by hand.
        print(f"Exception at {i}..")
        logger.exception("Processing of question %d failed", i)

df.to_excel(OUTPUT_PATH, index=False)

### Evaluations.

In [7]:
import pandas as pd
# Load the scored results; this rebinds `df`.
# NOTE(review): this reads Jee_Self_Critic_Scores.xlsx, not the
# Jee_Self_Critic_output.xlsx file written by the generation loop, and no code
# in this notebook creates the "Score" column used below - presumably scoring
# happens in a separate step; confirm.
df = pd.read_excel("./output/Jee_Self_Critic_Scores.xlsx")

In [8]:
# Mean of the "Score" column over all rows.
df['Score'].mean()

0.26222222222222225

In [9]:
# Mean "Score" per value of the "subject" column.
df.groupby(['subject'])['Score'].mean()

subject
chem    0.345324
math    0.214461
phy     0.245327
Name: Score, dtype: float64

In [10]:
# Mean "Score" per value of the "type" column (question format).
df.groupby(['type'])['Score'].mean()

type
Integer          0.120690
MCQ              0.433735
MCQ(multiple)    0.267045
Numeric          0.210526
Name: Score, dtype: float64