In [None]:
import os
import time
import json 
import regex
import random
import pickle 
import re
import sys
import numpy as np
from tqdm import tqdm, trange
import pandas as pd
from math import *
import sys
import openai
from openai.error import RateLimitError
from func_timeout import func_timeout, FunctionTimedOut
import argparse
import torch

## Getting K nearest neighbours for each of the test questions.

In [None]:
def read_pickle(path):
    """Load and return the object stored in the pickle file at ``path``.

    The file is opened in binary mode and closed before returning.
    NOTE(review): unpickling can execute arbitrary code, so only use this on
    files produced by this project.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)

def write_pickle(path, ll):
    """Pickle the object ``ll`` into the file at ``path``.

    The file is opened in binary write mode, so existing content is replaced.
    Nothing is returned.
    """
    with open(path, 'wb') as handle:
        pickle.Pickler(handle).dump(ll)

In [None]:
# Load the pickled question embeddings for both splits and convert each to a tensor.
# NOTE(review): presumably each pickle holds a rectangular list/array of floats
# (one row per question) — confirm against the script that produced them.
MATH_train = torch.tensor(read_pickle("MATH_embed_train.pkl"))
MATH_test = torch.tensor(read_pickle("MATH_embed_test.pkl"))

In [None]:
# Pairwise Euclidean (p=2) distances: entry [i, j] is the distance between
# MATH_test[i] and MATH_train[j].
# NOTE(review): assumes both tensors are 2-D float embeddings of equal width — confirm.
euc_dist = torch.cdist(MATH_test, MATH_train, p=2)

In [None]:
# Three nearest neighbours (smallest Euclidean distance) for each question in the test dataset.
N_NEIGHBOURS = 3
# NOTE(review): 8737 is the offset added to the test row number to form the key
# that is later used to index `data`; presumably the position of the first test
# question in that list — confirm.
TEST_INDEX_OFFSET = 8737

# One vectorised sort over every row replaces a Python-level sort per row.
# stable=True keeps the lower train index first when distances tie, which is
# the order Python's (stable) sorted() produced. Every row of euc_dist is
# covered, so the test-set size is no longer hard-coded.
nearest_train_rows = torch.sort(euc_dist, dim=1, stable=True).indices[:, :N_NEIGHBOURS].tolist()

# Maps test key -> list of train row indices (plain Python ints, so it pickles cleanly).
knn = {
    test_row + TEST_INDEX_OFFSET: neighbours
    for test_row, neighbours in enumerate(nearest_train_rows)
}

In [None]:
# Persist the neighbour lookup to disk; the next cell reloads it from this file.
write_pickle("knn_new.pkl", knn)

In [None]:
# Reload the neighbour lookup (key -> list of neighbour indices) saved earlier.
knn = read_pickle("knn_new.pkl")

# Chain-of-thought predictions; the prompt builder below reads the
# 'problem', 'CoT' and 'final_answer' fields of its entries.
with open("./COT_Pred/gpt4_cot_final_pred.json", "r") as f:
    data = json.load(f)

In [None]:
class MATH_MedPrompt:
    """Chat-completion client that keeps a running conversation and token totals.

    Each instance stores the messages exchanged so far in ``self.messages`` and
    accumulates prompt/completion token usage reported by the API.
    """

    def __init__(self, base, version, key, lm, method, backend_args, quota_args, **kwargs):
        """Store the sampling settings and, for the 'openai' backend, configure the API.

        Args:
            base, version, key: values assigned to ``openai.api_base``,
                ``openai.api_version`` and ``openai.api_key``.
            lm: default engine name passed to the chat-completion call.
            method: stored as ``self.method``; not used inside this class.
            backend_args: dict with keys 'name', 'top_p', 'temp', 'max_token'
                and 'presence_penalty'.
            quota_args: dict with key 'max_iter_per_instance'.
            **kwargs: accepted and ignored.
        """
        # Conversation state and usage counters start empty.
        self.messages = []
        self.history = []
        self.strategy = None
        self.n_prompt_token = 0
        self.n_sample_token = 0

        self.lm = lm
        self.method = method  # step
        self.backend = backend_args['name']  # openai

        if self.backend == 'openai':
            # These are attributes of the `openai` module itself, so the
            # settings are shared by every instance in the process.
            openai.api_type = "azure"
            openai.api_base = base
            openai.api_version = version
            openai.api_key = key

        # Sampling parameters forwarded on every request.
        self.top_p = backend_args['top_p']
        self.temp = backend_args['temp']
        self.max_token = backend_args['max_token']
        self.presence_penalty = backend_args['presence_penalty']

        self.max_iter_per_instance = quota_args['max_iter_per_instance']

    def call_openai_api(self, messages, stop, lm=None, top_p=None):
        """Send ``messages`` to the chat-completion endpoint, retrying on failure.

        Up to 10 attempts are made. Every attempt is preceded by a 1 second
        pause and is limited to 90 seconds; a timeout is logged, any other
        error is followed by a 15 second pause.

        Args:
            messages: list of chat messages to send.
            stop: stop sequence(s) forwarded to the API.
            lm: engine override; ``self.lm`` is used when None.
            top_p: nucleus-sampling override; ``self.top_p`` is used when None.

        Returns:
            The response object returned by the API call.

        Raises:
            Exception: with message 'Failed 10 retries.' once all attempts fail.
        """
        for attempts_left in range(9, -1, -1):
            try:
                time.sleep(1)
                return func_timeout(90,
                    openai.ChatCompletion.create,
                    kwargs={
                        "engine": lm if lm is not None else self.lm,
                        "messages": messages,
                        "top_p": top_p if top_p is not None else self.top_p,
                        "temperature": self.temp,
                        "max_tokens": self.max_token,
                        "presence_penalty": self.presence_penalty,
                        "stop": stop,
                    }
                )
            except (FunctionTimedOut, Exception) as failure:
                # The timeout type is listed separately so it is caught even if
                # it is not an Exception subclass.
                if isinstance(failure, FunctionTimedOut):
                    print('[LOG] OpenAI API call timeout')
                else:
                    # Back off before the next attempt.
                    time.sleep(15)
                if attempts_left == 0:
                    raise Exception('Failed 10 retries.')

    def call_lm(self, prompt, add_response=True, stop=None, lm=None, top_p=None):
        """Append ``prompt`` as a user turn, query the model and return its reply text.

        The whole conversation held in ``self.messages`` is sent. When
        ``add_response`` is true the reply message is appended to the
        conversation as well. Token usage from the response is added to
        ``self.n_prompt_token`` / ``self.n_sample_token``.
        """
        self.messages.append({'role': 'user', 'content': prompt})
        response = self.call_openai_api(self.messages, stop, lm=lm, top_p=top_p)

        reply = response['choices'][0]['message']
        if add_response:
            self.messages.append(reply)

        usage = response['usage']
        self.n_prompt_token += usage['prompt_tokens']
        self.n_sample_token += usage['completion_tokens']
        return reply['content']
    

## Using the chains of thought of the nearest neighbours as few-shot examples to get 10 few-shot CoT responses for each test question

In [None]:
def fewshot_cot_prompt(key):
    """Build the few-shot chain-of-thought prompt for the question ``data[key]``.

    The examples are the entries of the module-level ``data`` whose indices are
    listed in ``knn[key]``. Each example shows its problem, CoT and final
    answer and is closed by a line of 20 asterisks. The prompt ends with the
    target problem followed by an open "Solution" stem for the model to continue.

    Returns:
        The assembled prompt string.
    """
    example_blocks = [
        "".join((
            "Problem - ", data[idx]['problem'],
            "\nSolution - Let's think step by step : \n", data[idx]['CoT'],
            "\nFinal answer - ", data[idx]['final_answer'],
            "\n", 20 * "*", "\n",
        ))
        for idx in knn[key]
    ]
    return "".join((
        "<Examples>\n",
        *example_blocks,
        "<End of Examples>\n\n",
        "Problem - ", data[key]['problem'],
        "\nSolution - Let's think step by step : \n",
    ))


def run_fewshot_cot(fs_prompt, n_samples=10):
    """Query the model ``n_samples`` times with the same few-shot prompt.

    A fresh ``MATH_MedPrompt`` is created for every sample, so no conversation
    history is shared between samples. The API settings come from the
    module-level ``base``, ``version``, ``key``, ``backend_args`` and
    ``quota_args``.

    Args:
        fs_prompt: prompt text sent as the single user message.
        n_samples: number of independent responses to collect (default 10).

    Returns:
        List of ``n_samples`` response strings, in the order received.
    """
    responses = []
    for _ in range(n_samples):
        client = MATH_MedPrompt(base, version, key, "gpt-4-turbo", 'step', backend_args, quota_args)
        responses.append(client.call_lm(fs_prompt))
    return responses

In [None]:
# Generate the few-shot CoT responses for questions start..end-1 and checkpoint them to JSON.
# NOTE(review): `start`, `end` and the API settings read by run_fewshot_cot are
# defined in the argparse/config cell further down; that cell must run first.
FS_COT_PRED_PATH = f"./10FS_COT_Pred/gpt4_{start}_{end}_fs_cot_pred.json"

# Create the output folder up front: otherwise a missing directory only
# surfaces after the first batch of API calls has already been paid for.
os.makedirs(os.path.dirname(FS_COT_PRED_PATH), exist_ok=True)

final_json = []
for i in trange(start, end):
    fs_prompt = fewshot_cot_prompt(i)
    res_final = run_fewshot_cot(fs_prompt)
    final_json.append({
        "index": i,
        "problem": data[i]['problem'],
        "FS_CoT" : res_final
    })
    # Checkpoint after every question so an interrupted run keeps its progress.
    with open(FS_COT_PRED_PATH, 'w') as f:
        json.dump(final_json, f)

# Final write; also produces an (empty) file when the range is empty.
with open(FS_COT_PRED_PATH, 'w') as f:
    json.dump(final_json, f)

## Now using the CoT responses from the previous step to extract a final answer from each of the 10 few-shot CoT responses

In [None]:
# Rebinds `data` to the contents of gpt4_10fs_final.json; the final-answer loop
# below reads the "index", "problem" and "FS_CoT" fields of its entries.
# NOTE(review): presumably the merged per-range outputs of the previous step — confirm.
with open("./10FS_COT_Pred/gpt4_10fs_final.json") as f:
    data = json.load(f)

In [None]:
def final_answer_prompt(prompt_cot):
    """Append the answer-format instruction to a problem-plus-CoT prompt.

    Args:
        prompt_cot: prompt text ending with the chain of thought.

    Returns:
        ``prompt_cot`` followed by a request to state the final answer in the
        ``#Final answer : <value>#`` format.
    """
    answer_instruction = "\n\nGive the final answer in the format, #Final answer : <Final_Answer>(only answer value NO units and no other text)#. Therefore the final answer is:"
    return prompt_cot + answer_instruction


In [None]:
# Run configuration: index range to process plus the API connection settings.
parser = argparse.ArgumentParser(description='argparse')
parser.add_argument('--start', type=int, default=0,
                    help="Starting index")
parser.add_argument('--end', type=int, default=12500,
                    help="Ending index")
# The connection settings previously defaulted to the integer 12500 (copied
# from --end). They now fall back to environment variables, which also keeps
# credentials out of the notebook; the value is None when neither is given.
parser.add_argument('--base', type=str, default=os.environ.get("OPENAI_API_BASE"),
                    help="api_base")
parser.add_argument('--version', type=str, default=os.environ.get("OPENAI_API_VERSION"),
                    help="api_version")
parser.add_argument('--key', type=str, default=os.environ.get("OPENAI_API_KEY"),
                    help="api_key")
# parse_known_args() ignores arguments it does not recognise. A Jupyter kernel
# is launched with its own "-f <connection file>" arguments, which parse_args()
# would reject and abort the cell.
args, _unknown_args = parser.parse_known_args()
start = args.start
end = args.end
base = args.base
version = args.version
key = args.key

# Sampling settings forwarded to every MATH_MedPrompt instance.
backend_args = {
        'name': "openai",
        'top_p': 1,
        'temp': 0,
        'max_token': 3000,
        'presence_penalty': 1.5,
    }

quota_args = {
        'sleep_minutes': 1,
        'max_iter_per_instance': 4
}

In [None]:
# Ask the model for a formatted final answer for each of the 10 CoT responses
# of every question in start..end-1, and checkpoint the answers to JSON.
FINAL_PRED_PATH = f"./10FS_Final_Pred/gpt4_{start}_{end}_fs_cot_pred.json"

# Create the output folder up front so a missing directory cannot fail the
# run after API calls have already been made.
os.makedirs(os.path.dirname(FINAL_PRED_PATH), exist_ok=True)


def _extract_final_answer(reply):
    """Return the text after the last ':' in ``reply`` without the closing '#'.

    The prompt asks for ``#Final answer : <value>#``. The closing '#' is only
    removed when it is actually there (ignoring trailing whitespace); a reply
    without it is returned untouched instead of losing its last character.
    """
    tail = reply.split(":")[-1]
    trimmed = tail.rstrip()
    if trimmed.endswith("#"):
        return trimmed[:-1]
    return tail


final_json = []
for i in trange(start, end):
    index = data[i]["index"]
    problem = data[i]["problem"]
    ans_list = []
    for j in range(10):
        # A fresh client per call keeps each conversation independent.
        math = MATH_MedPrompt(base, version, key, "gpt-4-turbo", 'step', backend_args, quota_args)
        cot = data[i]["FS_CoT"][j]
        prompt_cot = "Problem - "+problem+"\nSolution - Let's think step by step :\n"+cot
        final_prompt = final_answer_prompt(prompt_cot)
        res_final = math.call_lm(final_prompt)
        ans_list.append(_extract_final_answer(res_final))
    final_json.append({
        "index": index,
        "problem": problem,
        "answer_list" : ans_list})
    # Checkpoint after every question so an interrupted run keeps its progress.
    with open(FINAL_PRED_PATH, 'w') as f:
        json.dump(final_json, f)

# Final write; also produces an (empty) file when the range is empty.
with open(FINAL_PRED_PATH, 'w') as f:
    json.dump(final_json, f)

## Out of the 10 final answers for each test question in 10FS_Final_Pred, the answer with the highest occurrence is chosen as the final answer