In [1]:
import pandas as pd
import tiktoken
from openai.embeddings_utils import get_embedding
import openai
import numpy as np
import csv
import json
import os
from sentence_transformers import util
import time

openai.api_key = ""

### Configurations

In [None]:
embedding_model = "text-embedding-ada-002"
top_similar_questions = 3
few_shot_examples_desired = 5
codex_engine = "code-davinci-002"
few_shot_max_tokens = 300
codex_time_delay = 3
engine_temperature = 0
engine_topP = 0

### Parse csv file to separate json files for each question

In [None]:
jsonFields  =  (
    "Id",
    "Question",
    "Solution",
    "ChatGPTSolution",
    "Grade",
    "SimilarQuestions",
    "FewShotQuestion",
    "FewShotAnswer",
    "FewSHotEvaluation"
)
with open('dataset/dataset.csv') as csvf:
    csvReader = csv.DictReader(csvf, jsonFields)
    for i, row in enumerate(csvReader):
        with open(f'questions/{i}.json', 'w') as jsonf:
            json.dump(row, jsonf, indent=4)

### Generate embeddings for each questions using OpenAI

In [4]:
if not os.path.exists('dataset/dataset_embeddings.json'):
    embeddings=[]
    for i in range(22):
        with open(f'questions/{i}.json', 'r') as jsonf:
            questions=json.load(jsonf)
        embedding=openai.Embedding.create(input = questions["Question"], 
                                            engine = embedding_model)['data'][0]['embedding']
        embeddings.append(embedding)
    embeddings={'list':embeddings}
    with open(f'dataset/dataset_embeddings.json', 'w') as jsonf:
        jsonf.write(json.dumps(embeddings))
    embeddings=np.array(embeddings['list'])
else:
    with open('dataset/dataset_embeddings.json', 'r') as f:
        elist = json.load(f)['list']
    embeddings=np.array(elist)

### Function for calculating cosine similarity

In [29]:
def cosine_similarity(embeddings, q):
    cos_sims = []
    cos_to_num = {}
    for j in range(len(embeddings)):
        cos_sim = util.cos_sim(embeddings[q], embeddings[j]).item()
        cos_to_num[cos_sim] = j
        cos_sims.append(cos_sim)
    ordered = sorted(cos_sims, reverse=True)
    closest_qs = []
    for val in ordered:
        closest_qs.append(cos_to_num[val])
    return closest_qs[1:]

### Function to calculate top N most similar questions for every question

In [25]:
for i in range(22):
    jsonPath='questions/'+str(i)+'.json'
    similarQuestions = cosine_similarity(embeddings, i)
    gradeOneSimilarQuestions = []
    for q in similarQuestions:
        questionPath='questions/'+str(q)+'.json'
        with open(questionPath, 'r') as jsonf:
            squestion = json.load(jsonf)
        grade = squestion['Grade']
        if grade == "1":
            gradeOneSimilarQuestions.append(q)
        if len(gradeOneSimilarQuestions) == top_similar_questions:
            break
    with open(jsonPath, 'r') as jsonf:
        question = json.load(jsonf)
        question['SimilarQuestions'] = gradeOneSimilarQuestions
    with open(jsonPath, 'w') as jsonf:
        json.dump(question, jsonf, indent=4)


### Few-shot algorithm using OpenAI

In [7]:
def execute_few_shot():
    for i in range(21):
        jsonPath='questions/'+str(i)+'.json'
        with open(jsonPath, 'r') as jsonf:
            question = json.load(jsonf)
        if question["Grade"] == '1':
            print("Few-shot not needed for question "+str(i))
        else:
            print("Performing few-shot for question "+str(i))
            fewShotInput = ''
            for closest in question["SimilarQuestions"]:
                similarJsonPath='questions/'+str(closest)+'.json'
                with open(similarJsonPath, 'r') as jsonf:
                    similarQuestion = json.load(jsonf)
                fewShotInput += similarQuestion["Question"]
                fewShotInput += similarQuestion["Solution"]+'\n\n'
            fewShotInput += question["Question"]
            question["FewShotQuestion"]=fewShotInput
            start = time.time()
            time.sleep(codex_time_delay)
            fewShotOutput = openai.Completion.create(engine = codex_engine, 
                                                        prompt = fewShotInput, 
                                                        max_tokens = few_shot_max_tokens, 
                                                        temperature = engine_temperature, 
                                                        top_p = engine_topP)['choices'][0]['text']
            question["FewShotAnswer"]=fewShotOutput
            with open(jsonPath, 'w') as jsonf:
                json.dump(question, jsonf, indent=4)

In [8]:
execute_few_shot()

Few-shot not needed for question 0
Performing few-shot for question 1
Few-shot not needed for question 2
Performing few-shot for question 3
Performing few-shot for question 4
Performing few-shot for question 5
Performing few-shot for question 6
Few-shot not needed for question 7
Few-shot not needed for question 8
Few-shot not needed for question 9
Few-shot not needed for question 10
Performing few-shot for question 11
Few-shot not needed for question 12
Few-shot not needed for question 13
Few-shot not needed for question 14
Performing few-shot for question 15
Few-shot not needed for question 16
Performing few-shot for question 17
Few-shot not needed for question 18
Few-shot not needed for question 19
Performing few-shot for question 20


### Below code is for generating documentation

In [27]:
# for i in range(22):
#     output=""
#     jsonPath='questions/'+str(i)+'.json'
#     with open(jsonPath, 'r') as jsonf:
#         question = json.load(jsonf)
#     a=np.array(question["SimilarQuestions"])
#     a[0:3]+=1
#     if question["Grade"] == "0":
#         output+="$$\\textbf{List of most similar questions}:$$"+','.join(map(str, a))+'\n'
#         output+="$$\\textbf{Few-shot method result}$$:"+question["FewShotAnswer"]+'\n'
#         output+="$$\\textbf{Few-shot method evaluation result}: $$ 0"+'\n'
#         output+="$$\\textbf{Code snippet:}$$ Same as question 2"+'\n'
#         output+="$$\\textbf{Comments or assumptions}:$$ Same as question 2"+'\n'
#     else:
#         output+="$$\\textbf{List of most similar questions}:$$"+','.join(map(str, a))+'\n'
#         output+="Few shot not required as the grade is 1 in zero shot"
#     with open('documentation/'+str(i)+'.txt', 'w') as file:
#         file.write(output)
            