In [1]:
import pandas as pd
import openai
import os
import dotenv
from libraries.db import DatabaseProxy, Question
import re
from tqdm import tqdm

In [2]:
# Load the prompt template that is later filled in via
# prompt.format(question=..., answer=...).
# The encoding is pinned so the template is decoded the same way on every
# platform instead of depending on the locale default.
with open("resources/question_augment_prompt.txt", "r", encoding="utf-8") as f:
    prompt = f.read()

In [3]:
# Load variables from the local .envrc file, then hand the OpenAI key to the
# client library. A missing OPENAI_API_KEY raises KeyError right here.
dotenv.load_dotenv(".envrc")
openai.api_key = api_key = os.environ["OPENAI_API_KEY"]

In [4]:
# Database handle used by the cells below to read questions/answers and to
# insert the generated question variants.
db = DatabaseProxy()

In [5]:
# Fetch the stored questions and preview the first three.
questions = db.get_questions()
questions[:3]

[Question(text='How many candidates do you have in your database?', created_datetime=datetime.datetime(2023, 11, 17, 22, 26, 43), answer_id=1, modified_datetime=datetime.datetime(2023, 11, 17, 22, 26, 43), id=1, is_active=True),
 Question(text='How often do you review and refresh your database of candidates?', created_datetime=datetime.datetime(2023, 11, 17, 22, 26, 43), answer_id=2, modified_datetime=datetime.datetime(2023, 11, 17, 22, 26, 43), id=2, is_active=True),
 Question(text='If you are a member of the American Staffing Association, what certifications do you hold?', created_datetime=datetime.datetime(2023, 11, 17, 22, 26, 43), answer_id=3, modified_datetime=datetime.datetime(2023, 11, 17, 22, 26, 43), id=3, is_active=True)]

In [6]:
# Matches one numbered list item per line ("1. ...", "12. ...") and captures
# the item text. Anchoring at the start of a line (re.MULTILINE) keeps a
# digit-period sequence in the middle of a sentence from being mistaken for a
# list item; [ \t] is used instead of \s so a match never spans a line break.
question_pattern = re.compile(r"^[ \t]*\d+\.[ \t]+(.*)$", re.MULTILINE)

# For every stored question, ask the chat model for rephrasings (the prompt
# template receives the question text and its answer text) and store each
# rephrasing as a new Question pointing at the same answer.
for question in tqdm(questions):
    answer = db.get_answer_by_id(question.answer_id)
    content = prompt.format(question=question.text, answer=answer.text)
    conversation = [{"role": "user", "content": content}]
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        max_tokens=1000,
    )
    text = response["choices"][0]["message"]["content"]
    # Trim surrounding whitespace and wrapping double quotes, then drop items
    # that end up empty so blank questions are never written to the database.
    choices = [
        cleaned
        for cleaned in (raw.strip().strip('"') for raw in question_pattern.findall(text))
        if cleaned
    ]
    for choice in choices:
        new_question = Question(text=choice, answer_id=answer.id)
        db.insert_question(new_question)

  0%|          | 0/50 [00:00<?, ?it/s]

100%|██████████| 50/50 [02:09<00:00,  2.60s/it]


In [7]:
# `choices` is left over from the final pass of the augmentation loop, so this
# shows the variants generated for the last question only, one per line.
print(*choices, sep="\n")

What instances have you encountered relating to DFEH and EEOC claims?
Can you provide any examples of cases regarding DFEH and EEOC that you've dealt with?
In what situations have you been involved with DFEH and EEOC claims?
Have there been any notable matters concerning DFEH and EEOC that you've come across?
Could you share any experiences you've had handling DFEH and EEOC claims?


In [8]:
# Same list printed as a raw Python list, which makes stray quotes or
# whitespace inside the individual items visible.
print(choices)

['What instances have you encountered relating to DFEH and EEOC claims?', "Can you provide any examples of cases regarding DFEH and EEOC that you've dealt with?", 'In what situations have you been involved with DFEH and EEOC claims?', "Have there been any notable matters concerning DFEH and EEOC that you've come across?", "Could you share any experiences you've had handling DFEH and EEOC claims?"]
