In [1]:
import pandas as pd
import openai
import os
import dotenv
from libraries.db import DatabaseProxy, Question
import re
from tqdm import tqdm

In [2]:
prompt = [
    {
        "role": "system",
        "content": """Given the following question and answer pairs, generate 5 new ways to ask that question:
        For example:
        Question: "How often do you review and refresh your database of candidates?"
        Answer: "We refresh our database of candidates every 6 months."
        """
    },
    {
        "role": "user",
        "content": "Question: \"{question}\"\nAnswer: \"{answer}\""
    }
]

In [3]:
dotenv.load_dotenv(".envrc")
api_key = os.environ["OPENAI_API_KEY"]
openai.api_key = api_key

In [4]:
db = DatabaseProxy()

In [5]:
questions = db.get_questions()
questions[:3]

[Question(id=1, text='How many candidates do you have in your database?', answer_id=1),
 Question(id=2, text='How often do you review and refresh your database of candidates?', answer_id=2),
 Question(id=3, text='If you are a member of the American Staffing Association, what certifications do you hold?', answer_id=3)]

In [6]:
question_pattern = re.compile(r"\d\. (.*)\n?")

for question in tqdm(questions):
    answer = db.get_answer_by_id(question.answer_id)
    prompt[1]["content"] = prompt[1]["content"].format(question=question.text, answer=answer.text)
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=prompt,
        max_tokens=1000,
    )
    text = response["choices"][0]["message"]["content"]
    choices = question_pattern.findall(text)
    choices = [choice.strip().strip('"') for choice in choices]
    for choice in choices:
        new_question = Question(text=choice, answer_id=answer.id)
        db.insert_question(new_question)

  0%|          | 0/50 [00:00<?, ?it/s]

100%|██████████| 50/50 [02:15<00:00,  2.71s/it]


In [7]:
print("\n".join(choices))

How frequently do you update the number of candidates in your database?
Can you provide an estimate of the total number of candidates currently in your database?
When was the last time you updated the number of candidates in your database?
Do you have an approximate count of the candidates in your database?
Are there any specific metrics you track to monitor the size of your candidate database?


In [8]:
print(choices)

['How frequently do you update the number of candidates in your database?', 'Can you provide an estimate of the total number of candidates currently in your database?', 'When was the last time you updated the number of candidates in your database?', 'Do you have an approximate count of the candidates in your database?', 'Are there any specific metrics you track to monitor the size of your candidate database?']
