In [1]:
import chromadb
from libraries.db import DatabaseProxy

In [2]:
# Open the Chroma client backed by the local "./instance" directory.
chroma_client = chromadb.PersistentClient(path="./instance")

# get_or_create=True makes this cell safe to re-run: an existing "my_collection"
# is reused instead of raising. The metadata sets "hnsw:space" to "ip".
collection = chroma_client.create_collection(
    name="my_collection",
    metadata={"hnsw:space": "ip"},
    get_or_create=True,
)

In [3]:
# Project-local database proxy; the helper functions further down look
# questions up through this object.
db = DatabaseProxy()

In [4]:
# Load the questions to index; each one is later read for its .id and .text.
questions = db.get_questions()

In [5]:
# Build the three parallel lists handed to the collection: the question text,
# a metadata dict carrying the question id, and the id rendered as a string.
documents = [question.text for question in questions]
metadatas = [{"id": question.id} for question in questions]
ids = [str(question.id) for question in questions]

In [6]:
# Only populate the collection while it is still empty, so re-running this
# cell does not try to add the same ids again.
if collection.count() == 0:
    collection.add(ids=ids, metadatas=metadatas, documents=documents)

# Display the number of stored entries as the cell output.
collection.count()

299

In [7]:
# Show the first five loaded questions as the cell output.
questions[:5]

[Question(is_active=True, modified_datetime=datetime.datetime(2023, 11, 17, 22, 26, 43), id=1, created_datetime=datetime.datetime(2023, 11, 17, 22, 26, 43), text='How many candidates do you have in your database?', answer_id=1),
 Question(is_active=True, modified_datetime=datetime.datetime(2023, 11, 17, 22, 26, 43), id=2, created_datetime=datetime.datetime(2023, 11, 17, 22, 26, 43), text='How often do you review and refresh your database of candidates?', answer_id=2),
 Question(is_active=True, modified_datetime=datetime.datetime(2023, 11, 17, 22, 26, 43), id=3, created_datetime=datetime.datetime(2023, 11, 17, 22, 26, 43), text='If you are a member of the American Staffing Association, what certifications do you hold?', answer_id=3),
 Question(is_active=True, modified_datetime=datetime.datetime(2023, 11, 17, 22, 26, 43), id=4, created_datetime=datetime.datetime(2023, 11, 17, 22, 26, 43), text='How do you determine which employees need coverage under the ACA?', answer_id=4),
 Question(is

In [24]:
def print_results(results):
    """Print one line per hit of the first query in ``results``.

    Each line shows the distance rounded to two decimals, the ``answer_id`` of
    the question looked up through the notebook-level ``db`` proxy, and the
    matched document text.

    Args:
        results: Query result mapping with parallel ``"ids"``, ``"distances"``
            and ``"documents"`` lists; only the first query (index 0) is read.
    """
    for position, hit_id in enumerate(results["ids"][0]):
        matched_question = db.get_question_by_id(hit_id)
        rounded_distance = round(results["distances"][0][position], 2)
        answer_id = matched_question.answer_id
        document_text = results["documents"][0][position]
        print(rounded_distance, answer_id, document_text)

In [31]:
from collections import defaultdict


def nearest_neighbors(results):
    """Return the answer id with the highest accumulated score among the hits.

    Every hit of the first query in ``results`` contributes
    ``1 - round(distance, 2)`` to the answer id of its matched question, so
    closer hits weigh more. The per-answer totals are printed before the
    winner is returned.

    Args:
        results: Query result mapping with parallel ``"ids"`` and
            ``"distances"`` lists; only the first query (index 0) is read.

    Returns:
        The answer id with the largest total score (the first one encountered
        wins a tie), or ``None`` when the query produced no hits.
    """
    classes = defaultdict(int)
    for hit_id, distance in zip(results["ids"][0], results["distances"][0]):
        # Look the question up through the notebook-level ``db`` proxy; the
        # previous ``de`` was never defined and only worked via stale kernel state.
        question = db.get_question_by_id(hit_id)
        score = 1 - round(distance, 2)
        classes[question.answer_id] += score

    print(dict(classes))

    # max() raises ValueError on an empty mapping; report "no answer" instead.
    if not classes:
        return None

    return max(classes, key=classes.get)


In [32]:
# Retrieve the five nearest stored questions for a sample query and print them.
query_text = "How many employees work at your firm?"
results = collection.query(query_texts=[query_text], n_results=5)
print_results(results)

0.29 6 How many employees does your company currently have, and could you give us a brief overview of your history?
0.3 6 What is the employee count for your company, and can you give us a concise overview of its history?
0.34 6 Could you provide the number of employees at your company and give us a brief background of its journey?
0.38 6 Can you give us a summary of your company's background and provide the number of employees?
0.43 6 Tell us about the size of your workforce and provide a brief summary of your company's past.


In [33]:
# Retrieve the five nearest stored questions for a second sample query.
query_text = "How often do you hire new employees?"
results = collection.query(query_texts=[query_text], n_results=5)
print_results(results)

0.46 2 How often are new candidates added to your sourcing platform for your teams to view?
0.46 2 How regularly do your teams have access to fresh data on candidates who are actively seeking employment?
0.48 47 What action or steps do you implement to retain employees throughout the season?
0.5 49 How often do you collect feedback from the associates you've placed to gauge their level of satisfaction?
0.51 47 What strategies do you implement to ensure employee retention throughout the year?


In [34]:
# Widen the search to the ten nearest stored questions for this query.
query_text = "How will WalMart be involved in the hiring and interview process?"
results = collection.query(query_texts=[query_text], n_results=10)
print_results(results)


0.56 46 Could you elaborate on the screening procedures your firm implements beyond employment verification and background checks?
0.6 46 What other screening methods does your firm employ aside from the standard employment verification and background checks?
0.62 45 How does Partners Personnel conduct background screenings and what factors are considered in the process?
0.63 45 Could you provide details on the screening criteria and methodology employed by your company?
0.63 46 Are there any supplementary screening protocols that your firm follows aside from verifying employment and conducting background checks?
0.64 6 Could you provide the number of employees at your company and give us a brief background of its journey?
0.65 46 Does your firm engage in any additional screening behind employment verification and background checks?
0.65 1 As of today, how many job seekers are listed in your database?
0.66 46 Apart from the usual background checks and employment verification, are there

In [35]:
# Print the ten nearest stored questions, then show the answer id chosen by
# the score-weighted vote as the cell output.
query_text = "Where do your employees like to go trick-or-treating?"
results = collection.query(query_texts=[query_text], n_results=10)
print_results(results)
nearest_neighbors(results)

0.59 47 How do you go about keeping your employees engaged and committed to their roles for the duration of the season?
0.64 47 What action or steps do you implement to retain employees throughout the season?
0.65 47 Could you share some of the initiatives you have in place to retain your employees during the seasonal period?
0.65 47 What measures do you take to motivate and incentivize your employees to stay with your company?
0.7 49 What system do you have in place to regularly monitor the satisfaction of the employees you've placed?
0.74 47 What steps do you take to create a positive and supportive work environment that encourages employee retention?
0.74 46 What other screening methods does your firm employ aside from the standard employment verification and background checks?
0.75 16 What security measures are in place to control access to the production environment with sensitive services, functions, and environments?
0.75 49 How often do you collect feedback from the associates 

47

In [36]:
# Print the ten nearest stored questions, then show the answer id chosen by
# the score-weighted vote as the cell output.
query_text = "What is your website?"
results = collection.query(query_texts=[query_text], n_results=10)
print_results(results)
nearest_neighbors(results)

0.25 42 What is the URL for your website?
0.64 42 What is the name of the website for Partners Personnel?
0.64 42 What is the website address for Partners Personnel?
0.71 1 How many candidates do you have in your database?
0.74 1 As of today, how many job seekers are listed in your database?
0.74 1 Can you tell me about the current size of your candidate database?
0.74 42 Can you provide me with the web link for Partners Personnel?
0.75 2 What is the frequency at which your sourcing platform is updated with new candidate information?
0.77 10 Is there a way for the public to view your Security/Compliance reports?
0.77 1 Could you provide an overview of the quantity of candidates present in your database at this time?
{42: 1.7299999999999998, 1: 1.04, 2: 0.25, 10: 0.22999999999999998}


42