In [2]:
!pip install chromadb sentence_transformers


Collecting chromadb
  Using cached chromadb-1.0.15-cp39-abi3-win_amd64.whl.metadata (7.1 kB)
Collecting sentence_transformers
  Using cached sentence_transformers-5.0.0-py3-none-any.whl.metadata (16 kB)
Collecting build>=1.0.3 (from chromadb)
  Using cached build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting pydantic>=1.9 (from chromadb)
  Using cached pydantic-2.11.7-py3-none-any.whl.metadata (67 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Using cached pybase64-1.4.1-cp312-cp312-win_amd64.whl.metadata (8.7 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Using cached uvicorn-0.35.0-py3-none-any.whl.metadata (6.5 kB)
Collecting numpy>=1.22.5 (from chromadb)
  Using cached numpy-2.3.1-cp312-cp312-win_amd64.whl.metadata (60 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Using cached posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Using cached onnxruntime-1.22.0-cp312-cp312-win_amd64.whl.


[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import json

# Parse the knowledge-base JSON file into a plain Python structure.
with open('knowledge_base.json', 'r', encoding='utf-8') as kb_file:
    data = json.load(kb_file)

# Policies are nested (section -> items): emit one chunk per item and carry
# the owning section's name along on each chunk.
policy_chunks = []
for policy_section in data['company_policies']:
    section_name = policy_section['section']
    policy_chunks.extend(
        {
            "type": "policy",
            "section": section_name,
            "title": entry["title"],
            "content": (
                f"Section: {section_name}\n"
                f"Title: {entry['title']}\n"
                f"Description: {entry['description']}"
            ),
        }
        for entry in policy_section['items']
    )

# FAQs are a flat list: one chunk per question/answer pair.
faq_chunks = [
    {
        "type": "faq",
        "question": entry["question"],
        "content": f"Q: {entry['question']}\nA: {entry['answer']}",
    }
    for entry in data['faqs']
]

# Response templates are a flat list: one chunk per named template.
template_chunks = [
    {
        "type": "template",
        "template_name": entry["template_name"],
        "content": (
            f"Template Name: {entry['template_name']}\n"
            f"Template: {entry['template']}"
        ),
    }
    for entry in data['response_templates']
]


In [5]:
# Single combined list, ordered policies -> FAQs -> templates.
all_chunks = [*policy_chunks, *faq_chunks, *template_chunks]


In [6]:
import chromadb
from chromadb.config import Settings

# Open an on-disk Chroma database under ./chroma_db.
# chromadb.Client(Settings(persist_directory=...)) builds an in-memory client
# unless persistence is explicitly enabled, so the directory setting alone did
# not save anything; PersistentClient is the documented way to persist.
chroma_client = chromadb.PersistentClient(path="./chroma_db")

# get_or_create_collection keeps this cell re-runnable: create_collection
# raises once a collection with this name already exists.
collection = chroma_client.get_or_create_collection("company_knowledge")

# Send every chunk in one batched call instead of one call per chunk.
# upsert overwrites records whose id is already stored, so re-running the cell
# refreshes existing entries rather than silently keeping the old ones.
# NOTE(review): ids are positional (doc_0, doc_1, ...); if the knowledge base
# shrinks, higher-numbered records from an earlier run stay in the store.
if all_chunks:  # Chroma's input validation is expected to reject empty id lists
    collection.upsert(
        documents=[chunk["content"] for chunk in all_chunks],
        ids=[f"doc_{idx}" for idx in range(len(all_chunks))],
        # Each chunk dict is stored whole as metadata (including its
        # "content" text), matching what was stored previously.
        metadatas=all_chunks,
    )
print(f"Stored {len(all_chunks)} documents in ChromaDB.")


Stored 57 documents in ChromaDB.


In [7]:
def search_knowledge(query, n_results=3):
    """Print the knowledge-base entries that best match a free-text query.

    Args:
        query: Text to look up in the module-level ``collection``.
        n_results: Number of matches to request from the collection
            (defaults to 3).

    Returns:
        None. For every match, a ``----`` separator line, the document text
        and a ``Metadata:`` line are written to stdout.
    """
    response = collection.query(query_texts=[query], n_results=n_results)
    # A single query text is sent, so only the first result list is read.
    hits = zip(response['documents'][0], response['metadatas'][0])
    for text, metadata in hits:
        print("----", text, f"Metadata: {metadata}", sep="\n")

# Example query: print the closest stored entries for a sick-leave question
# (uses the default number of results).
search_knowledge("How many days of sick leave do employees get?")


----
Section: Leave Policy
Title: Sick Leave
Description: Employees are entitled to 12 days of paid sick leave annually.
Metadata: {'content': 'Section: Leave Policy\nTitle: Sick Leave\nDescription: Employees are entitled to 12 days of paid sick leave annually.', 'title': 'Sick Leave', 'section': 'Leave Policy', 'type': 'policy'}
----
Section: Leave Policy
Title: Casual Leave
Description: 8 days of casual leave are allowed per year, not to be carried forward.
Metadata: {'section': 'Leave Policy', 'title': 'Casual Leave', 'type': 'policy', 'content': 'Section: Leave Policy\nTitle: Casual Leave\nDescription: 8 days of casual leave are allowed per year, not to be carried forward.'}
----
Q: How do I apply for sick leave?
A: Log in to HRMS and apply under Sick Leave section.
Metadata: {'content': 'Q: How do I apply for sick leave?\nA: Log in to HRMS and apply under Sick Leave section.', 'type': 'faq', 'question': 'How do I apply for sick leave?'}
