In [None]:
import os
import json
import chromadb
from langchain_text_splitters import RecursiveCharacterTextSplitter
from chromadb import Client
from chromadb.config import Settings
import chromadb.utils.embedding_functions as embedding_functions


directory_path = 'xxxx'  # Replace with your actual folder path

# Collect one {'title', 'content'} record per key/value pair found in the
# JSON files of `directory_path`.
# Assumes each file holds a flat JSON object mapping a title string to a text
# string (both are passed through str.strip()) -- TODO confirm against the data.
documents = []

# Sort the listing: os.listdir() returns entries in arbitrary order, and the
# running index of each chunk is later used as part of its ChromaDB id, so a
# stable order keeps those ids reproducible between runs and machines.
for filename in sorted(os.listdir(directory_path)):
    if not filename.endswith('.json'):
        continue

    file_path = os.path.join(directory_path, filename)
    # Explicit encoding: without it open() falls back to the locale default,
    # which can misdecode or reject UTF-8 JSON on some platforms.
    with open(file_path, 'r', encoding='utf-8') as file:
        json_data = json.load(file)

    for key, value in json_data.items():
        documents.append({
            'title': key.strip(),
            'content': value.strip()
        })

# Splitter configured with chunk_size=300 and chunk_overlap=50, so consecutive
# pieces of the same document share some text at their boundaries.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)

# Flatten to one record per piece, carrying the parent document's title along
# so each piece can still be traced back to its source entry.
split_documents = [
    {'title': record['title'], 'chunk': piece}
    for record in documents
    for piece in text_splitter.split_text(record['content'])
]
print(len(split_documents))

chroma_client = chromadb.PersistentClient(path='./chroma-db')  # Specify your directory


collection_name = "documents_collection"
ef = embedding_functions.DefaultEmbeddingFunction()
collection = chroma_client.get_or_create_collection(name=collection_name, embedding_function=ef)

# Number of chunks sent per collection.add() call. Batching lets the embedding
# function and the store process many chunks per call instead of one call per
# chunk; the size is kept modest so a single request stays small.
ADD_BATCH_SIZE = 500

# Only ingest into an empty collection, so re-running this cell does not try
# to add ids that are already stored.
if collection.count() == 0:
    # Same id scheme as the per-chunk version: title plus the chunk's global
    # index, which keeps ids unique even when several chunks share a title.
    chunk_ids = [f"{doc['title']}_{i}" for i, doc in enumerate(split_documents)]

    # range() yields nothing for an empty list, so add() is never called
    # with empty arguments.
    for start in range(0, len(split_documents), ADD_BATCH_SIZE):
        stop = start + ADD_BATCH_SIZE
        batch = split_documents[start:stop]
        collection.add(
            documents=[doc['chunk'] for doc in batch],
            metadatas=[{'title': doc['title']} for doc in batch],
            ids=chunk_ids[start:stop],
        )

print("Documents have been successfully stored in ChromaDB.")

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama
import chromadb.utils.embedding_functions as embedding_functions
import chromadb



# Re-open the persisted collection with the same embedding function that was
# used when the chunks were stored, so query embeddings are comparable.
collection_name = "documents_collection"
chroma_client = chromadb.PersistentClient(path='./chroma-db')
ef = embedding_functions.DefaultEmbeddingFunction()
collection = chroma_client.get_or_create_collection(name=collection_name, embedding_function=ef)

template1= """ Give me an answer for the {question}
and extract the most appropriate answer from {docs_list}
"""

prompt = ChatPromptTemplate.from_template(template1)

model = ChatOllama(model="llama3.2")

# Prompt -> model pipeline; invoke() takes a dict with the template variables.
chain = prompt | model

# Single source of truth for the question: it is used both for retrieval and
# for the prompt, and the two must never drift apart.
question = "What is the primary indication for Acetazolamide?"

# query_texts takes a list of queries; the result holds one inner list per
# query, hence the [0] below.
query_result = collection.query(query_texts=[question], n_results=5)
docs_list = query_result["documents"][0]
dist_list = query_result["distances"][0]

# Hand the model plain text rather than a Python list repr (which would add
# brackets, quotes and escaped newlines to the prompt).
context = "\n\n".join(docs_list)

print(chain.invoke({"question": question, "docs_list": context}).content)


