In [2]:
import os, getpass


# Get environment variables if not set
def _set_env(var: str):
    """Prompt for ``var`` and store the reply in ``os.environ`` when it has no value yet.

    A variable that is missing and one that is set to an empty string are
    handled the same way: both trigger the hidden-input prompt.

    Args:
        var: Name of the environment variable to check and, if needed, fill in.
    """
    # Nothing to do when a non-empty value is already present.
    if os.environ.get(var):
        return
    os.environ[var] = getpass.getpass(f'Enter your {var} API key: ')

# Make sure the API keys this notebook relies on are present,
# prompting for any that are not already set.
_set_env('TAVILY_API_KEY')
_set_env('LANGCHAIN_API_KEY')

# Fixed tokenizer / LangChain settings, applied in a single mapping update.
os.environ.update({
    'TOKENIZERS_PARALLELISM': 'true',
    'LANGCHAIN_TRACING_V2': 'true',
    'LANGCHAIN_PROJECT': 'langchain-rag-ollama',
})

### Search

# from langchain_community.tools.tavily_search import TavilySearchRun

In [1]:
## Load the model

from langchain_ollama import ChatOllama

# Name of the local model handed to ChatOllama.
local_llm = 'llama3.2'

# Two clients over the same model at temperature 0.0: `llm` is built with no
# extra options, `llm_json_mode` additionally passes format='json'.
llm, llm_json_mode = (
    ChatOllama(model=local_llm, temperature=0.0, **extra_options)
    for extra_options in ({}, {'format': 'json'})
)

### Vector store and embedding model
*Using a basic query to test the vector store and embedding model.*

---------------------------

In [4]:
from embedding.vector_store import VectorStore
from embedding.embedding_models import EmbeddingModels

# Load the BGE embedding model (noted by the author as the default model) and
# open the Chroma vector store persisted under ./data/vector_database/peer_kb.
embedding_model = EmbeddingModels().get_bge_embedding(model_name="BAAI/bge-m3")
chroma_kb = VectorStore.get_chroma_vectorstore(
    embedding=embedding_model,
    vectorstore_path='./data/vector_database/peer_kb',
)

# Smoke-test query. An earlier attempt with 'what is ADRD' failed with no results.
query_text = 'what is the specialty for Chinese language. '
similarity_search_res = VectorStore.retrieve_docs(
    vectorstore=chroma_kb,
    query=query_text,
    k=5,
)

# Show each hit's text followed by its metadata, one per line.
for doc in similarity_search_res:
    print(doc.page_content, doc.metadata, sep='\n')

[32m2025-01-13 @ 01:33:57[0m | [1mINFO    [0m | [36membedding.embedding_models[0m:[36mget_bge_embedding[0m:[36m28[0m - [1mUsing device: mps[0m
[32m2025-01-13 @ 01:33:57[0m | [1mINFO    [0m | [36membedding.embedding_models[0m:[36mget_bge_embedding[0m:[36m29[0m - [1mLoading BGE embedding model: BAAI/bge-m3[0m


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[32m2025-01-13 @ 01:34:00[0m | [1mINFO    [0m | [36membedding.embedding_models[0m:[36mget_bge_embedding[0m:[36m41[0m - [1mSuccessfully loaded BGE embedding model[0m
language too, I think. When he first moved here, I put signs in large letters on his drawers and doors to help him find things, but it seems now it is more of a matter of not being able to process the written language itself. He can sound out the word, but takes him a while for it to register what the word means.  We have a lighted magnifying glass for him to look at picture books, but managing the magnifying glass has become too hard for him. His glasses are bifocals and self tinting but even that has been confusing for him so we ordered some plain glasses hoping that will help.
{'author': 'Unknown', 'source': 'https://alzconnected.org/discussion/56340/it-s-time', 'source-tag': 'alzconnect', 'tag': 'Unknown', 'title': 'It’s time'}
Are language and translation services available? You and your friend or family me

### Retrieval grading
*Grading each retrieved chunk for relevance to the question, to prevent hallucination caused by erroneous retrievals that surface because of idiosyncrasies of the embedding model or chunking rather than because they are relevant.*

---------------------

In [5]:
from typing import List
from checkpoints.retrieval_grading import grade_retrieval
from langchain.schema import Document

# Grade the retrieval results
# The whole result list is graded against the query in one call. The filtering
# loop later in this cell zips `graded_docs` with `similarity_search_res` and
# reads grade["relevance_score"] / grade["reasoning"], so this presumably returns
# one mapping per document, in the same order -- TODO confirm in grade_retrieval.
graded_docs = grade_retrieval(query_text, similarity_search_res)

# Print graded results
# for doc in graded_docs:
#     print(f"Document content: {doc}")
    
# Filter out documents with relevance_score lower than 0.5
class ReasonedDocument:
    """
    Pairs a retrieved document with the score and explanation it was graded with.

    Attributes:
        document: Document retrieved from the vector store.
        relevance_score: Relevance score assigned by the reasoning model.
        reasoning: Explanation given for the relevance score.
    """

    document: Document
    relevance_score: float
    reasoning: str

    def __init__(self, document, relevance_score, reasoning):
        """Store the three values unchanged on the instance."""
        self.document, self.relevance_score, self.reasoning = (
            document,
            relevance_score,
            reasoning,
        )

    def __str__(self):
        """Return a three-line, newline-terminated summary of the instance."""
        return f"Document: {self.document}\nRelevance Score: {self.relevance_score}\nReasoning: {self.reasoning}\n"

# Keep only the hits whose grade is at least 0.5, wrapping each one together
# with its score and the grader's reasoning.
filtered_docs = [
    ReasonedDocument(
        document=doc,
        relevance_score=grade["relevance_score"],
        reasoning=grade["reasoning"],
    )
    for doc, grade in zip(similarity_search_res, graded_docs)
    if grade["relevance_score"] >= 0.5
]

# Print filtered results (uses ReasonedDocument.__str__)
for doc in filtered_docs:
    print(doc)



[32m2025-01-13 @ 01:34:05[0m | [1mINFO    [0m | [36mcheckpoints.retrieval_grading[0m:[36mgrade_retrieval[0m:[36m55[0m - [1mGrading relevance for question: what is the specialty for Chinese language. [0m


### Generate answer
*Using the graded documents to generate an answer.*

--------------------------------

In [1]:
from answer_generation import generate_answer

# NOTE: this cell needs `filtered_docs` and `query_text` from the cells above;
# running it on a fresh kernel without them raises NameError.

# Only the underlying documents of the hits that passed grading are used as context.
context_chunks = [reasoned.document for reasoned in filtered_docs]

if context_chunks:
    answer = generate_answer(query_text, context_chunks)
else:
    # No retrieved document passed grading: state that explicitly instead of
    # asking the model to answer without any supporting context.
    answer = (
        "Sorry, I could not find relevant information in the knowledge base "
        "to answer this question."
    )

print(answer)

[32m2025-01-13 @ 01:37:39[0m | [1mINFO    [0m | [36mutils.logger[0m:[36m<module>[0m:[36m56[0m - [1mLogger initialized successfully[0m


NameError: name 'filtered_docs' is not defined