In [82]:
from dotenv import load_dotenv
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI


import chromadb
from chromadb.config import Settings

load_dotenv()

# Init
embeddings = OpenAIEmbeddings()
chatModel = ChatOpenAI(model_name="gpt-4o-mini")
textSplitter = CharacterTextSplitter(separator="\n",chunk_size=200,chunk_overlap=0)

client = chromadb.HttpClient(host="localhost",port="3005",settings=Settings(allow_reset=True))
client.heartbeat()
client.reset()  # resets the database
collection = client.create_collection("the_facts")

chromaLocal = Chroma(client=client,embedding_function=embeddings,persist_directory="emb")

# Using chroma python built-in packages
# chromaLocal = Chroma(embedding_function=embeddings,persist_directory="emb")


# Data
loader = TextLoader("./facts.txt")
docs = loader.load_and_split(textSplitter)

# Add data to chroma
chromaLocal._collection = collection
chromaLocal.add_documents(docs)


['0a32a5b8-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a644-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a662-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a676-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a68a-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a69e-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a6b2-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a6bc-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a6d0-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a6da-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a6ee-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a702-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a70c-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a720-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a72a-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a73e-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a748-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a75c-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a770-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a77a-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a78e-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a798-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a7ac-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a7b6-b1a9-11ef-b0cf-967fac8ceb6f',
 '0a32a7ca-b1a9-

In [89]:
# PROMPT
prompt = "Who is canadian god? based on my context"

# Get similarity
result = chromaLocal.similarity_search_with_score(prompt, k=3)

# Debug only
# for res in result:
#     print("\n")
#     print(res[1])
#     print(res[0].page_content)

In [None]:
from langchain.schema import BaseRetriever
from langchain.embeddings.base import Embeddings

# Custom retriever using max_marginal_relevance_search_by_vector
class MarginalRetriever(BaseRetriever):
    embed: Embeddings
    chroma: Chroma

    def get_relevant_documents(self, query):
        # Get relevant document using MMR (Max Marginal Relevance) method
        emb = self.embed.embed_query(query)
        return self.chroma.max_marginal_relevance_search_by_vector(embedding=emb,lambda_mult=0.8, k=3)



# Create an instance of the custom retriever

marginal_retriever = MarginalRetriever(embed=embeddings,chroma=chromaLocal)


In [92]:
retriever = chromaLocal.as_retriever()

# Debug
retriever_doc = retriever.invoke(prompt)
for res in retriever_doc:
    print("Context: \n")
    print(res.page_content)


# Create chain
# stuff,map_reduce,map_rerank,refine
chain = RetrievalQA.from_chain_type(llm=chatModel,chain_type="stuff",retriever=marginal_retriever)

# Get answer
result = chain.run(prompt)
print(result)


Context: 

23. The Pacific Ocean is the largest ocean on Earth, covering more than 60 million square miles.
24. Zeus was the king of the Greek gods according to ancient Greek myth and poseidon is Canadian gods.
Context: 

50. Canada has more lakes than the rest of the world combined.
51. 10% of the world's population is left-handed.
52. The Statue of Liberty was gifted to the U.S. by France.
Context: 

96. Some animals have blue, green, or violet blood.
97. A strawberry isn't a berry but a banana is.
98. A Swiss passport is the world's most accepted passport.
Context: 

99. William Taft, the 27th U.S. president, got stuck in his White House bathtub.
100. Russia and America are less than 4 km apart at the nearest point.
According to your context, Poseidon is mentioned as a Canadian god. However, this is not accurate as Poseidon is actually a Greek god, not a Canadian one.
