## Ingestion Part

In [1]:
# ===============================================================================================================
#                                               Loading Documents
# ===============================================================================================================

# load the documents from every subdirectory of the knowledge base
import os
import glob
from langchain_community.document_loaders import DirectoryLoader, TextLoader

# Knowledge base layout: <root>/<doc_type>/*.md
# Paths are built with os.path so no backslash escapes ("\k", "\*") are needed
# and the cell finds the files on any operating system, not only on Windows.
root = os.path.join("data", "knowledge-base")

# One sub-directory per document type. Stray files directly under root are
# skipped, and the list is sorted so the load order is stable between runs.
sub_roots = sorted(
    path for path in glob.glob(os.path.join(root, "*")) if os.path.isdir(path)
)

documents = []
for sub_root in sub_roots:
    # The folder name (e.g. "products") becomes the document type.
    sub_name = os.path.basename(sub_root)
    loader = DirectoryLoader(sub_root, glob="*.md", loader_cls=TextLoader, loader_kwargs={"encoding": "utf-8"})
    docs = loader.load()

    # Tag every document with the folder it came from.
    for doc in docs:
        doc.metadata["doc_type"] = sub_name
        documents.append(doc)

    print(f"{sub_name} has {len(docs)} documents")


print(f"\ntotal number of documents {len(documents)}")


# ===============================================================================================================
#                                               Documents into Chunk
# ===============================================================================================================

from langchain_text_splitters import RecursiveCharacterTextSplitter

# Cut the loaded documents into overlapping chunks (size 500, overlap 200).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=500,
)
chunks = text_splitter.split_documents(documents)

print(f"\nnum. chunks {len(chunks)}")

# ===============================================================================================================
#                                               Vectorize and Store
# ===============================================================================================================

# an embedding model and a vectorDB

## model
from langchain_ollama import OllamaEmbeddings

# Embedding model used to turn each chunk into a vector
embeddings = OllamaEmbeddings(model="nomic-embed-text")

## VectorDB
from langchain_chroma import Chroma

DB_NAME = "./vector_db"

# Start clean: when the store directory already exists, drop its collection
# before writing the new chunks into it.
if os.path.exists(DB_NAME):
    Chroma(embedding_function=embeddings, persist_directory=DB_NAME).delete_collection()

vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory=DB_NAME,
)

# _collection is the internal Chroma object
collection = vectorstore._collection

# Count how many vectors exist (should be the same as the number of chunks)
num_embedding = collection.count()
print(f"\nnumber of embedding vectors {num_embedding}")

assert num_embedding == len(chunks)

# Pull a single stored record, embeddings included, to read the vector length
fetched = collection.get(limit=1, include=["embeddings"])
sample = fetched["embeddings"][0]

dimensions = len(sample)
print(f"dimensions : {dimensions}")

  from .autonotebook import tqdm as notebook_tqdm


company has 4 documents
contracts has 32 documents
employees has 32 documents
products has 8 documents

total number of documents 76

num. chunks 970

number of embedding vectors 970
dimensions : 768


## Answer

In [5]:
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings, ChatOllama



# Same store directory and embedding model name as in the ingestion cell.
DB_NAME = "./vector_db"
embeddings = OllamaEmbeddings(model="nomic-embed-text")

# Vector store handle: path + embedding model
vectorstore = Chroma(
    embedding_function=embeddings,
    persist_directory=DB_NAME,
)

# when a query comes >> search and retrieve
retriever = vectorstore.as_retriever()

# LLM that writes the final answer
llm = ChatOllama(temperature=0, model="phi")

In [6]:
# Template for the system message sent to the LLM.
# The {context} placeholder is filled in answer() via
# SYSTEM_PROMPT.format(context=...) with the text of the retrieved chunks.
SYSTEM_PROMPT = """
You are a knowledgeable, friendly assistant representing the company Insurellm.
You are chatting with a user about Insurellm.
If relevant, use the given context to answer any question.
If you don't know the answer, say so.
Context:
{context}
"""

In [9]:
from langchain_core.messages import SystemMessage, HumanMessage, convert_to_messages

# In a chatbot, the user may ask a question about the DB content even in the middle of the conversation.

# 1. combine the query with the past conversation
# 2. retrieve from the DB
# 3. combine the content of the retrieved documents
# 4. add the new info to the system prompt
# 5. history looks like:
#       history = [
#           {"role": "user", "content": "What is Insurellm?"},
#           {"role": "assistant", "content": "Insurellm is..."},
#           ]
# After conversion: convert_to_messages(history)
# it looks like:
#   [
#       HumanMessage("What is Insurellm?"),
#       AIMessage("Insurellm is...")
#   ]
# because history is multiple messages :: use extend, but HumanMessage is only one :: use append
#
# 6. ask llm to generate response


def answer(query, history):
    """Answer `query` with retrieved context and the conversation so far.

    Args:
        query: The new user question (a string).
        history: Earlier turns as a list of dicts with "role" and "content"
            keys, oldest first.

    Returns:
        A tuple `(text, docs)`: the content of the LLM response and the
        documents the retriever returned for this turn.
    """

    # 1 -- the search text is every earlier user turn plus the new query
    earlier_user_turns = [turn["content"] for turn in history if turn["role"] == "user"]
    prior = "\n".join(earlier_user_turns)
    if prior:
        search_text = f"{prior}\n{query}"
    else:
        search_text = query

    # 2 -- look up chunks for the combined text
    docs = retriever.invoke(search_text, k=3)

    # 3 -- one context string, chunks separated by a blank line
    context = "\n\n".join([doc.page_content for doc in docs])

    # 4 + 5 -- system prompt with context, then past turns, then the new question
    messages = [SystemMessage(content=SYSTEM_PROMPT.format(context=context))]
    messages.extend(convert_to_messages(history))
    messages.append(HumanMessage(content=query))

    # 6 -- let the LLM generate the reply
    return llm.invoke(messages).content, docs


In [10]:
if __name__ == "__main__":
    # Smoke test: a single question with no prior conversation.
    answer_text, docs = answer(query="What is Insurellm?", history=[])

    print("ANSWER:\n", answer_text)

    # Show which file each retrieved chunk came from, numbered from 1.
    print("\nSOURCES:")
    for i, doc in enumerate(docs, start=1):
        print(f"{i}. {doc.metadata.get('source')}")


ANSWER:
  Insurellm is a company that provides insurance solutions for various industries. We offer comprehensive coverage options to protect businesses and individuals from financial losses due to unexpected events such as accidents, theft, or natural disasters. Our team of experts works closely with our clients to understand their unique needs and provide customized insurance plans that meet their requirements.


SOURCES:
1. data\knowledge-base\products\Homellm.md
2. data\knowledge-base\products\Homellm.md
3. data\knowledge-base\contracts\Contract with Velocity Auto Solutions for Carllm.md


In [11]:
# Two-turn conversation: the follow-up question is asked with the first turn in the history.
history = []

q1 = "What is Insurellm?"
a1, docs1 = answer(q1, history)
print("A1:", a1)

# Record the finished turn (question + reply) before asking the follow-up.
history += [
    {"role": "user", "content": q1},
    {"role": "assistant", "content": a1},
]

q2 = "Who is it for?"
a2, docs2 = answer(q2, history)
print("A2:", a2)


A1:  Insurellm is a company that provides insurance solutions for various industries. They offer a range of products and services to help businesses protect their assets, employees, and customers from unexpected events. Their team of experts works closely with clients to understand their unique needs and develop customized insurance plans that provide peace of mind and financial security.

A2:  Insurellm is for anyone who wants to protect themselves or their business from potential risks and liabilities. Whether you are an individual looking for personal insurance, a small business owner seeking coverage for your assets and employees, or a large corporation requiring comprehensive risk management solutions, Insurellm has the expertise and experience to help you find the right insurance products that meet your specific needs.




## Step-by-step explanation of the answer part

In [None]:
# What history looks like

history = [
    {"role": "user", "content": "What is Insurellm?"},
    {"role": "assistant", "content": "Insurellm is..."},
    {"role": "user", "content": "Who is it for?"}
]

# the new question that has just arrived
query = "Is it secure?"

# first step to answer  >> keep only user contents from previous conversations, ignore assistant
user_contents = [m["content"] for m in history if m["role"] == "user"]

# next step: join them, each on a new line
prior = "\n".join(user_contents)

# prior is now:
#   What is Insurellm?
#   Who is it for?

# now, a query comes :: join it
combined = f"{prior}\n{query}" if prior else query

# combined is now:
#       What is Insurellm?
#       Who is it for?
#       Is it secure?
print(combined)


In [None]:
# What the retriever actually is :: a wrapper around the Chroma vector store
# (created in the "Answer" section with vectorstore.as_retriever())

docs = retriever.invoke(combined)

# The retriever always "asks" the DB, because that's the only place where the knowledge chunks live.
# NOTE(review): presumably it always returns some chunks as long as the collection is non-empty -- confirm
# But the chunks may be irrelevant or nonsensical in context


# How to handle irrelevant chunks?

# Since retrieval returns chunks whether or not they are relevant, it's the LLM's job to ignore irrelevant context.

# The system prompt helps with this:

"If relevant, use the given context to answer any question. If you don't know the answer, say so."

# Same template text as the SYSTEM_PROMPT defined in the "Answer" section, repeated here for reference
SYSTEM_PROMPT = """
You are a knowledgeable, friendly assistant representing the company Insurellm.
You are chatting with a user about Insurellm.
If relevant, use the given context to answer any question.
If you don't know the answer, say so.
Context:
{context}
"""

