## Expert Knowledge Worker

A question answering agent based on inputs as an expert knowledge worker

In [75]:
import os
import glob
from dotenv import load_dotenv
import gradio as gr

In [136]:
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from langchain_huggingface import HuggingFaceEmbeddings
from sentence_transformers import SentenceTransformer
from langchain.embeddings import SentenceTransformerEmbeddings

from langchain_chroma import Chroma
import numpy as np
from sklearn.manifold import TSNE
import plotly.graph_objects as go
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

In [137]:
# MODEL = "gpt-4o-mini"
db_name = "vector_db"

In [138]:
load_dotenv()
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key')

In [139]:
folders = glob.glob("knowledge-base/*")
print(folders)

documents =[]
for folder in folders:
    doc_type = os.path.basename(folder)
    print(doc_type)
    loader = DirectoryLoader(folder, glob="**/*.md", loader_cls=TextLoader, loader_kwargs={"autodetect_encoding": True})
    folder_docs = loader.load()
    print(f"Loaded {len(folder_docs)} documents from {folder}")
    for doc in folder_docs:
        doc.metadata['doc_type'] = doc_type
        documents.append(doc)

['knowledge-base\\company', 'knowledge-base\\contracts', 'knowledge-base\\employees', 'knowledge-base\\products']
company
Loaded 3 documents from knowledge-base\company
contracts
Loaded 12 documents from knowledge-base\contracts
employees
Loaded 12 documents from knowledge-base\employees
products
Loaded 4 documents from knowledge-base\products


In [140]:
# Split the documents into smaller chunks

text_splitter = CharacterTextSplitter(chunk_size = 1000, chunk_overlap=200)

In [141]:
chunks = text_splitter.split_documents(documents=documents)
chunks

Created a chunk of size 1088, which is longer than the specified 1000


[Document(metadata={'source': 'knowledge-base\\company\\about.md', 'doc_type': 'company'}, page_content="# About Insurellm\n\nInsurellm was founded by Avery Lancaster in 2015 as an insurance tech startup designed to disrupt an industry in need of innovative products. It's first product was Markellm, the marketplace connecting consumers with insurance providers.\nIt rapidly expanded, adding new products and clients, reaching 200 emmployees by 2024 with 12 offices across the US."),
 Document(metadata={'source': 'knowledge-base\\company\\careers.md', 'doc_type': 'company'}, page_content='# Careers at Insurellm\n\nInsurellm is hiring! We are looking for talented software engineers, data scientists and account executives to join our growing team. Come be a part of our movement to disrupt the insurance sector.'),
 Document(metadata={'source': 'knowledge-base\\company\\overview.md', 'doc_type': 'company'}, page_content='# Overview of Insurellm\n\nInsurellm is an innovative insurance tech firm

In [142]:
len(chunks)

123

In [143]:
doc_type = set(chunk.metadata['doc_type'] for chunk in chunks)

In [144]:
print(', '.join(doc_type))

company, products, employees, contracts


## Time to work with Auto encoding LLMS:-

In [174]:
# hf_token should be retrieved it is your API key from hugging face
hf_token = os.environ.get('HUGGINGFACE_API_KEY', 'your-key')

# Use the SentenceTransformer model for embeddings
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-distilroberta-v1",
    model_kwargs={"token": hf_token}
)

# list of all amazing embeddings models
# Try these exact model paths
# embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
# # Or
# embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-distilroberta-v1")
# # Or
# embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-roberta-large-v1")
# # Or
# embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-distilbart-large-cnn")

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development



config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.1k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/653 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/328M [00:00<?, ?B/s]

KeyboardInterrupt: 

In [152]:
if os.path.exists(db_name):
    Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()

In [153]:
# create chroma
vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_name)

In [154]:
print("Vector store created ")
vectorstore._collection.count()

Vector store created 


123

In [155]:
collection = vectorstore._collection
sample_embedding = collection.get(limit=1, include=["embeddings"])['embeddings'][0]
dimensions = len(sample_embedding)
print(f"It has {dimensions} dimensions!")

It has 384 dimensions!


In [156]:
sample_embedding

array([-4.51701917e-02, -4.03839257e-03, -4.76737209e-02,  5.79420179e-02,
        7.09060347e-03, -1.20240683e-02,  1.13990456e-01,  3.22365388e-02,
       -2.38573048e-02, -2.50851177e-02,  7.44796917e-02,  2.48318091e-02,
        1.05261981e-01, -2.77122259e-02, -3.79518792e-02,  1.81514677e-02,
       -1.18474448e-02, -4.32383344e-02,  3.21236961e-02,  2.71694809e-02,
       -4.61054705e-02, -4.26115887e-03, -1.15558011e-02, -8.19237530e-03,
       -3.33760679e-02,  6.54423283e-03,  4.88266088e-02,  3.43601853e-02,
       -3.53719629e-02, -5.88529184e-02, -1.16648329e-02, -3.25215943e-02,
        2.13794243e-02,  6.14821464e-02,  1.26222731e-03,  1.59483161e-02,
       -2.78289709e-02, -7.27213025e-02, -9.33095142e-02,  4.26129717e-03,
        3.62291150e-02, -2.61923820e-02, -4.81943451e-02, -2.92451847e-02,
       -9.08269957e-02,  5.86406142e-02,  4.04887535e-02,  6.68057352e-02,
       -5.96315973e-02,  1.25991955e-01, -2.81627215e-02,  5.62679246e-02,
        2.90568341e-02, -

## Visualize the vector store

In [157]:
result = collection.get(include = ['embeddings', 'documents', 'metadatas'])
vectors = np.array(result['embeddings'])
documents = result['documents']
doc_types = [metadata['doc_type'] for metadata in result['metadatas']]
colors = [['blue', 'green', 'red', 'orange'][['products', 'employees', 'contracts', 'company'].index(t)] for t in doc_types]

In [158]:
tsne = TSNE(n_components=2, random_state=42)
reduced_vectors = tsne.fit_transform(vectors)

fig = go.Figure(data= [go.Scatter(
                                x=reduced_vectors[:, 0],
                                  y = reduced_vectors[:, 1],
                                  mode='markers',
                                  marker = dict(size=5, color=colors),
                                  text = [f'Type: {t} <br> Text:{d[:100]}...' for t, d in zip(doc_types, documents)]
                                  )])

fig.update_layout(
    title='2D vector representation',
    scene = dict(xaxis_title='x', yaxis_title='y'),
    width=800,
    height=600,
        margin = dict(r=20, b=10, l=10, t=40) # helps in better visualization

)

fig.show()

In [159]:
tsne = TSNE(n_components=3, random_state=42)
reduced_vectors = tsne.fit_transform(vectors)

fig = go.Figure(data= [go.Scatter3d(
                                x=reduced_vectors[:, 0],
                                  y = reduced_vectors[:, 1],
                                  z = reduced_vectors[:, 2],
                                  mode='markers',
                                  marker = dict(size=5, color=colors),
                                  text = [f'Type: {t} <br> Text:{d[:100]}...' for t, d in zip(doc_types, documents)]
                                  )])

fig.update_layout(
    title='3D vector representation',
    scene = dict(xaxis_title='x', yaxis_title='y', zaxis_title = 'z'),
    width=900,
    height=700,
    margin = dict(r=20, b=10, l=10, t=40) # helps in better visualization
)

fig.show()

## Time to bring it together for RAG pipeline

First the `ChatOllama`

In [160]:
from langchain_ollama import ChatOllama

#llm = ChatOpenAI(temperature = 0.7, model_name=MODEL)# your llm goes here 
llm = ChatOllama(model="llama3.2:1b")
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

retriever = vectorstore.as_retriever()

conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory = memory)

In [161]:
query = "Can you describe Insurellm"

result = conversation_chain.invoke({"question": query})
print(result["answer"])

Based on the provided context, I can summarize what Insurellm is:

Insurellm is an insurance tech startup founded by Avery Lancaster in 2015. It was initially known as Markellm, but has since expanded to offer multiple insurance software products and services.

The company's products include:

1. Carllm (auto insurance portal)
2. Homellm (home insurance portal)
3. Rellm (reinsurance platform)
4. Marketllm (marketplace for connecting consumers with insurance providers)

Insurellm has over 300 clients worldwide and offers a comprehensive suite of solutions to the insurance industry, aiming to transform the landscape of home insurance while ensuring innovation and reliability.

That's the general overview of Insurellm based on the provided context.


## The ollama causes problem as I do not have a GPU it is painfully slow hence let's use inference API from hugging face

In [163]:
from langchain_huggingface import HuggingFaceEndpoint
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
import os


# Use a Hugging Face model through their Inference API
llm = HuggingFaceEndpoint(
    endpoint_url="https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1",
    temperature=0.7,
    top_p=0.95,
)

# Your existing retriever code
retriever = vectorstore.as_retriever()

# Set up memory component
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Create the conversation chain
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory
)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [164]:
query = "Who found Insurellm"

result = conversation_chain.invoke({"question": query})
print(result["answer"])


'post' (from 'huggingface_hub.inference._client') is deprecated and will be removed from version '0.31.0'. Making direct POST requests to the inference server is not supported anymore. Please use task methods instead (e.g. `InferenceClient.chat_completion`). If your use case is not supported, please open an issue in https://github.com/huggingface/huggingface_hub.



 Avery Lancaster founded Insurellm in 2015.


## Time for Gradio

In [165]:
def chat(message, history):
    result = conversation_chain.invoke({"question": message})
    return result["answer"]

In [166]:
view= gr.ChatInterface(chat).launch()


The 'tuples' format for chatbot messages is deprecated and will be removed in a future version of Gradio. Please set type='messages' instead, which uses openai-style 'role' and 'content' keys.



* Running on local URL:  http://127.0.0.1:7863

To create a public link, set `share=True` in `launch()`.


## Need to improve the RAG it is bad

In [168]:
from langchain_together import ChatTogether
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

# Load the Together-hosted model (e.g., Mixtral)
llm = ChatTogether(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
    temperature=0.7,
    max_tokens=512
)

# Vector store setup (your retriever must already be prepared)
retriever = vectorstore.as_retriever()

# Add memory for multi-turn chat
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Build the Conversational Retrieval Chain
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory
)

# Use it
query = "What is Insurellm?"
result = conversation_chain.invoke({"question": query})
print(result["answer"])


 Insurellm is an innovative insurance tech firm that offers four software products for the insurance industry: Carllm, Homellm, Rellm, and Marketllm. Insurellm has over 300 clients worldwide and a commitment to transforming the home insurance landscape with products like Homellm. The company provides technical support, response times, and training for its clients. Insurellm was founded in 2015 by Avery Lancaster and had grown to 200 employees across 12 offices in the US by 2024.


In [169]:
from langchain_groq import ChatGroq
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

# 1. Setup Groq LLM
llm = ChatGroq(
    model="llama3-8b-8192",  # Other options mixtral-8x7b-32768, gemma-7b-it
    temperature=0.7
)

# 2. Setup Retriever (your FAISS, Chroma, etc.)
retriever = vectorstore.as_retriever()

# 3. Setup Memory
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# 4. Build Conversational Retrieval Chain
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory
)

# 5. Ask a Question
query = "Who found Insurellm?"
result = conversation_chain.invoke({"question": query})
print(result["answer"])


According to the context, Insurellm was founded by Avery Lancaster in 2015.


In [135]:
from langchain_groq import ChatGroq
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_core.callbacks import StdOutCallbackHandler

# 1. Setup Groq LLM
llm = ChatGroq(
    model="llama3-8b-8192",  # Other options mixtral-8x7b-32768, gemma-7b-it
    temperature=0.7
)

# 2. Setup Retriever (your FAISS, Chroma, etc.)
retriever = vectorstore.as_retriever()

# 3. Memory 
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

# 4. Build Conversational Retrieval Chain
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    callbacks=[StdOutCallbackHandler()] 
)

# 5. Ask a Question
query = "Who is Avery?"
result = conversation_chain.invoke({"question": query})
print(result["answer"])




[1m> Entering new ConversationalRetrievalChain chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
**Signatures:**  
_________________________                           _________________________  
**[Name], Title**                                   **[Name], Title**  
Insurellm                                           GreenField Holdings  
**Date:** ____________                             **Date:** ____________

**Insurellm, Inc.**  
_____________________________  
Authorized Signature   
Date: ___________________  

**Apex Reinsurance**  
_____________________________  
Authorized Signature  
Date: ___________________

---

**Signatures**  
**For Insurellm**: __________________________  
**Name**: John S

In [170]:
# guess we need to send in more chunks of data for better output

retriever = vectorstore.as_retriever(search_kwargs={'k':35})

llm = ChatGroq(
    model="llama-3.3-70b-versatile",  # Other options: https://console.groq.com/settings/models
    temperature=0.7
)

# 3. Memory 
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

# 4. Build Conversational Retrieval Chain
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory
)

# 5. Ask a Question
query = "What is Carllm?"
result = conversation_chain.invoke({"question": query})
print(result["answer"])

Carllm is an innovative auto insurance product developed by Insurellm, designed to streamline the way insurance companies offer coverage to their customers. It utilizes advanced algorithms and artificial intelligence to deliver personalized auto insurance solutions, ensuring optimal coverage while minimizing costs.


## This is the best RAG

In [171]:
def chat(message, history):
    result = conversation_chain.invoke({"question": message})
    return result["answer"]

view= gr.ChatInterface(chat).launch()


The 'tuples' format for chatbot messages is deprecated and will be removed in a future version of Gradio. Please set type='messages' instead, which uses openai-style 'role' and 'content' keys.



* Running on local URL:  http://127.0.0.1:7864

To create a public link, set `share=True` in `launch()`.
