## Expert Knowledge Worker

A question answering agent based on inputs as an expert knowledge worker

In [38]:
import os
import glob
from dotenv import load_dotenv
import gradio as gr

In [39]:
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
import numpy as np
from sklearn.manifold import TSNE
import plotly.graph_objects as go
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

In [40]:
# MODEL = "gpt-4o-mini"
db_name = "vector_db"

In [41]:
load_dotenv()
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key')

In [42]:
folders = glob.glob("knowledge-base/*")
print(folders)

documents =[]
for folder in folders:
    doc_type = os.path.basename(folder)
    print(doc_type)
    loader = DirectoryLoader(folder, glob="**/*.md", loader_cls=TextLoader, loader_kwargs={"autodetect_encoding": True})
    folder_docs = loader.load()
    print(f"Loaded {len(folder_docs)} documents from {folder}")
    for doc in folder_docs:
        doc.metadata['doc_type'] = doc_type
        documents.append(doc)

['knowledge-base\\company', 'knowledge-base\\contracts', 'knowledge-base\\employees', 'knowledge-base\\products']
company
Loaded 3 documents from knowledge-base\company
contracts
Loaded 12 documents from knowledge-base\contracts
employees
Loaded 12 documents from knowledge-base\employees
products
Loaded 4 documents from knowledge-base\products


In [43]:
# Split the documents into smaller chunks

text_splitter = CharacterTextSplitter(chunk_size = 1000, chunk_overlap=200)

In [44]:
chunks = text_splitter.split_documents(documents=documents)
chunks

Created a chunk of size 1088, which is longer than the specified 1000


[Document(metadata={'source': 'knowledge-base\\company\\about.md', 'doc_type': 'company'}, page_content="# About Insurellm\n\nInsurellm was founded by Avery Lancaster in 2015 as an insurance tech startup designed to disrupt an industry in need of innovative products. It's first product was Markellm, the marketplace connecting consumers with insurance providers.\nIt rapidly expanded, adding new products and clients, reaching 200 emmployees by 2024 with 12 offices across the US."),
 Document(metadata={'source': 'knowledge-base\\company\\careers.md', 'doc_type': 'company'}, page_content='# Careers at Insurellm\n\nInsurellm is hiring! We are looking for talented software engineers, data scientists and account executives to join our growing team. Come be a part of our movement to disrupt the insurance sector.'),
 Document(metadata={'source': 'knowledge-base\\company\\overview.md', 'doc_type': 'company'}, page_content='# Overview of Insurellm\n\nInsurellm is an innovative insurance tech firm

In [45]:
len(chunks)

123

In [46]:
doc_type = set(chunk.metadata['doc_type'] for chunk in chunks)

In [47]:
print(', '.join(doc_type))

company, products, employees, contracts


## Time to work with Auto encoding LLMS:-

In [None]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/bert-base-nli-mean-tokens")

In [None]:
if os.path.exists(db_name):
    Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()

In [None]:
# create chroma
vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_name)

In [None]:
print("Vector store created ")
vectorstore._collection.count()

Vector store created 


123

In [None]:
collection = vectorstore._collection
sample_embedding = collection.get(limit=1, include=["embeddings"])['embeddings'][0]
dimensions = len(sample_embedding)
print(f"It has {dimensions} dimensions!")

It has 768 dimensions!


In [None]:
sample_embedding

array([ 9.21461731e-02,  3.02845418e-01,  4.49248701e-01, -1.74304113e-01,
        5.91190696e-01, -7.61397779e-01, -1.00077413e-01,  3.72903049e-01,
        6.91423863e-02, -5.78096032e-01, -3.70133156e-03,  6.12496324e-02,
        5.86309075e-01,  5.40488124e-01, -8.40113938e-01,  9.60142016e-02,
       -8.37187290e-01,  1.73504516e-01, -6.67945519e-02, -6.89963698e-02,
       -6.69081092e-01, -8.70833516e-01,  9.67789531e-01,  5.56396186e-01,
        8.83142471e-01,  2.88785279e-01, -5.67811430e-01, -4.58049849e-02,
       -4.48356390e-01,  4.71015781e-01, -2.87866443e-01,  2.01391026e-01,
       -1.66717321e-01, -5.64917386e-01, -2.54858315e-01,  8.53693187e-01,
       -2.23889947e-01, -2.03594178e-01,  7.49519840e-02,  9.58966929e-03,
       -7.66216397e-01, -3.58523309e-01, -3.45459193e-01,  1.44759327e-01,
       -1.07781339e+00, -5.32030836e-02, -9.48462367e-01,  4.31316078e-01,
        2.56480038e-01, -5.27471900e-01,  5.12798488e-01,  1.42748281e-01,
       -1.85695186e-01, -

## Visualize the vector store

In [None]:
result = collection.get(include = ['embeddings', 'documents', 'metadatas'])
vectors = np.array(result['embeddings'])
documents = result['documents']
doc_types = [metadata['doc_type'] for metadata in result['metadatas']]
colors = [['blue', 'green', 'red', 'orange'][['products', 'employees', 'contracts', 'company'].index(t)] for t in doc_types]

In [None]:
tsne = TSNE(n_components=2, random_state=42)
reduced_vectors = tsne.fit_transform(vectors)

fig = go.Figure(data= [go.Scatter(
                                x=reduced_vectors[:, 0],
                                  y = reduced_vectors[:, 1],
                                  mode='markers',
                                  marker = dict(size=5, color=colors),
                                  text = [f'Type: {t} <br> Text:{d[:100]}...' for t, d in zip(doc_types, documents)]
                                  )])

fig.update_layout(
    title='2D vector representation',
    scene = dict(xaxis_title='x', yaxis_title='y'),
    width=800,
    height=600,
        margin = dict(r=20, b=10, l=10, t=40) # helps in better visualization

)

fig.show()

In [None]:
tsne = TSNE(n_components=3, random_state=42)
reduced_vectors = tsne.fit_transform(vectors)

fig = go.Figure(data= [go.Scatter3d(
                                x=reduced_vectors[:, 0],
                                  y = reduced_vectors[:, 1],
                                  z = reduced_vectors[:, 2],
                                  mode='markers',
                                  marker = dict(size=5, color=colors),
                                  text = [f'Type: {t} <br> Text:{d[:100]}...' for t, d in zip(doc_types, documents)]
                                  )])

fig.update_layout(
    title='3D vector representation',
    scene = dict(xaxis_title='x', yaxis_title='y', zaxis_title = 'z'),
    width=900,
    height=700,
    margin = dict(r=20, b=10, l=10, t=40) # helps in better visualization
)

fig.show()

## Time to bring it together for RAG pipeline

In [None]:
from langchain_ollama import ChatOllama

#llm = ChatOpenAI(temperature = 0.7, model_name=MODEL)# your llm goes here 
llm = ChatOllama(model="llama3.2")
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

retriever = vectorstore.as_retriever()

conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory = memory)


Please see the migration guide at: https://python.langchain.com/docs/versions/migrating_memory/



In [None]:
query = "Can you describe Insurellm"

result = conversation_chain.invoke({"question": query})
print(result["answer"])

I don't know any specific details about Insurellm beyond what is provided in the contract document, which appears to be a fictional agreement between Insurellm and another party (likely an insurance client). The document does not provide information on the company's background, history, products or services offered, mission, or any other relevant details.


In [None]:
from langchain_huggingface import HuggingFaceEndpoint
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
import os


# Use a Hugging Face model through their Inference API
llm = HuggingFaceEndpoint(
    endpoint_url="https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1",
    temperature=0.7,
    top_p=0.95,
)

# Your existing retriever code
retriever = vectorstore.as_retriever()

# Set up memory component
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Create the conversation chain
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory
)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [None]:
# query = "Can you describe Insurellm in a few sentences"

# result = conversation_chain.invoke({"question": query})
# print(result["answer"])


'post' (from 'huggingface_hub.inference._client') is deprecated and will be removed from version '0.31.0'. Making direct POST requests to the inference server is not supported anymore. Please use task methods instead (e.g. `InferenceClient.chat_completion`). If your use case is not supported, please open an issue in https://github.com/huggingface/huggingface_hub.



 Insurellm is a fictional insurance company that provides the Homellm product to its clients. This product includes AI-powered risk assessment, dynamic pricing model, instant claim processing, predictive maintenance alerts, multi-channel integration, and access to a customer portal. Insurellm is a party to this contract with Greenstone Insurance.


## Time for Gradio

In [None]:
def chat(message, history):
    result = conversation_chain.invoke({"question": message})
    return result["answer"]

In [None]:
view= gr.ChatInterface(chat).launch()


The 'tuples' format for chatbot messages is deprecated and will be removed in a future version of Gradio. Please set type='messages' instead, which uses openai-style 'role' and 'content' keys.



* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.



'post' (from 'huggingface_hub.inference._client') is deprecated and will be removed from version '0.31.0'. Making direct POST requests to the inference server is not supported anymore. Please use task methods instead (e.g. `InferenceClient.chat_completion`). If your use case is not supported, please open an issue in https://github.com/huggingface/huggingface_hub.


'post' (from 'huggingface_hub.inference._client') is deprecated and will be removed from version '0.31.0'. Making direct POST requests to the inference server is not supported anymore. Please use task methods instead (e.g. `InferenceClient.chat_completion`). If your use case is not supported, please open an issue in https://github.com/huggingface/huggingface_hub.


'post' (from 'huggingface_hub.inference._client') is deprecated and will be removed from version '0.31.0'. Making direct POST requests to the inference server is not supported anymore. Please use task methods instead (e.g. `InferenceClient.chat_completion`). If y