In [33]:
import os
import glob

from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_openai import ChatOpenAI

import gradio as gr



In [34]:
# Load variables from a local .env file into the process environment.
# The API key read with os.getenv() further down is expected to come from here.
load_dotenv()


True

In [35]:
# Load the knowledge base with LangChain's document loaders.
# Each sub-folder of knowledge-base/ is treated as one document type.
# - Only directories are kept: DirectoryLoader raises when given a plain file,
#   so a stray file next to the folders would otherwise abort the cell.
# - Folders are sorted so the order of `documents` (and therefore any index used
#   for inspection below) is the same on every run and platform.
folders = sorted(path for path in glob.glob("knowledge-base/*") if os.path.isdir(path))

documents = []

for folder in folders:
    # The folder name (e.g. "company", "contracts") becomes the doc_type tag.
    doc_type = os.path.basename(folder)
    loader = DirectoryLoader(
        folder,
        glob="**/*.md",
        loader_cls=TextLoader,
        loader_kwargs={"encoding": "utf-8"},
    )
    for doc in loader.load():
        # Tag every document with its type so it travels with the chunks' metadata.
        doc.metadata["doc_type"] = doc_type
        documents.append(doc)
print(len(documents))

76


In [36]:
# Spot-check one loaded document: its metadata (source, doc_type) and page content.
documents[1]

Document(metadata={'source': 'knowledge-base\\company\\careers.md', 'doc_type': 'company'}, page_content="# Careers at Insurellm\n\n## Why Join Insurellm?\n\nAt Insurellm, we're not just building software—we're revolutionizing an entire industry. Since our founding in 2015, we've evolved from a high-growth startup to a lean, profitable company with 32 highly talented employees managing 32 active contracts across all eight of our product lines.\n\nAfter reaching 200 employees in 2020, we strategically restructured in 2022-2023 to focus on sustainable growth, operational excellence, and building a world-class remote-first culture. Today, we're a tight-knit team of exceptional professionals who deliver outsized impact through automation, AI, and strategic focus on high-value enterprise clients—from regional insurers to global reinsurance partners.\n\n### Our Culture\n\nWe live by our core values every day:\n- **Innovation First**: We encourage experimentation and creative problem-solving\

In [37]:
# Break the documents into overlapping chunks with RecursiveCharacterTextSplitter.
# The overlap keeps some shared text between neighbouring chunks.
CHUNK_SIZE = 800
CHUNK_OVERLAP = 250

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunks = text_splitter.split_documents(documents)
chunks[0]

Document(metadata={'source': 'knowledge-base\\company\\about.md', 'doc_type': 'company'}, page_content='# About Insurellm\n\nInsurellm was founded by Avery Lancaster in 2015 as an insurance tech startup designed to disrupt an industry in need of innovative products. Its first product was Markellm, the marketplace connecting consumers with insurance providers.\n\nThe company experienced rapid growth in its first five years, expanding its product portfolio to include Carllm (auto insurance portal), Homellm (home insurance portal), and Rellm (enterprise reinsurance platform). By 2020, Insurellm had reached a peak of 200 employees with 12 offices across the US.')

In [38]:
# Look at a chunk from deeper in the list as a second sanity check of the splitting.
chunks[100]

Document(metadata={'source': 'knowledge-base\\contracts\\Contract with Evergreen Life Insurance for Lifellm.md', 'doc_type': 'contracts'}, page_content='---\n\n## Features\n\nEvergreen Life Insurance will receive the following Starter Tier features:\n\n1. **AI-Powered Underwriting:** Accelerated underwriting process analyzing:\n   - Medical histories and prescription databases (Milliman IntelliScript)\n   - Motor vehicle records (MVRs)\n   - Credit-based insurance scores\n   - Lifestyle and occupation risk factors\n   - Automated underwriting decisions for standard risks\n\n2. **Risk Assessment:** AI-driven mortality risk modeling considering:\n   - Age, gender, and family medical history\n   - Current health conditions and lab values\n   - Tobacco and alcohol use\n   - High-risk activities and occupations')

### Pick an embedding model and create a vector database

In [39]:
# Embedding model used both to index the chunks and to embed queries at retrieval time.
embeddings = HuggingFaceEmbeddings(model_name = "all-MiniLM-L6-v2")

# Directory where Chroma persists the vector store.
db_name = "vector_db"

# If a store already exists on disk, delete its collection before rebuilding
# (presumably so re-running this cell does not index the same chunks twice).
if os.path.exists(db_name):
    Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()
    
# Embed every chunk and write the result to the persisted store.
vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_name)
# NOTE(review): _collection is a private attribute of the Chroma wrapper; used here only to report the vector count.
print(vectorstore._collection.count())

552


### Setting up LangChain objects:
### 1) retriever    2) llm

In [40]:
# Retriever: similarity search over the vector store, returning 4 chunks per query.
retreiver = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)

# Alternative backend (disabled) -- Google Gemini:
# llm = ChatGoogleGenerativeAI(
#     model="gemini-3-flash-preview",
#     temperature=0.3,
#     max_output_tokens=512,
#     google_api_key=os.getenv("GOOGLE_API_KEY")
# )

# Chat model reached through OpenRouter's OpenAI-compatible endpoint.
# The API key is read from the OPENROUTER_API_KEY environment variable.
llm = ChatOpenAI(
    openai_api_base="https://openrouter.ai/api/v1",
    openai_api_key=os.getenv("OPENROUTER_API_KEY"),
    model="xiaomi/mimo-v2-flash:free",  # example model id; swap for any model on the endpoint
    max_tokens=512,
    temperature=0.3,
)

In [41]:
# Retrieval only: show which chunks the vector store returns for a sample question.
retreiver.invoke("Who is Avery?")

[Document(id='d13ed3f8-b893-415a-806b-815f84793e89', metadata={'source': 'knowledge-base\\employees\\Avery Lancaster.md', 'doc_type': 'employees'}, page_content="## Other HR Notes\n- **Professional Development**: Avery has actively participated in leadership training programs and industry conferences, representing Insurellm and fostering partnerships.  \n- **Diversity & Inclusion Initiatives**: Avery has championed a commitment to diversity in hiring practices, seeing visible improvements in team representation since 2021.  \n- **Work-Life Balance**: Feedback revealed concerns regarding work-life balance, which Avery has approached by implementing flexible working conditions and ensuring regular check-ins with the team.\n- **Community Engagement**: Avery led community outreach efforts, focusing on financial literacy programs, particularly aimed at underserved populations, improving Insurellm's corporate social responsibility image."),
 Document(id='c84d4007-8dfd-404d-bbb7-2271a1d0a0aa'

In [42]:
# Baseline without retrieval: the bare LLM gets the same question with no knowledge-base context.
llm.invoke("Who is Avery?")

AIMessage(content='That\'s a great question! "Avery" is a name that can refer to many different people, both real and fictional. To give you the best answer, I need a little more context.\n\nHere are some of the most famous people and characters named Avery:\n\n### In Pop Culture & Fiction\n\n*   **Avery Jennings** from the Disney Channel show **"Dog with a Blog"**: This is likely the most famous Avery for younger audiences. She is one of the main characters, a smart and competitive teenager who is initially unaware that her family\'s dog can talk.\n*   **Avery Quinn** from the TV show **"Grey\'s Anatomy"**: A prominent character in the later seasons, she is a talented and ambitious orthopedic surgeon.\n*   **Avery** from the movie **"The Secret Life of Bees"**: A kind and gentle beekeeper who takes in the main character, Lily.\n*   **Avery** from the video game **"The Last of Us Part II"**: A minor character who is the daughter of a woman named Nora.\n\n### In History & Science\n\n*  

### Putting it all together

In [43]:
# System prompt for the RAG assistant. The {context} placeholder is filled via
# str.format() in answer_question with the text of the retrieved chunks.
SYSTEM_PROMPT_TEMPLATE = """
You are a knowledgeable, friendly assistant representing the company Insurellm.
You are chatting with a user about Insurellm.
If relevant, use the given context to answer any question.
If you don't know the answer, say so.
Context:
{context}
"""

In [44]:
def answer_question(question: str, history=None):
    """Answer a question about Insurellm using retrieval-augmented generation.

    The question is used to retrieve relevant chunks from the vector store; their
    text is injected into the system prompt, and the LLM is called with that
    prompt, the earlier conversation turns, and the new question.

    Args:
        question: The user's latest message.
        history: Earlier turns of the conversation, as passed by
            ``gr.ChatInterface``. Both shapes Gradio can supply are accepted:
            a list of ``{"role": ..., "content": ...}`` dicts, or a list of
            legacy ``[user_message, assistant_message]`` pairs. ``None`` or an
            empty list means there is no prior conversation.

    Returns:
        The text content of the model's reply.
    """

    def _text_of(content):
        # History content may be a plain string or a list of content parts;
        # only textual parts are forwarded, anything else (files, None) is dropped.
        if isinstance(content, str):
            return content
        if isinstance(content, (list, tuple)):
            return "\n".join(
                part["text"]
                for part in content
                if isinstance(part, dict) and isinstance(part.get("text"), str)
            )
        return ""

    docs = retreiver.invoke(question)
    context = "\n\n".join(doc.page_content for doc in docs)
    system_prompt = SYSTEM_PROMPT_TEMPLATE.format(context=context)

    messages = [SystemMessage(content=system_prompt)]

    # Replay earlier turns so follow-up questions keep their conversational context.
    for turn in history or []:
        if isinstance(turn, dict):
            role_and_content = [(turn.get("role"), turn.get("content"))]
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            role_and_content = [("user", turn[0]), ("assistant", turn[1])]
        else:
            continue  # unknown shape: skip rather than fail the whole chat

        for role, content in role_and_content:
            text = _text_of(content)
            if not text:
                continue
            if role == "user":
                messages.append(HumanMessage(content=text))
            elif role == "assistant":
                # LangChain chat models accept (role, content) tuples alongside
                # message objects; "ai" marks an assistant turn.
                messages.append(("ai", text))

    messages.append(HumanMessage(content=question))
    response = llm.invoke(messages)
    return response.content
    

In [None]:
# answer_question("Who is Averi Lancaster?", [])

'Avery Lancaster is the Co-Founder and Chief Executive Officer (CEO) of Insurellm. Based in San Francisco, she has been leading the company since she co-founded it in 2015.\n\nAvery is recognized for her innovative leadership and risk management expertise, which have been instrumental in positioning Insurellm as a leading Insurance Tech provider. She is known for balancing customer needs with business objectives and effectively bridging the gap between technical and business stakeholders.'

In [None]:
# gr.ChatInterface(answer_question).launch(inbrowser=True)


Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.


