<a href="https://colab.research.google.com/github/SidduVishnuPriya/CAPTCHA_Generator-using-HTML-CSS-JS/blob/main/virtual_insurance_agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install langchain langchain-experimental langchain-community langchain-openai openai chromadb pypdf sentence_transformers gradio langchain-together

Collecting langchain-experimental
  Downloading langchain_experimental-0.4.1-py3-none-any.whl.metadata (1.3 kB)
Collecting langchain-community
  Downloading langchain_community-0.4.1-py3-none-any.whl.metadata (3.0 kB)
Collecting langchain-openai
  Downloading langchain_openai-1.1.5-py3-none-any.whl.metadata (2.6 kB)
Collecting chromadb
  Downloading chromadb-1.3.7-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.2 kB)
Collecting pypdf
  Downloading pypdf-6.4.2-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-together
  Downloading langchain_together-0.3.1-py3-none-any.whl.metadata (4.0 kB)
Collecting langchain-classic<2.0.0,>=1.0.0 (from langchain-community)
  Downloading langchain_classic-1.0.0-py3-none-any.whl.metadata (3.9 kB)
Collecting requests<3.0.0,>=2.32.5 (from langchain-community)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7.0,>=0.6.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-

In [2]:
import os
from google.colab import userdata
os.environ["TOGETHER_API_KEY"] = userdata.get('TOGETHER_API_KEY') # user your together ai api key here
# go to https://docs.together.ai/docs/quickstart to register

#document loader
from langchain_community.document_loaders import PyPDFLoader, PyPDFDirectoryLoader

# Doc splitting
from langchain.text_splitter import RecursiveCharacterTextSplitter

# vector store
from langchain_community.vectorstores import Chroma

#llm
from langchain_together import Together

SecretNotFoundError: Secret TOGETHER_API_KEY does not exist.

In [None]:
loader = PyPDFDirectoryLoader("Insurance Exam Documents/")
docs = loader.load()

In [None]:
len(docs)

In [None]:
docs[16]

In [None]:
def split_docs(documents, chunk_size=500, chunk_overlap=100):
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
  docs = text_splitter.split_documents(documents)
  return docs

In [None]:
pages = split_docs(docs)
len(pages)

In [None]:
pages[500].page_content

In [None]:
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)

embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

db = Chroma.from_documents(pages, embedding_function)

In [None]:
query = "what is risk management?"
response = db.similarity_search(query)
print(response[0])

In [None]:
response = db.similarity_search_with_relevance_scores(query)
response

# Build RAG using above

In [None]:
llm = Together(
    model="meta-llama/Llama-2-70b-chat-hf",
    max_tokens=256,
    temperature=0.1,
    top_k=1
)

## similarity score helps to fetch only relevant results

In [None]:
retriever = db.as_retriever(similarity_score_threshold = 0.6)

In [None]:
from langchain.prompts import PromptTemplate
prompt_template = """
    You are a helpful insurance agent. Answer insurance related question using the given context.
    Context: {context}
    User Question: {question}
    Answer in less than 100 words
    If you don't have any context and are unsure of the answer, reply that you don't know about this topic.
    """

PROMPT = PromptTemplate(template = f"[INST] {prompt_template} [/INST]", input_variables=["context", "question"])

In [None]:
from langchain.chains import RetrievalQA
chain = RetrievalQA.from_chain_type(
    llm = llm,
    chain_type='stuff',
    retriever= retriever,
    input_key = 'query',
    return_source_documents = True,
    chain_type_kwargs={"prompt":PROMPT},
    verbose=True

)

In [None]:
from IPython.display import display
from IPython.display import Markdown
import textwrap

def to_markdown(text):
  text = text.replace('•', '  *')
  return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))

In [None]:
query = "what is indemnity?"
response = chain(query)
to_markdown(response['result'])

In [None]:
query = "can minors have a life insurance policy?"
response = chain(query)
to_markdown(response['result'])

In [None]:
query = "what kind of insurance policy is recommended for senior citizens?"
response = chain(query)
print(response)
to_markdown(response['result'])


In [None]:
query = "under what circumstances can a person withdraw money from their MPF account?"
response = chain(query)
to_markdown(response['result'])

In [None]:
query = "can someone withdraw money from MPF if they are permanently leaving Hong Kong?"
response = chain(query)
to_markdown(response['result'])