## **Install necessary libraries**

In [2]:
# !pip install -qqq langchain llama-index llama-hub openai accelerate==0.21.0 bitsandbytes==0.40.2 transformers sentence_transformers InstructorEmbedding chromadb pypdf pdf2image

In [1]:
# !pip install beautifulsoup4>=4.12.2

In [2]:
# !pip install --upgrade llama_index MainContentExtractor

# **Importing necessary libraries**

In [3]:
import torch
import logging
import sys

# llama_index
from langchain import HuggingFacePipeline, PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import VectorStoreIndex, SimpleDirectoryReader, StorageContext, load_index_from_storage, ServiceContext
from langchain.vectorstores import Chroma

from transformers import AutoTokenizer, TextStreamer, pipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoTokenizer


# llama_index
from llama_index.response.notebook_utils import display_response
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.indices.postprocessor import SimilarityPostprocessor, KeywordNodePostprocessor
from llama_index.response_synthesizers import get_response_synthesizer
from llama_index import SummaryIndex
from llama_index.text_splitter import SentenceSplitter
from llama_index import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores import ChromaVectorStore
from llama_index.storage.storage_context import StorageContext
from llama_index.response.pprint_utils import pprint_response
from llama_index.prompts import PromptTemplate
from llama_index.llms import HuggingFaceLLM

# chromadb
import chromadb

DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# **Data Connectors**

In [4]:
from llama_index import VectorStoreIndex, download_loader

MainContentExtractorReader = download_loader("MainContentExtractorReader")
#Loading external data sources
loader = MainContentExtractorReader()
documents_link = loader.load_data(urls=['https://licindia.in/web/guest/know-your-life-insurance'])
documents_pdf = SimpleDirectoryReader('/content/data').load_data()
documents = documents_pdf + documents_link

# **Models**

**LLM-Llama-2**

In [5]:
#hugging face token
hf_token="Hugging Face Token"

In [6]:
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

llm = HuggingFaceLLM(
    model_name="meta-llama/Llama-2-7b-chat-hf",
    tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
    query_wrapper_prompt=PromptTemplate("<s> [INST] {query_str} [/INST] "),
    context_window=3900,
    model_kwargs={"token": hf_token, "quantization_config": quantization_config},
    tokenizer_kwargs={"token": hf_token},
    device_map="auto",
)

config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

# **Embedding Model**

In [7]:
embed_model = HuggingFaceInstructEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": DEVICE}
)

.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

load INSTRUCTOR_Transformer
max_seq_length  512


## **Defining the Service Context**

In [8]:
text_splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=20) # using the default chunk_# values as they work just fine

# set context window
context_window = 4096
# set number of output tokens
num_output = 256

service_context = ServiceContext.from_defaults(llm=llm,
                                               embed_model=embed_model,
                                               text_splitter=text_splitter,
                                              #  context_window=context_window,
                                              #  num_output=num_output,
                                               )

## **Index Setup**

In [9]:
vector_index = VectorStoreIndex.from_documents(documents, service_context=service_context)

In [10]:
summary_index = SummaryIndex.from_documents(documents, service_context=service_context)

## **Storing**

In [11]:
# # To make ephemeral client that is a short lasting client or an in-memory client
# db = chromadb.EphemeralClient()

# initialize client, setting path to save data
db = chromadb.PersistentClient(path="./chroma_db")

# create collection
chroma_collection = db.get_or_create_collection("health_care_database")

# assign chroma as the vector_store to the context
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# create your index
vector_index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    service_context=service_context
)

# **Querying**

# **refine-Mode**

In [12]:
query_engine = vector_index.as_query_engine(response_mode="refine")

response = query_engine.query("What are the benefits of Health Insurance plan?")

display_response(response)

**`Final Response:`** Based on the provided policy document, the benefits of the Health Insurance plan include:

1. Coverage for medical expenses: The policy covers reasonable expenses incurred towards medical treatment taken during the policy period for illnesses, accidents, or conditions contracted or sustained by an insured person during the policy period, subject to the sum insured, subsidiary limits, terms, conditions, limitations, and exclusions mentioned in the policy.
2. In-patient treatment: The policy covers medical expenses for doctors' fees, diagnostic tests, medicines, drugs, and consumables, intravenous fluids, blood transfusion, injection administration charges, operation theatre charges, cost of prosthetics and other devices or equipment if implanted internally during a surgical operation, and hospital accommodation.
3. Pre-hospitalization medical expenses: The policy covers medical expenses incurred due to illness up to 30 days immediately before an insured person's admission to a hospital for the same illness, as long as the policy has accepted an in-patient hospitalization claim under 2.1 above. Pre-hospitalization exp

In [13]:
query_engine = vector_index.as_query_engine(response_mode="refine")

response = query_engine.query("What Is Life Insurance?")

display_response(response)

**`Final Response:`** Based on the additional context provided, here's a refined answer to the query "What is life insurance?" that better addresses the query:

Life insurance is a contract between the policyholder and the insurance company, where the insurer promises to pay a death benefit to the policyholder's beneficiaries in the event of their death. The policyholder pays premiums to the insurer in exchange for this promise. The contract is based on the principle of utmost good faith, which means that the policyholder must disclose all material facts when taking out the policy.

The policy can be taken by any individual who has reached the age of majority and meets the eligibility criteria set by the insurance company. The policy can also be taken on the life of the policyholder's spouse or children, subject to certain conditions.

There are different types of life insurance policies, including:

* With profit plans: These policies offer bonuses after periodical valuations, which are payable along with the contracted amount. The premium rate for these policies is higher than for without profit plans.
* Without profit plans: These policies pay the contracted amount without any bonuses.
* Ke

In [17]:
query_engine = vector_index.as_query_engine(response_mode="refine")

response = query_engine.query("List out the contact details for Life Insurance")

display_response(response)

**`Final Response:`** Based on the refined context provided, the answer to the original query can be refined as follows:

The original query asked for the contact details of Life Insurance, and the refined context provides more information on who can buy a life insurance policy in India, the different types of policies available, and the benefits of having a life insurance policy.

Therefore, the refined answer to the original query would be:

To buy a life insurance policy in India, you can contact any of the following:

1. Life Insurance Corporation of India (LIC): LIC is the largest life insurance company in India and offers a range of policies, including endowment, whole life, term life, and group insurance. You can visit their website at [www.licindia.com](http://www.licindia.com) or visit any of their branches across India.
2. Private life insurance companies: There are several private life insurance companies operating in India, such as HDFC Life, Max Life, and Birla Sun Life, among others. You can visit their websites or visit their branches to learn more about their policies and buy a policy that suits your

# **Execute the following code for Continuos Chat as chatbot assistant**

In [None]:
while True:
  query=input()
  response = query_engine.query(query)
  print(response)

Summarize the Claims Procedure
Based on the additional context provided, the Claims Procedure for the Health Insurance Plan is as follows:

1. For any treatment, consultation, or procedure, the Insured Person should be notified in writing at least 72 hours prior to the planned date.
2. In an emergency, the Insured Person should notify the Insurer in writing within 15 days of discharge from the Hospital.
3. The following information should be mandated in the notification:
	* Policy number
	* Name of Policy holder
	* Name of Insured Person
	* Name of Hospital with address and contact number
	* Diagnosis
	* Treatment undergone (medical/surgical management and approximate amount being claimed)
4. For any Illness or Accident or medical condition that requires Hospitalization, the Insured Person should deliver the following documents to the Insurer within 30 days of the Insured Person's discharge from Hospital (or 30 days of the completion of post-hospitalization in non-Accident cases):
	* D