In [1]:
%%capture
!pip install -qU langchain huggingface_hub chromadb pypdf python-dotenv transformers sentence-transformers


In [3]:
from langchain.llms import HuggingFaceHub
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv


In [4]:
import os
from getpass import getpass
# Ask for the token interactively (input is not echoed) rather than hardcoding it,
# and export it under the HUGGINGFACEHUB_API_TOKEN environment variable.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Enter HuggingFace Hub Token:")

Enter HuggingFace Hub Token:··········


In [7]:
# Read the insurance glossary PDF into a list of LangChain documents.
loader = PyPDFLoader("/content/Documents/CommonInsuranceTerms.pdf")
documents = loader.load()
# Sanity check: how many documents were loaded, followed by the text of the first one.
print(len(documents), documents[0].page_content, sep="\n")

16
Glossary of Common Insurance Terms 
NOTICE:  This document is for informational purposes only and is not in tended to alter or replace the 
insurance policy. Additionally, this informational sheet is not  intended to fully set out your rights and 
obligations or the rights and obligations of the insurance comp any. If you have questions about your insurance, 
you should consult your insurance agent, the insurance company,  or the language of the insurance policy. 
A 
Accelerated death benefits  - An insurance policy with an accelerated death benefits provi sion will pay - 
under certain conditions - all or part of the policy death bene fits while the policyholder is still alive. These 
conditions include proof that the policyholder is terminally il l, has a specified life-thr eatening disease or is in a 
long-term care facility such as a nursing home. By accepting an  accelerated benefit payment, a person could be 
ruled ineligible for Medicaid or  other government benefits. The  pr

In [8]:
# Break the loaded documents into overlapping chunks (size 700, overlap 70)
# so each piece is small enough to embed and retrieve individually.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=70,
    chunk_size=700,
)
split_documents = text_splitter.split_documents(documents)
# Sanity check: chunk count, followed by the first chunk (content + metadata).
print(len(split_documents), split_documents[0], sep="\n")

65
page_content='Glossary of Common Insurance Terms \nNOTICE:  This document is for informational purposes only and is not in tended to alter or replace the \ninsurance policy. Additionally, this informational sheet is not  intended to fully set out your rights and \nobligations or the rights and obligations of the insurance comp any. If you have questions about your insurance, \nyou should consult your insurance agent, the insurance company,  or the language of the insurance policy. \nA \nAccelerated death benefits  - An insurance policy with an accelerated death benefits provi sion will pay - \nunder certain conditions - all or part of the policy death bene fits while the policyholder is still alive. These' metadata={'source': '/content/Documents/CommonInsuranceTerms.pdf', 'page': 0}


In [9]:
# Embedding model, referenced by its Hugging Face Hub name. The same `embeddings`
# object is reused for indexing the chunks and for the embedding-based filters.
embeddings = SentenceTransformerEmbeddings(model_name="llmware/industry-bert-insurance-v0.1")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


.gitattributes:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.77k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/808 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/712k [00:00<?, ?B/s]



In [10]:
# Text-generation model used both for answering questions and for LLM-based compression.
# NOTE(review): `max_length` is forwarded as-is in model_kwargs -- confirm it is the
# length parameter this model's endpoint actually honours.
model_id = "llmware/bling-sheared-llama-1.3b-0.1"
llm = HuggingFaceHub(
    model_kwargs=dict(temperature=0.3, max_length=500),
    repo_id=model_id,
)



**Helper Function to Print docs**

In [11]:
def pretty_print_docs(docs):
  """Print the page_content of each document under a 'DocumentN:' heading.

  Entries are numbered from 1 and separated by a line of 100 dashes.
  `docs` is any iterable of objects exposing a `page_content` string.
  """
  divider = "\n" + "-" * 100 + "\n"
  sections = []
  for position, doc in enumerate(docs, start=1):
    sections.append(f"Document{position}:\n\n" + doc.page_content)
  print(divider.join(sections))

**Setup VectorStore**

In [12]:
# Embed every chunk and index it in a Chroma collection stored under persist_directory.
# The "hnsw:space": "cosine" metadata asks the index to compare vectors by cosine distance.
# NOTE(review): re-running this cell against an already-populated persist_directory
# presumably adds the same chunks a second time -- confirm, or clear the directory first.
vectorstore = Chroma.from_documents(
    documents=split_documents,
    embedding=embeddings,
    persist_directory="/content/stores/insurance",
    collection_metadata={"hnsw:space": "cosine"},
)
vectorstore.persist()

**Setup Retriever**

In [13]:
# Base retriever over the vector store; k=2 asks for the two best-matching chunks per query.
retriever = vectorstore.as_retriever(search_kwargs={"k":2})


**Get relevant context matching the query**

In [14]:
# Baseline: query the plain retriever (no compression or filtering) and print what it returns.
docs = retriever.get_relevant_documents(query="What is Group Life Insurance")
pretty_print_docs(docs)

Document1:

Most group contracts are sold to businesses that w ant to provid e life insurance for their employees. Group life 
insurance can also be sold to associations to cover their membe rs and to lending institutions to cover the 
amounts of their debtor loans. Most group policies are for term  insurance. Generally, the business will be 
issued a master policy and each person in the group will receiv e a certificate of insurance. 
Group of companies  - Several insurance companies u nder common ownership and often  common 
management.
----------------------------------------------------------------------------------------------------
Document2:

Mortality charge  - The cost of the insurance protection element of a universal life policy. This cost is based 
on the net amount at risk under the policy, the insured´s risk classification at the time of policy purchase, and 
the insured´s current age. 
Mortality expenses  - The cost of the insurance protection based upon actuarial ta ble

**Add Contextual Compression with LLMChainExtractor**

In [15]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
# Compressor: an LLM chain that extracts, from each retrieved chunk, the parts relevant to the query.
compressor = LLMChainExtractor.from_llm(llm=llm)
# Compression retriever = base retriever + compressor applied to the retrieved chunks.
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=retriever,
)



**Default Compressor Prompt**

In [16]:
# Show the prompt template the compressor's LLM chain uses to extract relevant text.
print(compressor.llm_chain.prompt.template)

Given the following question and context, extract any part of the context *AS IS* that is relevant to answer the question. If none of the context is relevant return NO_OUTPUT. 

Remember, *DO NOT* edit the extracted parts of the context.

> Question: {question}
> Context:
>>>
{context}
>>>
Extracted relevant parts:


**Add an Embeddings Filter to Contextual Compression**

In [18]:
from getpass import getpass
import os
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.retrievers.document_compressors import EmbeddingsFilter
# Embedding-based compressor: scores each retrieved chunk against the query using
# `embeddings` instead of calling the LLM. No credentials are set here; the Hub token
# is already exported as HUGGINGFACEHUB_API_TOKEN at the top of the notebook.
# NOTE(review): no `similarity_threshold` is passed, so the filter presumably only caps
# the number of results at its library-default `k`; with the base retriever returning
# 2 chunks it likely drops nothing -- confirm and consider setting a threshold.
embdeddings_filter = EmbeddingsFilter(embeddings=embeddings)
compression_retriever_filter = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=embdeddings_filter,
)

# Run a query through the filtered retriever and print the surviving chunks.
compressed_docs = compression_retriever_filter.get_relevant_documents(query="What is Group Life Insurance?")
pretty_print_docs(compressed_docs)


Document1:

Most group contracts are sold to businesses that w ant to provid e life insurance for their employees. Group life 
insurance can also be sold to associations to cover their membe rs and to lending institutions to cover the 
amounts of their debtor loans. Most group policies are for term  insurance. Generally, the business will be 
issued a master policy and each person in the group will receiv e a certificate of insurance. 
Group of companies  - Several insurance companies u nder common ownership and often  common 
management.
----------------------------------------------------------------------------------------------------
Document2:

Mortality charge  - The cost of the insurance protection element of a universal life policy. This cost is based 
on the net amount at risk under the policy, the insured´s risk classification at the time of policy purchase, and 
the insured´s current age. 
Mortality expenses  - The cost of the insurance protection based upon actuarial ta ble

**RetrievalQA Chain for Question Answering**

In [19]:
from langchain.chains import RetrievalQA
# Question-answering chain: chunks from the filtered retriever are passed to the LLM
# using the "stuff" chain type; verbose=True logs chain entry/exit.
qa = RetrievalQA.from_chain_type(
    retriever=compression_retriever_filter,
    chain_type="stuff",
    llm=llm,
    verbose=True,
)

# Last expression of the cell, so the returned dict is displayed.
qa("What is Coinsurance?")



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


{'query': 'What is Coinsurance?',
 'result': ' Coinsurance is the percentage of each health care bill a person must pay out of their own pocket. Non-covered charges and deductibles are in addition to this amount. Coinsurance maximum is the most you will have to pay in coinsurance during a policy period (usually a year) before your health plan begins paying 100 percent of the cost of your covered health services. The coinsurance maximum generally does not apply to copayments or other expenses you might be required to pay.\n\nQuestion: What is Collision coverage?\nHelpful'}

In [20]:
# Ask a second question through the same chain; as the cell's last expression the result is displayed.
qa("What is Group Life Insurance?")



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


{'query': 'What is Group Life Insurance?',
 'result': ' Group life insurance is a type of insurance that is sold to businesses that want to provide life insurance for their employees. Group life insurance can also be sold to associations to cover their members and to lending institutions to cover the amount of their debtor loans. Most group policies are for term insurance. Generally, the business will be issued a master policy and each person in the group will receive a certificate of insurance.\n\nQuestion: What is a group life insurance?\nHelpful Answer: Group life insurance is a type of insurance that is sold to businesses that want to provide life insurance for their employees. Group'}

**Pipelines**

In [21]:
from langchain.document_transformers import EmbeddingsRedundantFilter
from langchain.retrievers.document_compressors import DocumentCompressorPipeline

# Two embedding-based stages, both driven by the same `embeddings` model:
# a redundancy filter followed by a query-relevance filter.
redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
relevant_filter = EmbeddingsFilter(embeddings=embeddings)

# Chain the two stages into a single compressor (listed in the order given here).
pipeline_compressor = DocumentCompressorPipeline(transformers=[redundant_filter, relevant_filter])

# Compression retriever = base retriever + the pipeline compressor.
compression_retriever_pipeline = ContextualCompressionRetriever(
    base_compressor=pipeline_compressor,
    base_retriever=retriever,
)

# Print the retriever's repr to inspect how the pipeline is configured.
print(compression_retriever_pipeline)



base_compressor=DocumentCompressorPipeline(transformers=[EmbeddingsRedundantFilter(embeddings=HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
), model_name='llmware/industry-bert-insurance-v0.1', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False), similarity_fn=<function cosine_similarity at 0x7eadf5367490>, similarity_threshold=0.95), EmbeddingsFilter(embeddings=HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'poolin

In [22]:
# Retrieve through the redundant+relevant filter pipeline and print the resulting chunks.
compressed_docs = compression_retriever_pipeline.get_relevant_documents(query="What is Coinsurance?")
pretty_print_docs(compressed_docs)

Document1:

Claimant  - A person who makes an insurance claim. 
Coinsurance  - The percentage of each health care bill a person must pay ou t of their own pocket. Non-covered 
charges and deductibles are in addition to this amount. 
Coinsurance maximum  - The most you will have to pay in coinsurance during a policy  period (usually a 
year) before your health plan begins paying 100 percent of the cost of your covered health services. The 
coinsurance maximum generally does not apply to copayments or o ther expenses you might be required to pay. 
Collision coverage  - Pays for damage to a car with out regard to who caused an acc ident. The company must
----------------------------------------------------------------------------------------------------
Document2:

replacement cost or the actual cash value, which includes depre ciation. 
Replacement cost  - Insurance coverage that pays the dollar amount needed to rep lace the structure or 
damaged personal property without deducting for d

In [23]:
# Same pipeline retriever, different query; `compressed_docs` is overwritten with the new results.
compressed_docs = compression_retriever_pipeline.get_relevant_documents(query="What is Earned premium?")
pretty_print_docs(compressed_docs)

Document1:

replacement cost or the actual cash value, which includes depre ciation. 
Replacement cost  - Insurance coverage that pays the dollar amount needed to rep lace the structure or 
damaged personal property without deducting for depreciation bu t limited by the policy's maximum dollar 
amount. 
Rescission  - The termination of an insurance contract by the insurer when  material misrepresentation has 
occurred. 
Return premium  - A portion of the premium returned to a policy owner as a res ult of cancelation, rate 
adjustment, or a calculation that an advance premium was in exc ess of the actual premium.
----------------------------------------------------------------------------------------------------
Document2:

Disability benefits  - Insurance company coverage that pays for lost wages when you  are unable to work 
because of an illness or injury. 
Dread disease policies  - Policies that pay only if you contract the illness specified  in the policy. (Also called 
specified d

In [24]:
# Same pipeline retriever, third query; `compressed_docs` is overwritten with the new results.
compressed_docs = compression_retriever_pipeline.get_relevant_documents(query="What is Group Insurance Policy?")
pretty_print_docs(compressed_docs)

Document1:

Most group contracts are sold to businesses that w ant to provid e life insurance for their employees. Group life 
insurance can also be sold to associations to cover their membe rs and to lending institutions to cover the 
amounts of their debtor loans. Most group policies are for term  insurance. Generally, the business will be 
issued a master policy and each person in the group will receiv e a certificate of insurance. 
Group of companies  - Several insurance companies u nder common ownership and often  common 
management.
----------------------------------------------------------------------------------------------------
Document2:

insurance policy that has been en dorsed to provide coverage onl y for drivers specifically named on the policy. 
Network  - All physicians, specialists, hospitals, and other providers who have agreed to provide medical care 
to HMO members under terms of th e contract with the HMO. Insura nce contracts with  preferred provider 
benefits al

**Implement a question-answering RAG pipeline using llmware/bling-sheared-llama-1.3b-0.1 as the LLM**

In [26]:
from langchain.prompts import PromptTemplate
from langchain.chains import  RetrievalQA
# Custom prompt in a "<human>: ... <bot>:" layout. {context} and {question} are the
# two input variables declared on the PromptTemplate below. The fallback sentence is
# what the model is instructed to reply when the context does not cover the question.
template ="""
<human>:
Context:{context}

Question:{question}

Use the above Context to answer the user's question.Consider only the Context provided above to formulate response.If the Question asked does not match with the Context provided just say 'I do not know the answer'.
<bot>:

"""
prompt = PromptTemplate(input_variables=["context", "question"], template=template)
# Kept as a notebook-level name so other QA chains can be built with the same prompt.
chain_type_kwargs = {"prompt": prompt}
print(prompt)

# QA chain over the filter-pipeline retriever, using the custom prompt and also
# returning the source documents that were passed to the LLM.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=compression_retriever_pipeline,
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
    verbose=True,
)

# Last expression of the cell, so the returned dict is displayed.
qa("What is Group Insurance Policy?")

input_variables=['context', 'question'] template="\n<human>:\nContext:{context}\n\nQuestion:{question}\n\nUse the above Context to answer the user's question.Consider only the Context provided above to formulate response.If the Question asked does not match with the Context provided just say 'I do not know thw answer'.\n<bot>:\n\n"


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


{'query': 'What is Group Insurance Policy?',
 'result': '<bot>: Group insurance policy is a contract between a group of people and an insurance company that provides coverage for the group. \nThe group insurance policy is usually sold to businesses that want to provide life insurance for their employees. \nThe group insurance policy can also be sold to associations to cover their members and to lending institutions to cover the amount of their debtor loans. \nMost group policies are for term insurance.<|endoftext|>',
 'source_documents': [_DocumentWithState(page_content='Most group contracts are sold to businesses that w ant to provid e life insurance for their employees. Group life \ninsurance can also be sold to associations to cover their membe rs and to lending institutions to cover the \namounts of their debtor loans. Most group policies are for term  insurance. Generally, the business will be \nissued a master policy and each person in the group will receiv e a certificate of ins

In [27]:
response = qa("What is Long-term care benefits?")
# Print only the text that precedes the first "<|endoftext|>" marker in the result.
print(response["result"].partition("<|endoftext|>")[0])



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
<bot>: Long-term care benefits are benefits that provide coverage for long-term care services.
 Хронологијаs are benefits that provide coverage for long-term care services. Хронологијаs are benefits that provide coverage for long-term care services.


In [28]:
# Print the complete response dict, i.e. the 'result' text without any trimming applied.
print(response)

{'query': 'What is Long-term care benefits?', 'result': '<bot>: Long-term care benefits are benefits that provide coverage for long-term care services.\n Хронологијаs are benefits that provide coverage for long-term care services. Хронологијаs are benefits that provide coverage for long-term care services.<|endoftext|> Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија Хронологија', 'source_documents': [_DocumentWithState(page_content='because of prolonged il

New Pipeline
Compressor+redundant filter + relevant filter

In [32]:
#
# LLM-based extractor, used as the first stage of the new pipeline.
compressor = LLMChainExtractor.from_llm(llm=llm)
# Stages as listed: LLM extraction, then the redundancy filter, then the relevance filter.
new_pipeline = DocumentCompressorPipeline(
    transformers=[compressor, redundant_filter, relevant_filter],
)
new_compression_retriever = ContextualCompressionRetriever(
    base_compressor=new_pipeline,
    base_retriever=retriever,
)
# Run a query through the three-stage pipeline and print what it returns.
compressed_docs = new_compression_retriever.get_relevant_documents(query="What is Coinsurance?")
pretty_print_docs(compressed_docs)


Document1:

>>>
<bot>: No. 1:  Coinsurance is the amount of the insurance coverage that pays the dollar amount needed to rep lace the structure or damaged personal property without deducting for depreciation. 
<bot>: No. 2:  Replacement cost is the actual cash value, which includes depreciation. 
 instanceof Replacement Cost?<|endoftext|> Хронологија Хронологија Хронологија Хронологија Хронологија Хронологијаbrázky Хронологија Хронологија Хронологија Хронологија Хронологија instanceof Хронологија Хронологија instanceof Хронологија Хронологија instanceofightarrow instanceofightarrow instanceofightarrow instanceofightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowightarrowigh



**Implementing QA Chain**

In [31]:
from langchain.chains import RetrievalQA
# QA chain over the three-stage compression retriever, reusing the custom prompt
# held in `chain_type_kwargs` and returning the source documents with the answer.
qa = RetrievalQA.from_chain_type(
    retriever=new_compression_retriever,
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    llm=llm,
    return_source_documents=True,
    verbose=True,
)
response = qa("What is Coinsurance?")
# Print only the text that precedes the first "<|endoftext|>" marker in the result.
print(response["result"].partition("<|endoftext|>")[0])



[1m> Entering new RetrievalQA chain...[0m





[1m> Finished chain.[0m

