# AI_Helpmate_RAG_project

**The goal of the project will be to build a robust generative search system capable of effectively and accurately answering questions from a policy document.**

In [1]:
# Importing all the required Libraries

import pdfplumber
from pathlib import Path
import pandas as pd
from operator import itemgetter
import json
import tiktoken
import openai
import chromadb

In [2]:
# Defining the path where the pdf document is present

pdf_path = Path("E:\Documents\AIML_masters\course6_Elective\week9\Principal-Sample-Life-Insurance-Policy.pdf")

## Processing and Chunking the PDF file

In [3]:
# Function to check whether a word is present in a table or not for segregation of regular text and tables

def check_bboxes(word, table_bbox):
    
    # Check whether word is inside a table bbox.
    l = word['x0'], word['top'], word['x1'], word['bottom']
    r = table_bbox
    return l[0] > r[0] and l[1] > r[1] and l[2] < r[2] and l[3] < r[3]

In [4]:
# Function to extract text from a PDF file.

# 1. Declare a variable p to store the iteration of the loop that will help us store page numbers alongside the text
# 2. Declare an empty list 'full_text' to store all the text files
# 3. Use pdfplumber to open the pdf pages one by one
# 4. Find the tables and their locations in the page
# 5. Extract the text from the tables in the variable 'tables'
# 6. Extract the regular words by calling the function check_bboxes() and checking whether words are present in the table or not
# 7. Use the cluster_objects utility to cluster non-table and table words together so that they retain the same chronology as in the original PDF
# 8. Declare an empty list 'lines' to store the page text
# 9. If a text element in present in the cluster, append it to 'lines', else if a table element is present, append the table
# 10. Append the page number and all lines to full_text, and increment 'p'
# 11. When the function has iterated over all pages, return the 'full_text' list

def extract_text_from_pdf(pdf_path):
    p = 0
    full_text = []


    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            page_no = f"Page {p+1}"
            text = page.extract_text()

            tables = page.find_tables()
            table_bboxes = [i.bbox for i in tables]
            tables = [{'table': i.extract(), 'top': i.bbox[1]} for i in tables]
            non_table_words = [word for word in page.extract_words() if not any(
                [check_bboxes(word, table_bbox) for table_bbox in table_bboxes])]
            lines = []

            for cluster in pdfplumber.utils.cluster_objects(non_table_words + tables, itemgetter('top'), tolerance=5):

                if 'text' in cluster[0]:
                    try:
                        lines.append(' '.join([i['text'] for i in cluster]))
                    except KeyError:
                        pass

                elif 'table' in cluster[0]:
                    lines.append(json.dumps(cluster[0]['table']))


            full_text.append([page_no, " ".join(lines)])
            p +=1

    return full_text

**Since this is an insurance document the chunking is found to be appropriate if done page wise.**

In [5]:
# Extracting the PDF and storing it in a dataframe

# Initialize an empty list to store the extracted texts and document names
data = []

# Process the single PDF file
print(f"...Processing {pdf_path.name}")

# Call the function to extract the text from the PDF
extracted_text = extract_text_from_pdf(pdf_path)

# Convert the extracted list to a DataFrame, and add a column to store the document name
extracted_text_df = pd.DataFrame(extracted_text, columns=['Page No.', 'Page_Text'])

# Append the extracted text and document name to the list
data.append(extracted_text_df)

# Print a message to indicate that processing is complete
print(f"Finished processing {pdf_path.name}")

...Processing Principal-Sample-Life-Insurance-Policy.pdf
Finished processing Principal-Sample-Life-Insurance-Policy.pdf


In [6]:
# Concatenating all DataFrames in the list into a single DataFrame

insurance_data = pd.concat(data, ignore_index=True)

insurance_data

Unnamed: 0,Page No.,Page_Text
0,Page 1,DOROTHEA GLAUSE S655 RHODE ISLAND JOHN DOE 01/...
1,Page 2,This page left blank intentionally
2,Page 3,POLICY RIDER GROUP INSURANCE POLICY NO: S655 C...
3,Page 4,This page left blank intentionally
4,Page 5,PRINCIPAL LIFE INSURANCE COMPANY (called The P...
...,...,...
59,Page 60,I f a Dependent who was insured dies during th...
60,Page 61,Section D - Claim Procedures Article 1 - Notic...
61,Page 62,A claimant may request an appeal of a claim de...
62,Page 63,This page left blank intentionally


In [7]:
# checking the length of all the texts

insurance_data['Text_Length'] = insurance_data['Page_Text'].apply(lambda x:len(x.split(' ')))

In [8]:
insurance_data['Text_Length']

0      30
1       5
2     230
3       5
4     110
     ... 
59    285
60    418
61    322
62      5
63      8
Name: Text_Length, Length: 64, dtype: int64

In [9]:
# Setting the threshold for the minimum number of words
threshold = 30

# Filtering rows to retain only those with text length of above the threshold
insurance_data = insurance_data.loc[insurance_data['Text_Length'] > threshold]

In [10]:
print(insurance_data)

   Page No.                                          Page_Text  Text_Length
2    Page 3  POLICY RIDER GROUP INSURANCE POLICY NO: S655 C...          230
4    Page 5  PRINCIPAL LIFE INSURANCE COMPANY (called The P...          110
5    Page 6  TABLE OF CONTENTS PART I - DEFINITIONS PART II...          153
6    Page 7  Section A – Eligibility Member Life Insurance ...          176
7    Page 8  Section A - Member Life Insurance Schedule of ...          171
8    Page 9  P ART I - DEFINITIONS When used in this Group ...          387
9   Page 10  T he legally recognized union of two eligible ...          251
10  Page 11  (2) has been placed with the Member or spouse ...          299
11  Page 12  An institution that is licensed as a Hospital ...          352
12  Page 13  a . A licensed Doctor of Medicine (M.D.) or Os...          260
13  Page 14  c . end stage renal failure; or d. acquired im...          316
14  Page 15  A record which is on or transmitted by paper o...           36
15  Page 16 

In [11]:
# Adding the metadata to the dataframe

document_name = "Principal-Sample-Life-Insurance-Policy"

insurance_data.loc[:, 'Metadata'] = insurance_data.apply(
    lambda x: {'Policy_Name': document_name, 'Page_No.': x['Page No.']}, axis=1
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  insurance_data.loc[:, 'Metadata'] = insurance_data.apply(


In [12]:
print(insurance_data.head())

  Page No.                                          Page_Text  Text_Length  \
2   Page 3  POLICY RIDER GROUP INSURANCE POLICY NO: S655 C...          230   
4   Page 5  PRINCIPAL LIFE INSURANCE COMPANY (called The P...          110   
5   Page 6  TABLE OF CONTENTS PART I - DEFINITIONS PART II...          153   
6   Page 7  Section A – Eligibility Member Life Insurance ...          176   
7   Page 8  Section A - Member Life Insurance Schedule of ...          171   

                                            Metadata  
2  {'Policy_Name': 'Principal-Sample-Life-Insuran...  
4  {'Policy_Name': 'Principal-Sample-Life-Insuran...  
5  {'Policy_Name': 'Principal-Sample-Life-Insuran...  
6  {'Policy_Name': 'Principal-Sample-Life-Insuran...  
7  {'Policy_Name': 'Principal-Sample-Life-Insuran...  


## Generating the Embeddings using OpenAI and ChromaDB

**In this section, we will embed the pages in the dataframe through OpenAI's text-embedding-ada-002 model, and store them in a ChromaDB collection.** 

In [13]:
# Defining the path to the API key file
filepath = "E:\\Documents\\AIML_masters\\course6_Elective\\OpenAI_API_Key.txt"

# Reading the API key from the file
with open(filepath , "r") as f:
    openai.api_key = f.read().strip()

In [14]:
# Defining the path where chroma collections will be stored

chroma_data_path = Path("E:/Documents/AIML_masters/course6_Elective/week9")

In [15]:
# Creating the directory if it doesn't exist
chroma_data_path.mkdir(parents=True, exist_ok=True)

# Initializing the PersistentClient with the directory path
client = chromadb.PersistentClient(path=str(chroma_data_path))

In [16]:
# Importing the OpenAI Embedding Function into chroma

from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction

# Setting up the embedding function using the OpenAI embedding model

model = "text-embedding-ada-002"
embedding_function = OpenAIEmbeddingFunction(api_key=openai.api_key, model_name=model)

In [17]:
# Initializing a collection in chroma and passing the embedding_function

insurance_collection = client.get_or_create_collection(name='RAG_on_Insurance', embedding_function=embedding_function)

In [18]:
# Converting the page text and metadata from the dataframe to lists

documents_list = insurance_data["Page_Text"].tolist()
metadata_list = insurance_data['Metadata'].tolist()

In [19]:
# Adding the documents and metadata to the collection alongwith generic integer IDs.

insurance_collection.add(
    documents= documents_list,
    ids = [str(i) for i in range(0, len(documents_list))],
    metadatas = metadata_list
)

In [20]:
# looking at the collection

insurance_collection.get(
    ids = ['0','1','2'],
    include = ['embeddings', 'documents', 'metadatas']
)

{'ids': ['0', '1', '2'],
 'embeddings': [[-0.013148510828614235,
   0.008859461173415184,
   -0.004594430327415466,
   -0.02860281430184841,
   -0.015509204007685184,
   0.023346155881881714,
   0.009243760257959366,
   -0.009381009265780449,
   -0.008468300104141235,
   -0.011336816474795341,
   -0.01922866888344288,
   0.006759542506188154,
   0.0020227159839123487,
   0.02533627487719059,
   -0.013779859058558941,
   0.028712615370750427,
   0.011426028795540333,
   -0.03186935558915138,
   0.024114754050970078,
   0.019420817494392395,
   0.026283297687768936,
   0.015015105716884136,
   -0.01478178147226572,
   0.009072198532521725,
   -0.0016710137715563178,
   -0.0016221186378970742,
   0.021726610139012337,
   -0.015042555518448353,
   0.037606388330459595,
   0.0014651394449174404,
   0.024430427700281143,
   0.002729551400989294,
   -0.03126545622944832,
   -0.03060665912926197,
   -0.0031361531000584364,
   0.019338468089699745,
   0.012819112278521061,
   0.0124279502779245

In [21]:
# Creating the cache collection 

cache_collection = client.get_or_create_collection(name='Insurance_Cache', embedding_function=embedding_function)

In [22]:
cache_collection.peek()

{'ids': [],
 'embeddings': [],
 'metadatas': [],
 'documents': [],
 'uris': None,
 'data': None,
 'included': ['embeddings', 'metadatas', 'documents']}

The Cache collection is empty as expected

## Semantic search with cache

In [131]:
# Reading the user query

query = input()

provide the detailed view of the premium payments, the rates and amount. Also clarify about the grace period and what happens on default of payment?


In [132]:
# Searching the Cache collection first
# Querying the collection against the user query and returning the top 3 results 

cache_results = cache_collection.query(
    query_texts=query,
    n_results=3
)

In [133]:
# Implementing the cache function

# threshold for cache search
threshold = 0.2

ids = []
documents = []
distances = []
metadatas = []
results_df = pd.DataFrame()

# If the distance is greater than the threshold, then return the results from the main collection.
if cache_results['distances'][0] == [] or cache_results['distances'][0][0] > threshold:
    # Querying the collection against the user query and return the top 10 results
    results = insurance_collection.query(
        query_texts=query,
        n_results=10
    )

    # Store the query in cache_collection as document w.r.t to ChromaDB so that it can be embedded and searched again later
    # Store retrieved text, ids, distances and metadatas in cache_collection as metadatas, so that they can be fetched easily if a query indeed matches to a query in cache
    Keys = []
    Values = []

    for key, val in results.items():
        if val is None:
            continue
        for i in range(min(10, len(val[0]))): 
            Keys.append(str(key) + str(i))
            Values.append(str(val[0][i]))

    cache_collection.add(
        documents=[query],
        ids=[query],
        metadatas=dict(zip(Keys, Values))
    )

    print("Not found in cache. Found in main collection.")

    result_dict = {
        'Metadatas': results['metadatas'][0],
        'Documents': results['documents'][0],
        'Distances': results['distances'][0],
        'IDs': results['ids'][0]
    }
    results_df = pd.DataFrame.from_dict(result_dict)

# If the distance is, however, less than the threshold, the cache result is returned
elif cache_results['distances'][0][0] <= threshold:
    cache_result_dict = cache_results['metadatas'][0][0]

    # Loop through each inner list and then through the dictionary
    for key, value in cache_result_dict.items():
        if 'ids' in key:
            ids.append(value)
        elif 'documents' in key:
            documents.append(value)
        elif 'distances' in key:
            distances.append(value)
        elif 'metadatas' in key:
            metadatas.append(value)

    print("Found in cache!")

    # Creating a DataFrame
    results_df = pd.DataFrame({
        'IDs': ids,
        'Documents': documents,
        'Distances': distances,
        'Metadatas': metadatas
    })

results_df


Not found in cache. Found in main collection.


Unnamed: 0,Metadatas,Documents,Distances,IDs
0,"{'Page_No.': 'Page 20', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","Section B - Premiums Article 1 - Payment Responsibility; Due Dates; Grace Period The Policyholder is responsible for collection and payment of all premiums due while this Group Policy is in force. Payments must be sent to the home office of The Principal in Des Moines, Iowa. The first premium is due on the Date of Issue of this Group Policy. Each premium thereafter will be due on the first of each Insurance Month. Except for the first premium, a Grace Period of 31 days will be allowed for payment of premium. ""Grace Period"" means the first 31-day period following a premium due date. The Group Policy will remain in force until the end of the Grace Period, unless the Group Policy has been terminated by notice as described in PART II, Section C. The Policyholder will be liable for payment of the premium for the time this Group Policy remains in force during the Grace Period. Article 2 - Premium Rates The premium rate(s) for each Member insured for Life Insurance will be: a. Member Life Insurance $0.210 for each $1,000 of insurance in force. b. Member Accidental Death and Dismemberment Insurance $0.025 for each $1,000 of Member Life Insurance in force. c. Dependent Life Insurance $1.46 for each Member insured for Dependent Life Insurance. If the Policyholder has at least two other eligible group insurance policies underwritten by The Principal, as determined by The Principal, the Policyholder may be eligible for a multiple policy discount. Article 3 - Premium Rate Changes The Principal may change a premium rate: a. on any premium due date, if the initial premium rate has then been in force 24 months or more and if Written notice is given to the Policyholder at least 31 days before the date of change; or This policy has been updated effective January 1, 2014 PART II - POLICY ADMINISTRATION GC 6004 Section B - Premiums, Page 1",0.319258,16
1,"{'Page_No.': 'Page 23', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","Section C - Policy Termination Article 1 - Failure to Pay Premium This Group Policy will terminate at the end of the Grace Period if total premium due has not been received by The Principal before the end of the Grace Period. Failure by the Policyholder to pay the premium within the Grace Period will be deemed notice by the Policyholder to The Principal to discontinue this Group Policy at the end of the Grace Period. Article 2 - Termination Rights of the Policyholder The Policyholder may terminate this Group Policy effective on the day before any premium due date by giving Written notice to The Principal prior to that premium due date. The Policyholder's issuance of a stop-payment order for any amounts used to pay premiums for the Policyholder's coverage will be considered Written notice from the Policyholder. Article 3 - Termination Rights of The Principal The Principal may nonrenew or terminate this Group Policy by giving the Policyholder 31 days advance notice in Writing, if the Policyholder: a. ceases to be actively engaged in business for profit within the meaning of the Internal Revenue Code, or be established as a legitimate nonprofit corporation within the meaning of the Internal Revenue Code; or b. fails to maintain the participation percentages requirements of PART II, Section A with respect to eligible employees, excluding those for whom Proof of Good Health is not satisfactory to The Principal; or c. fails to maintain three or more insured employees under this Group Policy; or d. fails to pay premium in accordance with the requirements of PART II, Section B; or e. has performed an act or practice that constitutes fraud or has made an intentional misrepresentation of material fact under the terms of this Group Policy; or f. does not promptly provide The Principal with information that is reasonably required; or g. fails to perform any of its obligations that relate to this Group Policy. This policy has been updated effective January 1, 2014 PART II - POLICY ADMINISTRATION GC 6005 Section C - Policy Termination, Page 1",0.350216,19
2,"{'Page_No.': 'Page 21', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","b . on any date the definition of Member or Dependent is changed; and c. on any date the Policyholder's business, as specified on the Policyholder application, is changed; and d. on any date that a schedule of insurance or class of insured Members is changed; and e. on any premium due date, if the Policyholder has been receiving a multiple policy discount rate and the Policyholder drops below the minimum number of coverages to receive such discount rate; and f. on any date the premium contribution required of Members is changed; and g. with respect to Member Life Insurance, on any Policy Anniversary, if the average age, average Scheduled Benefit amount, or the male/female distribution for then insured Members has changed since the last Policy Anniversary; and h. on any Policy Anniversary, if the volume of insurance for then insured Members has increased or decreased by more than 25% since the last Policy Anniversary. If the Policyholder has other group insurance with The Principal, and if life coverage is initially added on a date other than the Policy Anniversary and it is more than six months before the next Policy Anniversary, The Principal reserves the right to change the premium rate on the next Policy Anniversary. Written notice will be given to the Policyholder at least 31 days before the date of change. If the Policyholder agrees to participate in the electronic services program of The Principal and, at a later date elects to withdraw from participation, such withdrawal may result in certain administrative fees being charged to the Policyholder. Article 4 - Premium Amount The amount of premium to be paid on each due date will be determined in these ways: a. Member Life Insurance The total volume of insurance in force will be divided by 1,000. The result will then be multiplied by the premium rate then in effect. b. Member Accidental Death and Dismemberment Insurance The total volume of insurance in force will be divided by 1,000. The result will then be multiplied by the premium rate then in effect. c. Dependent Life Insurance This policy has been updated effective January 1, 2014 PART II - POLICY ADMINISTRATION GC 6004 Section B - Premiums, Page 2",0.381707,17
3,"{'Page_No.': 'Page 6', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","TABLE OF CONTENTS PART I - DEFINITIONS PART II - POLICY ADMINISTRATION Section A – Contract Entire Contract Article 1 Policy Changes Article 2 Policyholder Eligibility Requirements Article 3 Policy Incontestability Article 4 Individual Incontestability Article 5 Information to be Furnished Article 6 Certificates Article 7 Assignments Article 8 Dependent Rights Article 9 Policy Interpretation Article 10 Electronic Transactions Article 11 Section B – Premium Payment Responsibility; Due Dates; Grace Period Article 1 Premium Rates Article 2 Premium Rate Changes Article 3 Premium Amount Article 4 Contributions from Members Article 5 Section C - Policy Termination Failure to Pay Premium Article 1 Termination Rights of the Policyholder Article 2 Termination Rights of The Principal Article 3 Policyholder Responsibility to Members Article 4 Section D - Policy Renewal Renewal Article 1 PART III - INDIVIDUAL REQUIREMENTS AND RIGHTS This policy has been updated effective January 1, 2014 GC 6001 TABLE OF CONTENTS, PAGE 1",0.391255,2
4,"{'Page_No.': 'Page 24', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","T he Principal may terminate the Policyholder's coverage on any premium due date if the Policyholder relocates to a state where this Group Policy is not marketed, by giving the Policyholder 31 days advanced notice in Writing. Article 4 - Policyholder Responsibility to Members If this Group Policy terminates for any reason, the Policyholder must: a. notify each Member of the effective date of the termination; and b. refund or otherwise account to each Member all contributions received or withheld from Members for premiums not actually paid to The Principal. This policy has been updated effective January 1, 2014 PART II - POLICY ADMINISTRATION GC 6005 Section C - Policy Termination, Page 2",0.400594,20
5,"{'Page_No.': 'Page 18', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","c . a copy of the form which contains the statement is given to the insured or the insured's beneficiary at the time insurance is contested. However, these provisions will not preclude the assertion at any time of defenses based upon the person's ineligibility for insurance under this Group Policy or upon the provisions of this Group Policy. In addition, if an individual's age is misstated, The Principal may at any time adjust premium and benefits to reflect the correct age. Article 6 - Information to be Furnished The Policyholder must, upon request, give The Principal all information needed to administer this Group Policy. If a clerical error is found in this information, The Principal may at any time adjust premium to reflect the facts. An error will not invalidate insurance that would otherwise be in force. Neither will an error continue insurance that would otherwise be terminated. The Principal may inspect, at any reasonable time, all Policyholder records, which relate to this Group Policy. Article 7 - Certificates The Principal will give the Policyholder Certificates for delivery to insured Members. The delivery of such Certificates will be in either paper or electronic format. The Certificates will be evidence of insurance and will describe the basic features of the coverage. They will not be considered a part of this Group Policy. Article 8 - Assignments No assignments of Member Life Insurance will be allowed under this Group Policy. Article 9 - Dependent Rights A Dependent will have no rights under this Group Policy except as set forth in PART III, Section F, Article 2. Article 10 - Policy Interpretation This policy has been updated effective January 1, 2014 PART II - POLICY ADMINISTRATION GC 6003 Section A - Contract, Page 3",0.404591,14
6,"{'Page_No.': 'Page 22', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","The number of Members insured for Dependent Life Insurance will be multiplied by the premium rate then in effect. To ensure accurate premium calculations, the Policyholder is responsible for reporting to The Principal, the following information during the stated time periods: a. Members who are eligible to become insured are to be reported during the month prior to or during the month that coverage becomes effective. b. Members whose coverage has terminated are to be reported within a month of the date coverage terminated. c. Changes in Member insurance class are to be reported within a month of the date that the change in insurance class took place. If a Member is added or a present Member's insurance is increased or terminated on other than the first of an Insurance Month, premium for that Member will be adjusted and applied as if the change were to take place on the first of the next following Insurance Month. Article 5 - Contributions from Members Members are not required to contribute a part of the premium for their Member insurance under this Group Policy. Members are required to contribute a part of the premium for their Dependent's insurance under this Group Policy. This policy has been updated effective January 1, 2014 PART II - POLICY ADMINISTRATION GC 6004 Section B - Premiums, Page 3",0.408936,18
7,"{'Page_No.': 'Page 54', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","f . claim requirements listed in PART IV, Section D, must be satisfied; and g. all medical evidence must be satisfactory to The Principal. Article 3 - Benefits Payable If all of the benefit qualifications are met, The Principal will pay: a. 100% of the Scheduled Benefit (or approved amount, if applicable) in force for loss of life; or b. 50% of the Scheduled Benefit (or approved amount, if applicable) in force if one hand is severed at or above the wrist; or c. 25% of the Scheduled Benefit (or approved amount, if applicable) in force for loss of thumb and index finger on the same hand; or d. 50% of the Scheduled Benefit (or approved amount, if applicable) in force if one foot is severed at or above the ankle; or e. 50% of the Scheduled Benefit (or approved amount, if applicable) in force if the sight of one eye is permanently lost (For this purpose, vision not correctable to better than 20/200 will be considered loss of sight.); or f. 100% of the Scheduled Benefit (or approved amount, if applicable) in force for more than one of the losses listed in b., d., or e. above. Total payment for all losses under this Article 3 that result from the same accident will not exceed the Scheduled Benefit (or approved amount, if applicable). Payment for loss of life will be to the beneficiary named for Member Life Insurance. Payment will be subject to the Beneficiary, Facility of Payment and Settlement of Proceeds provisions of PART IV, Section A. Payment for all other losses will be to the Member. Disappearance It will be presumed that a Member has lost his or her life if: a. the Member's body has not been found within 365 days after the disappearance of a conveyance in which the Member was an occupant at the time of disappearance; and b. the disappearance of the conveyance was due to its accidental wrecking or sinking; and c. this Group Policy would have covered the injury resulting from the accident. This policy has been updated effective January 1, 2014 PART IV - BENEFITS GC 6015 Section B - Member Accidental Death and Dismemberment Insurance, Page 2",0.429699,50
8,"{'Page_No.': 'Page 17', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","a. be actively engaged in business for profit within the meaning of the Internal Revenue Code, or be established as a legitimate nonprofit corporation within the meaning of the Internal Revenue Code; and b. make at least the level of premium contributions required for insurance on its eligible Members. The Policyholder must: (1) contribute at least 50% of the required premium for all Members (including disabled Members, if any); and c. if the Member is to contribute part of the premium, maintain the following participation percentages with respect to eligible employees and Dependents, excluding those for whom Proof of Good Health is not satisfactory to The Principal: (1) Employees: - at least 75% of all eligible employees must enroll; (2) Dependents: - maintain a Dependent participation of at least 75% of eligible Dependents; and d. if the Member is to contribute no part of the premium, 100% of eligible employees and Dependents must enroll. Article 4 - Policy Incontestability In the absence of fraud, after this Group Policy has been in force two years, The Principal may not contest its validity except for nonpayment of premium. Article 5 - Individual Incontestability All statements made by any individual insured under this Group Policy will be representations and not warranties. In the absence of fraud, these statements may not be used to contest an insured person's insurance unless: a. the insured person's insurance has been in force for less than two years during the insured's lifetime; and b. the statement is in Written form Signed by the insured person; and This policy has been updated effective January 1, 2014 PART II - POLICY ADMINISTRATION GC 6003 Section A - Contract, Page 2",0.43042,13
9,"{'Page_No.': 'Page 8', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","Section A - Member Life Insurance Schedule of Insurance Article 1 Death Benefits Payable Article 2 Beneficiary Article 3 Facility of Payment Article 4 Settlement of Proceeds Article 5 Member Life Insurance - Coverage During Disability Article 6 Accelerated Benefits Article 7 Section B - Member Accidental Death and Dismemberment Insurance Schedule of Insurance Article 1 Benefit Qualification Article 2 Benefits Payable Article 3 Seat Belt Benefit Article 4 Loss of Use or Paralysis Benefit Article 5 Loss of Speech and/or Hearing Benefit Article 6 Repatriation Benefit Article 7 Educational Benefit Article 8 Limitations Article 9 Section C - Dependent Life Insurance Schedule of Insurance Article 1 Death Benefits Payable Article 2 Beneficiary Article 3 Section D - Claim Procedures Notice of Claim Article 1 Claim Forms Article 2 Proof of Loss Article 3 Payment, Denial and Review Article 4 Medical Examinations Article 5 Autopsy Article 6 Legal Action Article 7 Time Limits Article 8 This policy has been updated effective January 1, 2014 GC 6001 TABLE OF CONTENTS, PAGE 3",0.43757,4


## ReRanking with a Cross-Encoder

In [134]:
# Importing the CrossEncoder library from sentence_transformers

from sentence_transformers import CrossEncoder, util

In [135]:
# Initialising the cross encoder model

cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

In [136]:
# Input (query, response) pairs for each of the top 10 responses received from the semantic search to the cross encoder
# Generating the cross_encoder scores for these pairs

cross_inputs = [[query, response] for response in results_df['Documents']]
cross_rerank_scores = cross_encoder.predict(cross_inputs)

In [137]:
cross_rerank_scores

array([  0.49313617,  -0.9501785 ,  -8.053389  ,  -3.921972  ,
        -9.501407  , -10.407724  ,  -9.272299  ,  -9.281447  ,
       -10.527519  , -10.874072  ], dtype=float32)

In [138]:
# Storing the rerank_scores in results_df

results_df['Reranked_scores'] = cross_rerank_scores

In [139]:
results_df

Unnamed: 0,Metadatas,Documents,Distances,IDs,Reranked_scores
0,"{'Page_No.': 'Page 20', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","Section B - Premiums Article 1 - Payment Responsibility; Due Dates; Grace Period The Policyholder is responsible for collection and payment of all premiums due while this Group Policy is in force. Payments must be sent to the home office of The Principal in Des Moines, Iowa. The first premium is due on the Date of Issue of this Group Policy. Each premium thereafter will be due on the first of each Insurance Month. Except for the first premium, a Grace Period of 31 days will be allowed for payment of premium. ""Grace Period"" means the first 31-day period following a premium due date. The Group Policy will remain in force until the end of the Grace Period, unless the Group Policy has been terminated by notice as described in PART II, Section C. The Policyholder will be liable for payment of the premium for the time this Group Policy remains in force during the Grace Period. Article 2 - Premium Rates The premium rate(s) for each Member insured for Life Insurance will be: a. Member Life Insurance $0.210 for each $1,000 of insurance in force. b. Member Accidental Death and Dismemberment Insurance $0.025 for each $1,000 of Member Life Insurance in force. c. Dependent Life Insurance $1.46 for each Member insured for Dependent Life Insurance. If the Policyholder has at least two other eligible group insurance policies underwritten by The Principal, as determined by The Principal, the Policyholder may be eligible for a multiple policy discount. Article 3 - Premium Rate Changes The Principal may change a premium rate: a. on any premium due date, if the initial premium rate has then been in force 24 months or more and if Written notice is given to the Policyholder at least 31 days before the date of change; or This policy has been updated effective January 1, 2014 PART II - POLICY ADMINISTRATION GC 6004 Section B - Premiums, Page 1",0.319258,16,0.493136
1,"{'Page_No.': 'Page 23', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","Section C - Policy Termination Article 1 - Failure to Pay Premium This Group Policy will terminate at the end of the Grace Period if total premium due has not been received by The Principal before the end of the Grace Period. Failure by the Policyholder to pay the premium within the Grace Period will be deemed notice by the Policyholder to The Principal to discontinue this Group Policy at the end of the Grace Period. Article 2 - Termination Rights of the Policyholder The Policyholder may terminate this Group Policy effective on the day before any premium due date by giving Written notice to The Principal prior to that premium due date. The Policyholder's issuance of a stop-payment order for any amounts used to pay premiums for the Policyholder's coverage will be considered Written notice from the Policyholder. Article 3 - Termination Rights of The Principal The Principal may nonrenew or terminate this Group Policy by giving the Policyholder 31 days advance notice in Writing, if the Policyholder: a. ceases to be actively engaged in business for profit within the meaning of the Internal Revenue Code, or be established as a legitimate nonprofit corporation within the meaning of the Internal Revenue Code; or b. fails to maintain the participation percentages requirements of PART II, Section A with respect to eligible employees, excluding those for whom Proof of Good Health is not satisfactory to The Principal; or c. fails to maintain three or more insured employees under this Group Policy; or d. fails to pay premium in accordance with the requirements of PART II, Section B; or e. has performed an act or practice that constitutes fraud or has made an intentional misrepresentation of material fact under the terms of this Group Policy; or f. does not promptly provide The Principal with information that is reasonably required; or g. fails to perform any of its obligations that relate to this Group Policy. This policy has been updated effective January 1, 2014 PART II - POLICY ADMINISTRATION GC 6005 Section C - Policy Termination, Page 1",0.350216,19,-0.950179
2,"{'Page_No.': 'Page 21', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","b . on any date the definition of Member or Dependent is changed; and c. on any date the Policyholder's business, as specified on the Policyholder application, is changed; and d. on any date that a schedule of insurance or class of insured Members is changed; and e. on any premium due date, if the Policyholder has been receiving a multiple policy discount rate and the Policyholder drops below the minimum number of coverages to receive such discount rate; and f. on any date the premium contribution required of Members is changed; and g. with respect to Member Life Insurance, on any Policy Anniversary, if the average age, average Scheduled Benefit amount, or the male/female distribution for then insured Members has changed since the last Policy Anniversary; and h. on any Policy Anniversary, if the volume of insurance for then insured Members has increased or decreased by more than 25% since the last Policy Anniversary. If the Policyholder has other group insurance with The Principal, and if life coverage is initially added on a date other than the Policy Anniversary and it is more than six months before the next Policy Anniversary, The Principal reserves the right to change the premium rate on the next Policy Anniversary. Written notice will be given to the Policyholder at least 31 days before the date of change. If the Policyholder agrees to participate in the electronic services program of The Principal and, at a later date elects to withdraw from participation, such withdrawal may result in certain administrative fees being charged to the Policyholder. Article 4 - Premium Amount The amount of premium to be paid on each due date will be determined in these ways: a. Member Life Insurance The total volume of insurance in force will be divided by 1,000. The result will then be multiplied by the premium rate then in effect. b. Member Accidental Death and Dismemberment Insurance The total volume of insurance in force will be divided by 1,000. The result will then be multiplied by the premium rate then in effect. c. Dependent Life Insurance This policy has been updated effective January 1, 2014 PART II - POLICY ADMINISTRATION GC 6004 Section B - Premiums, Page 2",0.381707,17,-8.053389
3,"{'Page_No.': 'Page 6', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","TABLE OF CONTENTS PART I - DEFINITIONS PART II - POLICY ADMINISTRATION Section A – Contract Entire Contract Article 1 Policy Changes Article 2 Policyholder Eligibility Requirements Article 3 Policy Incontestability Article 4 Individual Incontestability Article 5 Information to be Furnished Article 6 Certificates Article 7 Assignments Article 8 Dependent Rights Article 9 Policy Interpretation Article 10 Electronic Transactions Article 11 Section B – Premium Payment Responsibility; Due Dates; Grace Period Article 1 Premium Rates Article 2 Premium Rate Changes Article 3 Premium Amount Article 4 Contributions from Members Article 5 Section C - Policy Termination Failure to Pay Premium Article 1 Termination Rights of the Policyholder Article 2 Termination Rights of The Principal Article 3 Policyholder Responsibility to Members Article 4 Section D - Policy Renewal Renewal Article 1 PART III - INDIVIDUAL REQUIREMENTS AND RIGHTS This policy has been updated effective January 1, 2014 GC 6001 TABLE OF CONTENTS, PAGE 1",0.391255,2,-3.921972
4,"{'Page_No.': 'Page 24', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","T he Principal may terminate the Policyholder's coverage on any premium due date if the Policyholder relocates to a state where this Group Policy is not marketed, by giving the Policyholder 31 days advanced notice in Writing. Article 4 - Policyholder Responsibility to Members If this Group Policy terminates for any reason, the Policyholder must: a. notify each Member of the effective date of the termination; and b. refund or otherwise account to each Member all contributions received or withheld from Members for premiums not actually paid to The Principal. This policy has been updated effective January 1, 2014 PART II - POLICY ADMINISTRATION GC 6005 Section C - Policy Termination, Page 2",0.400594,20,-9.501407
5,"{'Page_No.': 'Page 18', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","c . a copy of the form which contains the statement is given to the insured or the insured's beneficiary at the time insurance is contested. However, these provisions will not preclude the assertion at any time of defenses based upon the person's ineligibility for insurance under this Group Policy or upon the provisions of this Group Policy. In addition, if an individual's age is misstated, The Principal may at any time adjust premium and benefits to reflect the correct age. Article 6 - Information to be Furnished The Policyholder must, upon request, give The Principal all information needed to administer this Group Policy. If a clerical error is found in this information, The Principal may at any time adjust premium to reflect the facts. An error will not invalidate insurance that would otherwise be in force. Neither will an error continue insurance that would otherwise be terminated. The Principal may inspect, at any reasonable time, all Policyholder records, which relate to this Group Policy. Article 7 - Certificates The Principal will give the Policyholder Certificates for delivery to insured Members. The delivery of such Certificates will be in either paper or electronic format. The Certificates will be evidence of insurance and will describe the basic features of the coverage. They will not be considered a part of this Group Policy. Article 8 - Assignments No assignments of Member Life Insurance will be allowed under this Group Policy. Article 9 - Dependent Rights A Dependent will have no rights under this Group Policy except as set forth in PART III, Section F, Article 2. Article 10 - Policy Interpretation This policy has been updated effective January 1, 2014 PART II - POLICY ADMINISTRATION GC 6003 Section A - Contract, Page 3",0.404591,14,-10.407724
6,"{'Page_No.': 'Page 22', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","The number of Members insured for Dependent Life Insurance will be multiplied by the premium rate then in effect. To ensure accurate premium calculations, the Policyholder is responsible for reporting to The Principal, the following information during the stated time periods: a. Members who are eligible to become insured are to be reported during the month prior to or during the month that coverage becomes effective. b. Members whose coverage has terminated are to be reported within a month of the date coverage terminated. c. Changes in Member insurance class are to be reported within a month of the date that the change in insurance class took place. If a Member is added or a present Member's insurance is increased or terminated on other than the first of an Insurance Month, premium for that Member will be adjusted and applied as if the change were to take place on the first of the next following Insurance Month. Article 5 - Contributions from Members Members are not required to contribute a part of the premium for their Member insurance under this Group Policy. Members are required to contribute a part of the premium for their Dependent's insurance under this Group Policy. This policy has been updated effective January 1, 2014 PART II - POLICY ADMINISTRATION GC 6004 Section B - Premiums, Page 3",0.408936,18,-9.272299
7,"{'Page_No.': 'Page 54', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","f . claim requirements listed in PART IV, Section D, must be satisfied; and g. all medical evidence must be satisfactory to The Principal. Article 3 - Benefits Payable If all of the benefit qualifications are met, The Principal will pay: a. 100% of the Scheduled Benefit (or approved amount, if applicable) in force for loss of life; or b. 50% of the Scheduled Benefit (or approved amount, if applicable) in force if one hand is severed at or above the wrist; or c. 25% of the Scheduled Benefit (or approved amount, if applicable) in force for loss of thumb and index finger on the same hand; or d. 50% of the Scheduled Benefit (or approved amount, if applicable) in force if one foot is severed at or above the ankle; or e. 50% of the Scheduled Benefit (or approved amount, if applicable) in force if the sight of one eye is permanently lost (For this purpose, vision not correctable to better than 20/200 will be considered loss of sight.); or f. 100% of the Scheduled Benefit (or approved amount, if applicable) in force for more than one of the losses listed in b., d., or e. above. Total payment for all losses under this Article 3 that result from the same accident will not exceed the Scheduled Benefit (or approved amount, if applicable). Payment for loss of life will be to the beneficiary named for Member Life Insurance. Payment will be subject to the Beneficiary, Facility of Payment and Settlement of Proceeds provisions of PART IV, Section A. Payment for all other losses will be to the Member. Disappearance It will be presumed that a Member has lost his or her life if: a. the Member's body has not been found within 365 days after the disappearance of a conveyance in which the Member was an occupant at the time of disappearance; and b. the disappearance of the conveyance was due to its accidental wrecking or sinking; and c. this Group Policy would have covered the injury resulting from the accident. This policy has been updated effective January 1, 2014 PART IV - BENEFITS GC 6015 Section B - Member Accidental Death and Dismemberment Insurance, Page 2",0.429699,50,-9.281447
8,"{'Page_No.': 'Page 17', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","a. be actively engaged in business for profit within the meaning of the Internal Revenue Code, or be established as a legitimate nonprofit corporation within the meaning of the Internal Revenue Code; and b. make at least the level of premium contributions required for insurance on its eligible Members. The Policyholder must: (1) contribute at least 50% of the required premium for all Members (including disabled Members, if any); and c. if the Member is to contribute part of the premium, maintain the following participation percentages with respect to eligible employees and Dependents, excluding those for whom Proof of Good Health is not satisfactory to The Principal: (1) Employees: - at least 75% of all eligible employees must enroll; (2) Dependents: - maintain a Dependent participation of at least 75% of eligible Dependents; and d. if the Member is to contribute no part of the premium, 100% of eligible employees and Dependents must enroll. Article 4 - Policy Incontestability In the absence of fraud, after this Group Policy has been in force two years, The Principal may not contest its validity except for nonpayment of premium. Article 5 - Individual Incontestability All statements made by any individual insured under this Group Policy will be representations and not warranties. In the absence of fraud, these statements may not be used to contest an insured person's insurance unless: a. the insured person's insurance has been in force for less than two years during the insured's lifetime; and b. the statement is in Written form Signed by the insured person; and This policy has been updated effective January 1, 2014 PART II - POLICY ADMINISTRATION GC 6003 Section A - Contract, Page 2",0.43042,13,-10.527519
9,"{'Page_No.': 'Page 8', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","Section A - Member Life Insurance Schedule of Insurance Article 1 Death Benefits Payable Article 2 Beneficiary Article 3 Facility of Payment Article 4 Settlement of Proceeds Article 5 Member Life Insurance - Coverage During Disability Article 6 Accelerated Benefits Article 7 Section B - Member Accidental Death and Dismemberment Insurance Schedule of Insurance Article 1 Benefit Qualification Article 2 Benefits Payable Article 3 Seat Belt Benefit Article 4 Loss of Use or Paralysis Benefit Article 5 Loss of Speech and/or Hearing Benefit Article 6 Repatriation Benefit Article 7 Educational Benefit Article 8 Limitations Article 9 Section C - Dependent Life Insurance Schedule of Insurance Article 1 Death Benefits Payable Article 2 Beneficiary Article 3 Section D - Claim Procedures Notice of Claim Article 1 Claim Forms Article 2 Proof of Loss Article 3 Payment, Denial and Review Article 4 Medical Examinations Article 5 Autopsy Article 6 Legal Action Article 7 Time Limits Article 8 This policy has been updated effective January 1, 2014 GC 6001 TABLE OF CONTENTS, PAGE 3",0.43757,4,-10.874072


In [140]:
# Returning the top 3 results from semantic search

top_3_semantic = results_df.sort_values(by='Distances')
top_3_semantic[:3]

Unnamed: 0,Metadatas,Documents,Distances,IDs,Reranked_scores
0,"{'Page_No.': 'Page 20', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","Section B - Premiums Article 1 - Payment Responsibility; Due Dates; Grace Period The Policyholder is responsible for collection and payment of all premiums due while this Group Policy is in force. Payments must be sent to the home office of The Principal in Des Moines, Iowa. The first premium is due on the Date of Issue of this Group Policy. Each premium thereafter will be due on the first of each Insurance Month. Except for the first premium, a Grace Period of 31 days will be allowed for payment of premium. ""Grace Period"" means the first 31-day period following a premium due date. The Group Policy will remain in force until the end of the Grace Period, unless the Group Policy has been terminated by notice as described in PART II, Section C. The Policyholder will be liable for payment of the premium for the time this Group Policy remains in force during the Grace Period. Article 2 - Premium Rates The premium rate(s) for each Member insured for Life Insurance will be: a. Member Life Insurance $0.210 for each $1,000 of insurance in force. b. Member Accidental Death and Dismemberment Insurance $0.025 for each $1,000 of Member Life Insurance in force. c. Dependent Life Insurance $1.46 for each Member insured for Dependent Life Insurance. If the Policyholder has at least two other eligible group insurance policies underwritten by The Principal, as determined by The Principal, the Policyholder may be eligible for a multiple policy discount. Article 3 - Premium Rate Changes The Principal may change a premium rate: a. on any premium due date, if the initial premium rate has then been in force 24 months or more and if Written notice is given to the Policyholder at least 31 days before the date of change; or This policy has been updated effective January 1, 2014 PART II - POLICY ADMINISTRATION GC 6004 Section B - Premiums, Page 1",0.319258,16,0.493136
1,"{'Page_No.': 'Page 23', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","Section C - Policy Termination Article 1 - Failure to Pay Premium This Group Policy will terminate at the end of the Grace Period if total premium due has not been received by The Principal before the end of the Grace Period. Failure by the Policyholder to pay the premium within the Grace Period will be deemed notice by the Policyholder to The Principal to discontinue this Group Policy at the end of the Grace Period. Article 2 - Termination Rights of the Policyholder The Policyholder may terminate this Group Policy effective on the day before any premium due date by giving Written notice to The Principal prior to that premium due date. The Policyholder's issuance of a stop-payment order for any amounts used to pay premiums for the Policyholder's coverage will be considered Written notice from the Policyholder. Article 3 - Termination Rights of The Principal The Principal may nonrenew or terminate this Group Policy by giving the Policyholder 31 days advance notice in Writing, if the Policyholder: a. ceases to be actively engaged in business for profit within the meaning of the Internal Revenue Code, or be established as a legitimate nonprofit corporation within the meaning of the Internal Revenue Code; or b. fails to maintain the participation percentages requirements of PART II, Section A with respect to eligible employees, excluding those for whom Proof of Good Health is not satisfactory to The Principal; or c. fails to maintain three or more insured employees under this Group Policy; or d. fails to pay premium in accordance with the requirements of PART II, Section B; or e. has performed an act or practice that constitutes fraud or has made an intentional misrepresentation of material fact under the terms of this Group Policy; or f. does not promptly provide The Principal with information that is reasonably required; or g. fails to perform any of its obligations that relate to this Group Policy. This policy has been updated effective January 1, 2014 PART II - POLICY ADMINISTRATION GC 6005 Section C - Policy Termination, Page 1",0.350216,19,-0.950179
2,"{'Page_No.': 'Page 21', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","b . on any date the definition of Member or Dependent is changed; and c. on any date the Policyholder's business, as specified on the Policyholder application, is changed; and d. on any date that a schedule of insurance or class of insured Members is changed; and e. on any premium due date, if the Policyholder has been receiving a multiple policy discount rate and the Policyholder drops below the minimum number of coverages to receive such discount rate; and f. on any date the premium contribution required of Members is changed; and g. with respect to Member Life Insurance, on any Policy Anniversary, if the average age, average Scheduled Benefit amount, or the male/female distribution for then insured Members has changed since the last Policy Anniversary; and h. on any Policy Anniversary, if the volume of insurance for then insured Members has increased or decreased by more than 25% since the last Policy Anniversary. If the Policyholder has other group insurance with The Principal, and if life coverage is initially added on a date other than the Policy Anniversary and it is more than six months before the next Policy Anniversary, The Principal reserves the right to change the premium rate on the next Policy Anniversary. Written notice will be given to the Policyholder at least 31 days before the date of change. If the Policyholder agrees to participate in the electronic services program of The Principal and, at a later date elects to withdraw from participation, such withdrawal may result in certain administrative fees being charged to the Policyholder. Article 4 - Premium Amount The amount of premium to be paid on each due date will be determined in these ways: a. Member Life Insurance The total volume of insurance in force will be divided by 1,000. The result will then be multiplied by the premium rate then in effect. b. Member Accidental Death and Dismemberment Insurance The total volume of insurance in force will be divided by 1,000. The result will then be multiplied by the premium rate then in effect. c. Dependent Life Insurance This policy has been updated effective January 1, 2014 PART II - POLICY ADMINISTRATION GC 6004 Section B - Premiums, Page 2",0.381707,17,-8.053389


In [141]:
# Returning the top 3 results after reranking

top_3_rerank = results_df.sort_values(by='Reranked_scores', ascending=False)
top_3_rerank[:3]

Unnamed: 0,Metadatas,Documents,Distances,IDs,Reranked_scores
0,"{'Page_No.': 'Page 20', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","Section B - Premiums Article 1 - Payment Responsibility; Due Dates; Grace Period The Policyholder is responsible for collection and payment of all premiums due while this Group Policy is in force. Payments must be sent to the home office of The Principal in Des Moines, Iowa. The first premium is due on the Date of Issue of this Group Policy. Each premium thereafter will be due on the first of each Insurance Month. Except for the first premium, a Grace Period of 31 days will be allowed for payment of premium. ""Grace Period"" means the first 31-day period following a premium due date. The Group Policy will remain in force until the end of the Grace Period, unless the Group Policy has been terminated by notice as described in PART II, Section C. The Policyholder will be liable for payment of the premium for the time this Group Policy remains in force during the Grace Period. Article 2 - Premium Rates The premium rate(s) for each Member insured for Life Insurance will be: a. Member Life Insurance $0.210 for each $1,000 of insurance in force. b. Member Accidental Death and Dismemberment Insurance $0.025 for each $1,000 of Member Life Insurance in force. c. Dependent Life Insurance $1.46 for each Member insured for Dependent Life Insurance. If the Policyholder has at least two other eligible group insurance policies underwritten by The Principal, as determined by The Principal, the Policyholder may be eligible for a multiple policy discount. Article 3 - Premium Rate Changes The Principal may change a premium rate: a. on any premium due date, if the initial premium rate has then been in force 24 months or more and if Written notice is given to the Policyholder at least 31 days before the date of change; or This policy has been updated effective January 1, 2014 PART II - POLICY ADMINISTRATION GC 6004 Section B - Premiums, Page 1",0.319258,16,0.493136
1,"{'Page_No.': 'Page 23', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","Section C - Policy Termination Article 1 - Failure to Pay Premium This Group Policy will terminate at the end of the Grace Period if total premium due has not been received by The Principal before the end of the Grace Period. Failure by the Policyholder to pay the premium within the Grace Period will be deemed notice by the Policyholder to The Principal to discontinue this Group Policy at the end of the Grace Period. Article 2 - Termination Rights of the Policyholder The Policyholder may terminate this Group Policy effective on the day before any premium due date by giving Written notice to The Principal prior to that premium due date. The Policyholder's issuance of a stop-payment order for any amounts used to pay premiums for the Policyholder's coverage will be considered Written notice from the Policyholder. Article 3 - Termination Rights of The Principal The Principal may nonrenew or terminate this Group Policy by giving the Policyholder 31 days advance notice in Writing, if the Policyholder: a. ceases to be actively engaged in business for profit within the meaning of the Internal Revenue Code, or be established as a legitimate nonprofit corporation within the meaning of the Internal Revenue Code; or b. fails to maintain the participation percentages requirements of PART II, Section A with respect to eligible employees, excluding those for whom Proof of Good Health is not satisfactory to The Principal; or c. fails to maintain three or more insured employees under this Group Policy; or d. fails to pay premium in accordance with the requirements of PART II, Section B; or e. has performed an act or practice that constitutes fraud or has made an intentional misrepresentation of material fact under the terms of this Group Policy; or f. does not promptly provide The Principal with information that is reasonably required; or g. fails to perform any of its obligations that relate to this Group Policy. This policy has been updated effective January 1, 2014 PART II - POLICY ADMINISTRATION GC 6005 Section C - Policy Termination, Page 1",0.350216,19,-0.950179
3,"{'Page_No.': 'Page 6', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","TABLE OF CONTENTS PART I - DEFINITIONS PART II - POLICY ADMINISTRATION Section A – Contract Entire Contract Article 1 Policy Changes Article 2 Policyholder Eligibility Requirements Article 3 Policy Incontestability Article 4 Individual Incontestability Article 5 Information to be Furnished Article 6 Certificates Article 7 Assignments Article 8 Dependent Rights Article 9 Policy Interpretation Article 10 Electronic Transactions Article 11 Section B – Premium Payment Responsibility; Due Dates; Grace Period Article 1 Premium Rates Article 2 Premium Rate Changes Article 3 Premium Amount Article 4 Contributions from Members Article 5 Section C - Policy Termination Failure to Pay Premium Article 1 Termination Rights of the Policyholder Article 2 Termination Rights of The Principal Article 3 Policyholder Responsibility to Members Article 4 Section D - Policy Renewal Renewal Article 1 PART III - INDIVIDUAL REQUIREMENTS AND RIGHTS This policy has been updated effective January 1, 2014 GC 6001 TABLE OF CONTENTS, PAGE 1",0.391255,2,-3.921972


In [142]:
# creating a dataframe that holds the 'Documents' and 'Metadatas' from the top 3 results of the cross encoder
search_layer_output = top_3_rerank[["Documents", "Metadatas"]][:3]

# Adding the query to the DataFrame
search_layer_output ['Query'] = query

In [143]:
search_layer_output

Unnamed: 0,Documents,Metadatas,Query
0,"Section B - Premiums Article 1 - Payment Responsibility; Due Dates; Grace Period The Policyholder is responsible for collection and payment of all premiums due while this Group Policy is in force. Payments must be sent to the home office of The Principal in Des Moines, Iowa. The first premium is due on the Date of Issue of this Group Policy. Each premium thereafter will be due on the first of each Insurance Month. Except for the first premium, a Grace Period of 31 days will be allowed for payment of premium. ""Grace Period"" means the first 31-day period following a premium due date. The Group Policy will remain in force until the end of the Grace Period, unless the Group Policy has been terminated by notice as described in PART II, Section C. The Policyholder will be liable for payment of the premium for the time this Group Policy remains in force during the Grace Period. Article 2 - Premium Rates The premium rate(s) for each Member insured for Life Insurance will be: a. Member Life Insurance $0.210 for each $1,000 of insurance in force. b. Member Accidental Death and Dismemberment Insurance $0.025 for each $1,000 of Member Life Insurance in force. c. Dependent Life Insurance $1.46 for each Member insured for Dependent Life Insurance. If the Policyholder has at least two other eligible group insurance policies underwritten by The Principal, as determined by The Principal, the Policyholder may be eligible for a multiple policy discount. Article 3 - Premium Rate Changes The Principal may change a premium rate: a. on any premium due date, if the initial premium rate has then been in force 24 months or more and if Written notice is given to the Policyholder at least 31 days before the date of change; or This policy has been updated effective January 1, 2014 PART II - POLICY ADMINISTRATION GC 6004 Section B - Premiums, Page 1","{'Page_No.': 'Page 20', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","provide the detailed view of the premium payments, the rates and amount. Also clarify about the grace period and what happens on default of payment?"
1,"Section C - Policy Termination Article 1 - Failure to Pay Premium This Group Policy will terminate at the end of the Grace Period if total premium due has not been received by The Principal before the end of the Grace Period. Failure by the Policyholder to pay the premium within the Grace Period will be deemed notice by the Policyholder to The Principal to discontinue this Group Policy at the end of the Grace Period. Article 2 - Termination Rights of the Policyholder The Policyholder may terminate this Group Policy effective on the day before any premium due date by giving Written notice to The Principal prior to that premium due date. The Policyholder's issuance of a stop-payment order for any amounts used to pay premiums for the Policyholder's coverage will be considered Written notice from the Policyholder. Article 3 - Termination Rights of The Principal The Principal may nonrenew or terminate this Group Policy by giving the Policyholder 31 days advance notice in Writing, if the Policyholder: a. ceases to be actively engaged in business for profit within the meaning of the Internal Revenue Code, or be established as a legitimate nonprofit corporation within the meaning of the Internal Revenue Code; or b. fails to maintain the participation percentages requirements of PART II, Section A with respect to eligible employees, excluding those for whom Proof of Good Health is not satisfactory to The Principal; or c. fails to maintain three or more insured employees under this Group Policy; or d. fails to pay premium in accordance with the requirements of PART II, Section B; or e. has performed an act or practice that constitutes fraud or has made an intentional misrepresentation of material fact under the terms of this Group Policy; or f. does not promptly provide The Principal with information that is reasonably required; or g. fails to perform any of its obligations that relate to this Group Policy. This policy has been updated effective January 1, 2014 PART II - POLICY ADMINISTRATION GC 6005 Section C - Policy Termination, Page 1","{'Page_No.': 'Page 23', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","provide the detailed view of the premium payments, the rates and amount. Also clarify about the grace period and what happens on default of payment?"
3,"TABLE OF CONTENTS PART I - DEFINITIONS PART II - POLICY ADMINISTRATION Section A – Contract Entire Contract Article 1 Policy Changes Article 2 Policyholder Eligibility Requirements Article 3 Policy Incontestability Article 4 Individual Incontestability Article 5 Information to be Furnished Article 6 Certificates Article 7 Assignments Article 8 Dependent Rights Article 9 Policy Interpretation Article 10 Electronic Transactions Article 11 Section B – Premium Payment Responsibility; Due Dates; Grace Period Article 1 Premium Rates Article 2 Premium Rate Changes Article 3 Premium Amount Article 4 Contributions from Members Article 5 Section C - Policy Termination Failure to Pay Premium Article 1 Termination Rights of the Policyholder Article 2 Termination Rights of The Principal Article 3 Policyholder Responsibility to Members Article 4 Section D - Policy Renewal Renewal Article 1 PART III - INDIVIDUAL REQUIREMENTS AND RIGHTS This policy has been updated effective January 1, 2014 GC 6001 TABLE OF CONTENTS, PAGE 1","{'Page_No.': 'Page 6', 'Policy_Name': 'Principal-Sample-Life-Insurance-Policy'}","provide the detailed view of the premium payments, the rates and amount. Also clarify about the grace period and what happens on default of payment?"


## Retrieval Augmented Generation

**Now we pass our query, a prompt and the search layer results to GPT 3.5 to receive a user friendly answer.**

In [144]:
# Defining the function to generate the response. 

def generate_response(query, search_layer_output):
   
    messages = [
                {"role": "system", "content":  "You are a helpful assistant in the insurance domain who can effectively answer user queries about insurance policies and documents."},
                {"role": "user", "content": f"""You are a helpful assistant in the insurance domain who can effectively answer user queries about insurance policies and documents.
                                                You have a question asked by the user in '{query}' and you have some search results from an insurance documents in the dataframe '{search_layer_output}'. 
                                                These search results are essentially one page of an insurance document that may be relevant to the user query.

                                                The column 'documents' inside this dataframe contains the actual text from the policy document and the column 'metadata' contains the policy name and source page.

                                                Use the documents in '{search_layer_output}' to answer the query '{query}'. Frame an informative answer.

                                                Follow the guidelines below when performing the task.
                                                1. Try to provide relevant/accurate numbers if available.
                                                2. You don’t have to necessarily use all the information in the dataframe. Only choose information that is relevant.
                                                3. Use the Metadatas columns in the dataframe to retrieve and cite the page numbers(s) as citation.The citations if any should appear at the end of your response
                                                4. If you can't provide the complete answer, please also provide any information that will help the user to search specific sections in the relevant cited documents.
                                                5. You are a customer facing assistant, so do not provide any information on internal workings, just answer the query directly.

                                                The generated response should answer the query directly addressing the user and avoiding additional information. 
                                                If you think that the query is not relevant to the document, reply that the query is irrelevant.
                                                """},
              ]

    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages
    )

    return response.choices[0].message.content.split('\n')

In [145]:
# Generating the response

response = generate_response(query, search_layer_output)

In [146]:
# Printing the response along with the query

print("\nQuery:")
print(query)
print("\nResponse:")
print("\n".join(response))


Query:
provide the detailed view of the premium payments, the rates and amount. Also clarify about the grace period and what happens on default of payment?

Response:
The premium payments for the Group Policy are due on the first of each Insurance Month, with a Grace Period of 31 days allowed for payment. The premium rates for different types of insurance coverage are specified as follows:
- Member Life Insurance: $0.210 for each $1,000 of insurance in force
- Member Accidental Death and Dismemberment Insurance: $0.025 for each $1,000 of Member Life Insurance in force
- Dependent Life Insurance: $1.46 for each Member insured for Dependent Life Insurance
In case of default on payment, if the total premium due is not received before the end of the Grace Period, the Group Policy will terminate.

Citation: Page 20 - Principal-Sample-Life-Insurance-Policy
