In [50]:
import yaml
import tiktoken

from langchain import OpenAI, VectorDBQA, LLMChain
from langchain.prompts import PromptTemplate

from pdf_loaders import PdfToTextLoader
from dataset_vectorizers import DatasetVectorizer

with open("config.yml", "r") as f:
    config = yaml.safe_load(f)

OPENAI_API_KEY = config['OPENAI_KEY']

## Loading data from PDF

In [51]:
# data taken from https://www.freelancersunion.org/insurance/health/bronze-plans-nyc/
PDFS, NAMES, TXTS  = [
    './data/2023_EmblemHealth_Bronze_D.pdf',
    './data/BH2_SB_OX_2023_v1_-_Bronze_HSA_Non-Standard_Off-Exchange.pdf'
], [
    'EmblemHealth', 
    'MetroPlus'
], []


In [52]:
for pdf_path in PDFS:
    txt_path = pdf_path.replace(".pdf", ".txt")
    pdf_loader = PdfToTextLoader(pdf_path, txt_path)
    text = pdf_loader.load_pdf()
    TXTS.append(txt_path)

## Vectorizing dataset

In [53]:
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 500

In [54]:
dataset_vectorizer = DatasetVectorizer()
documents_1, texts_1, docsearch_1 = dataset_vectorizer.vectorize([TXTS[0]], chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, openai_key=OPENAI_API_KEY)
documents_2, texts_2, docsearch_2 = dataset_vectorizer.vectorize([TXTS[1]], chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, openai_key=OPENAI_API_KEY)

Using embedded DuckDB without persistence: data will be transient
Using embedded DuckDB without persistence: data will be transient


## Asking questions

In [55]:
QUESTIONS = [
    'How good are the deductibles?',
    "How is the preventive care coverage?",
    'How this plan fits for remote workers in the US and abroad?',
    'What is the maximum money amount that can be compensated?',
    'Can I go to any hospital of my choice?',
    'Are there any limitations that won\'t allow to use the insurance?',
    'Does it cover the family members of the applicant?',
    'What are the healthcare procedures that are not covered by the insurance?',
    'Can I use the insurance for the dental care?',
    'Can I use the insurance in other countries?'
]

In [56]:
llm = OpenAI(model_name='text-davinci-003', temperature=0, openai_api_key=OPENAI_API_KEY)
qa_chain_1 = VectorDBQA.from_chain_type(llm=llm, chain_type='stuff', vectorstore=docsearch_1)
qa_chain_2 = VectorDBQA.from_chain_type(llm=llm, chain_type='stuff', vectorstore=docsearch_2)



In [57]:
summary_of_answers = ""
for q in QUESTIONS:
    print(q)
    answer_1, answer_2 = qa_chain_1.run(q), qa_chain_2.run(q)
    summary_of_answers += "Question: " + q + "\n"
    summary_of_answers += f"{NAMES[0]} answer: " + answer_1 + f";\n {NAMES[1]} answer: " + answer_2 + "\n"
    print(NAMES[0], answer_1)
    print(NAMES[1], answer_2)
    print('-' * 10)

How good are the deductibles?
EmblemHealth  The overall deductible is $4,700 individual / $9,400 family. There are no other specific deductibles. Certain preventive services are covered without cost-sharing and before you meet your deductible.
MetroPlus  The deductibles are $6,100 for individuals and $12,200 for families.
----------
How is the preventive care coverage?
EmblemHealth  This plan covers certain preventive services without cost-sharing and before you meet your deductible. See a list of covered preventive services at https://www.healthcare.gov/coverage/preventive-care-benefits/.
MetroPlus  Preventive care is covered in full for participating providers. Non-participating providers are not covered and the member is responsible for the full cost.
----------
How this plan fits for remote workers in the US and abroad?
EmblemHealth  This plan provides Minimum Essential Coverage and meets the Minimum Value Standard. It includes coverage for abortion services, bariatric surgery, chi

In [58]:
encoder = tiktoken.encoding_for_model("gpt-3.5-turbo")
len(encoder.encode(summary_of_answers))

1050

In [59]:
print(summary_of_answers)

Question: How good are the deductibles?
EmblemHealth answer:  The overall deductible is $4,700 individual / $9,400 family. There are no other specific deductibles. Certain preventive services are covered without cost-sharing and before you meet your deductible.;
 MetroPlus answer:  The deductibles are $6,100 for individuals and $12,200 for families.
Question: How is the preventive care coverage?
EmblemHealth answer:  This plan covers certain preventive services without cost-sharing and before you meet your deductible. See a list of covered preventive services at https://www.healthcare.gov/coverage/preventive-care-benefits/.;
 MetroPlus answer:  Preventive care is covered in full for participating providers. Non-participating providers are not covered and the member is responsible for the full cost.
Question: How this plan fits for remote workers in the US and abroad?
EmblemHealth answer:  This plan provides Minimum Essential Coverage and meets the Minimum Value Standard. It includes co

## Asking to compare the answers

In [60]:
template = """
I want you to act as an expert in insurance policies. I have asked two companies about their insurance policies and here are their answers:
{summary_of_answers}
I am looking for insurance for a full-remote consulting company with 100 employees. I want you to tell me which company is better and why.
Give me a rating (x out of 10) for the following categories for each company separately with a short explanation (10 words max) for each category:
1. Coverage of different health procedures
2. Flexibility for remote workers abroad
3. Price and compensation
Your answer:
"""

prompt = PromptTemplate(
    input_variables=["summary_of_answers"],
    template=template,
)

In [61]:
llm = OpenAI(model_name='text-davinci-003', temperature=0, openai_api_key=OPENAI_API_KEY, request_timeout=60)
chain = LLMChain(llm=llm, prompt=prompt)

In [62]:
answer = chain.run(summary_of_answers)

In [63]:
print(answer)


EmblemHealth: 
1. Coverage of different health procedures: 8/10 - covers most procedures. 
2. Flexibility for remote workers abroad: 4/10 - no coverage outside US. 
3. Price and compensation: 7/10 - reasonable deductible.

MetroPlus: 
1. Coverage of different health procedures: 6/10 - some procedures not covered. 
2. Flexibility for remote workers abroad: 2/10 - no coverage outside US. 
3. Price and compensation: 8/10 - reasonable deductible and compensation.

Overall, EmblemHealth is the better option for a full-remote consulting company with 100 employees. It offers better coverage of different health procedures and a more reasonable price and compensation.
