## ***Step 1:*** Install the Transformers Library

In [1]:
%%capture
!pip install transformers

In [2]:
import torch
from transformers import AutoTokenizer,BertTokenizerFast, BertForQuestionAnswering

## ***Step 2:*** Load the fine-tuned model

In [3]:
# Single source of truth for the checkpoint so the tokenizer and the model always match
MODEL_NAME = 'bert-large-uncased-whole-word-masking-finetuned-squad'

# Define the BERT tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Load the fine-tuned model
model = BertForQuestionAnswering.from_pretrained(MODEL_NAME)
# Put the model in inference mode (turns off dropout); as the last expression
# of the cell this also displays the model architecture.
model.eval()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BertForQuestionAnswering(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 1024, padding_idx=0)
      (position_embeddings): Embedding(512, 1024)
      (token_type_embeddings): Embedding(2, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-23): 24 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
              (LayerNorm): LayerNorm((1024,)

## ***Step 3:*** Make the prediction and evaluate it

In [4]:
def predict(context,query):
  """Extract the answer span for `query` from `context` with the loaded QA model.

  Relies on the notebook-level `tokenizer` and `model` objects.

  Args:
    context: Passage to search for the answer.
    query: Question (or key phrase) to look up in the passage.

  Returns:
    The decoded text of the tokens between the highest-scoring start position
    and the highest-scoring end position (inclusive). The string is empty when
    the predicted end falls before the predicted start.
  """
  # Encode as a (query, context) pair. Only the context is truncated, so an
  # over-long passage is clipped to the model's position limit instead of
  # crashing the forward pass.
  inputs = tokenizer(
      query,
      context,
      return_tensors='pt',
      truncation='only_second',
      max_length=model.config.max_position_embeddings,
  )

  # Inference only: skip building the autograd graph.
  with torch.no_grad():
    outputs = model(**inputs)

  answer_start = torch.argmax(outputs.start_logits)  # most likely first token of the answer
  answer_end = torch.argmax(outputs.end_logits) + 1  # +1 so the slice includes the end token

  answer_ids = inputs['input_ids'][0][answer_start:answer_end]
  answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(answer_ids))

  return answer

def normalize_text(s):
  """Canonicalise an answer string before it is compared with another.

  Steps, in order: lowercase, drop every ASCII punctuation character,
  replace the standalone words "a", "an" and "the" with a space, then
  collapse all runs of whitespace into single spaces and trim the ends.
  """
  import re
  import string

  punctuation = set(string.punctuation)

  lowered = s.lower()
  without_punctuation = "".join(ch for ch in lowered if ch not in punctuation)
  without_articles = re.sub(r"\b(a|an|the)\b", " ", without_punctuation, flags=re.UNICODE)

  return " ".join(without_articles.split())

def compute_exact_match(prediction, truth):
    """Return 1 when prediction and truth are equal after normalize_text, else 0."""
    is_match = normalize_text(prediction) == normalize_text(truth)
    return 1 if is_match else 0

def compute_f1(prediction, truth):
  """Token-level F1 between a predicted answer and the reference answer.

  Both strings are normalised with normalize_text and split on whitespace.
  The overlap is a multiset intersection: a token shared by both sides counts
  as many times as it appears in both. Counting distinct shared tokens while
  dividing by the full token counts would under-score answers that repeat a
  word, even when prediction and truth are identical.

  Args:
    prediction: Answer text produced by the model.
    truth: Reference answer text.

  Returns:
    The int 1 or 0 when either side has no tokens after normalisation
    (1 only if both are empty), the int 0 when no token is shared, and
    otherwise the harmonic mean of precision and recall as a float.
  """
  from collections import Counter

  pred_tokens = normalize_text(prediction).split()
  truth_tokens = normalize_text(truth).split()

  # if either the prediction or the truth is no-answer then f1 = 1 if they agree, 0 otherwise
  if len(pred_tokens) == 0 or len(truth_tokens) == 0:
    return int(pred_tokens == truth_tokens)

  # Counter & Counter keeps, per token, the smaller of the two occurrence counts
  num_common = sum((Counter(pred_tokens) & Counter(truth_tokens)).values())

  # if there are no common tokens then f1 = 0
  if num_common == 0:
    return 0

  prec = num_common / len(pred_tokens)
  rec = num_common / len(truth_tokens)

  return 2 * (prec * rec) / (prec + rec)

In [5]:
def give_an_answer(context,query, answer):
  """Run the QA model on one query and print the prediction with its scores.

  Prints the query, the extracted span, the reference answer, the exact-match
  score and the F1 score, followed by blank lines. Nothing is returned.
  """
  prediction = predict(context,query)
  em_score = compute_exact_match(prediction, answer)
  f1_score = compute_f1(prediction, answer)

  # One print call, one item per line; the trailing "\n" item plus print's own
  # line ending produce the blank separator lines after the report.
  print(
      f"Insights : {query}",
      f"Extracted Keypoints : {prediction}",
      f"True Answer: {answer}",
      f"EM: {em_score}",
      f"F1: {f1_score}",
      "\n",
      sep="\n",
  )

## ***Step 4:*** Test my model

In [None]:
# import pandas as pd

# # Assuming `give_an_answer` is a function that returns the extracted keypoints for a query

# # List of 15 fixed queries
# fixed_queries = [
#     "Add-on Covers or Riders",
#     "Automatic Restoration in policy",
#     "Co-morbidities or Pre-existing Diseases",
#     "Co-payment in insurance",
#     "Critical Illness coverage",
#     "Cashless Claims process",
#     "Deductibles in health insurance",
#     "Policy Exclusions",
#     "Coverage Inclusions",
#     "No-claim bonus impact",
#     "Network Hospitals importance",
#     "Premium factors",
#     "Sum Insured",
#     "Top-up plans in insurance",
#     "Waiting Period in policy"
# ]

# # Function to input context
# def get_context():
#     print("Enter your health policy context:")
#     return input()

# # Function to handle additional queries with the same context
# def additional_queries(context):
#     while True:
#         new_query = input("\nEnter another query (or type 'exit' to stop and start over): ")
#         if new_query.lower() == 'exit':
#             return 'exit'
#         extracted_keypoints = give_an_answer(context, new_query)
#         print(f"Query: {new_query}")
#         print(f"Extracted Keypoints: {extracted_keypoints}")

# # Main function
# def process_queries():
#     while True:
#         # Step 1: Get the context from the user
#         context = get_context()

#         # Step 2: Process the 15 fixed queries
#         results = []
#         for q in fixed_queries:
#             extracted_keypoints = give_an_answer(context, q)
#             results.append((q, extracted_keypoints))  # Append a tuple of (query, extracted_keypoints)

#         # Step 3: Display the results in a DataFrame
#         df = pd.DataFrame(results, columns=['Query', 'Extracted Keypoints'])
#         pd.set_option('display.max_colwidth', None)  # Allow full width for extracted keypoints
#         print("\nResults for the fixed queries:")
#         print(df.to_string(index=False, justify='left'))

#         # Step 4: Handle additional queries with the same context
#         while True:
#             user_choice = additional_queries(context)
#             if user_choice == 'exit':
#                 break  # Exit to start the process over again

# # Run the main function
# process_queries()


Enter your health policy context:
The comprehensive health insurance plan is designed to provide extensive coverage for individuals and families. The policy includes coverage for a wide range of medical expenses such as hospitalization, day-care procedures, critical illness, and even mental health treatments. It has a sum insured of ₹20 lakhs, with an option to increase the coverage by opting for a top-up plan. The policyholder is required to serve a waiting period of 3 years for pre-existing conditions. Additionally, there is no age limit for policy renewals, ensuring lifelong coverage. Maternity benefits are provided with a coverage limit of ₹75,000, and newborn baby coverage is available from day one. The policy allows cashless claims at more than 8,000 hospitals across the country, with a co-payment clause of 5% applicable only for claims made by senior citizens aged 65 and above. An optional add-on cover for critical illnesses like cancer and liver disease can be added to the poli

KeyboardInterrupt: Interrupted by user

In [6]:
# import pandas as pd

# Assuming `give_an_answer` is a function that returns the extracted keypoints for a query

# Policy text that every query in this cell is answered from.
context = """
The comprehensive health insurance plan is designed to provide extensive coverage for individuals and families.
The policy includes coverage for a wide range of medical expenses such as hospitalization, day-care procedures,
critical illness, and even mental health treatments. It has a sum insured of ₹20 lakhs, with an option to increase the coverage
by opting for a top-up plan. The policyholder is required to serve a waiting period of 3 years for pre-existing conditions.
Additionally, there is no age limit for policy renewals, ensuring lifelong coverage. Maternity benefits are provided with
a coverage limit of ₹75,000, and newborn baby coverage is available from day one. The policy allows cashless claims
at more than 8,000 hospitals across the country, with a co-payment clause of 5% applicable only for claims made by senior citizens
aged 65 and above. An optional add-on cover for critical illnesses like cancer and liver disease can be added to the policy
at an additional premium. The no-claim bonus increases the sum insured by 20% for each claim-free year, up to a maximum
of 100% of the sum insured.
"""

# Fixed list of 15 insurance topics to look up in the policy text.
# NOTE(review): these are topic phrases rather than questions; the checkpoint is
# fine-tuned on SQuAD, so this is presumably why the extracted spans are loose — confirm.
queries = [
    "Add-on Covers or Riders",
    "Automatic Restoration in policy",
    "Co-morbidities or Pre-existing Diseases",
    "Co-payment in insurance",
    "Critical Illness coverage",
    "Cashless Claims process",
    "Deductibles in health insurance",
    "Policy Exclusions",
    "Coverage Inclusions",
    "No-claim bonus impact",
    "Network Hospitals importance",
    "Premium factors",
    "Sum Insured",
    "Top-up plans in insurance",
    "Waiting Period in policy"
]

# # Store results in a list
# results = []

# # Execute the function for each query and store the results
# for q in queries:
#     extracted_keypoints = give_an_answer(context, q)
#     results.append((q, extracted_keypoints))  # Append a tuple of (query, extracted_keypoints)

# # Create a DataFrame from the results
# df = pd.DataFrame(results, columns=['Query', 'Extracted Keypoints'])

# # Adjust column width and print the DataFrame as a table
# pd.set_option('display.max_colwidth', None)  # Allow full width for extracted keypoints
# print(df.to_string(index=False, justify='left'))


# Reference answers, one per entry of `queries` and in the same order.
answers = [
    "Optional add-on cover for critical illnesses like cancer and liver disease can be added to the policy at an additional premium.",
    "Not explicitly mentioned in the context.",
    "The policyholder is required to serve a waiting period of 3 years for pre-existing conditions.",
    "A co-payment clause of 5% is applicable only for claims made by senior citizens aged 65 and above.",
    "Critical illness coverage is available, and an optional add-on cover for critical illnesses like cancer and liver disease can be added.",
    "The policy allows cashless claims at more than 8,000 hospitals across the country.",
    "Not explicitly mentioned in the context.",
    "Not explicitly mentioned in the context.",
    "Coverage includes medical expenses such as hospitalization, day-care procedures, critical illness, and mental health treatments. Maternity benefits are provided with a coverage limit of ₹75,000, and newborn baby coverage is available from day one.",
    "The no-claim bonus increases the sum insured by 20% for each claim-free year, up to a maximum of 100% of the sum insured.",
    "Cashless claims are available at more than 8,000 hospitals across the country.",
    "Not explicitly mentioned in the context, but the premium may be affected by the sum insured, critical illness add-ons, and top-up plans.",
    "The sum insured is ₹20 lakhs, with an option to increase it by opting for a top-up plan.",
    "The policy has a sum insured of ₹20 lakhs, with an option to increase the coverage by opting for a top-up plan.",
    "The policyholder is required to serve a waiting period of 3 years for pre-existing conditions."
]

# zip() stops at the shorter list, so a missing answer would silently drop queries from the evaluation.
assert len(queries) == len(answers), "queries and answers must pair up one-to-one"

for q,a in zip(queries, answers):
    give_an_answer(context, q, a)


Insights : Add-on Covers or Riders
Extracted Keypoints : an optional add - on cover for critical illnesses like cancer and liver disease can be added to the policy at an additional premium
True Answer: Optional add-on cover for critical illnesses like cancer and liver disease can be added to the policy at an additional premium.
EM: 0
F1: 0.9230769230769231


Insights : Automatic Restoration in policy
Extracted Keypoints : there is no age limit for policy renewals, ensuring lifelong coverage
True Answer: Not explicitly mentioned in the context.
EM: 0
F1: 0


Insights : Co-morbidities or Pre-existing Diseases
Extracted Keypoints : pre - existing conditions
True Answer: The policyholder is required to serve a waiting period of 3 years for pre-existing conditions.
EM: 0
F1: 0.125


Insights : Co-payment in insurance
Extracted Keypoints : the policy allows cashless claims at more than 8, 000 hospitals across the country, with a co - payment clause of 5 % applicable only for claims made by s

In [7]:
# Short policy text with five direct questions and short reference answers.
context = """
This health insurance policy includes coverage for major surgeries and hospital expenses.
It offers a sum insured of ₹5 lakhs, with a 20% co-payment clause for senior citizens above 60 years of age.
The policy also covers pre-existing diseases after a waiting period of 4 years. Maternity benefits are included with a sub-limit
of ₹50,000. Claims can be made through a cashless facility at any of the listed network hospitals.
"""

queries = ["What is the sum insured under the policy?",
           "What is the co-payment percentage for senior citizens?",
           "After how many years are pre-existing diseases covered?",
           "What is the sub-limit for maternity benefits?",
           "How can claims be made?"
          ]

answers = ["₹5 lakhs",
           "20%",
           "4 years",
           "₹50,000",
           "Through a cashless facility at network hospitals"
          ]

# zip() stops at the shorter list, so a missing answer would silently drop queries from the evaluation.
assert len(queries) == len(answers), "queries and answers must pair up one-to-one"

for q,a in zip(queries,answers):
  give_an_answer(context,q,a)


Insights : What is the sum insured under the policy?
Extracted Keypoints : ₹5 lakhs
True Answer: ₹5 lakhs
EM: 1
F1: 1.0


Insights : What is the co-payment percentage for senior citizens?
Extracted Keypoints : 20 %
True Answer: 20%
EM: 1
F1: 1.0


Insights : After how many years are pre-existing diseases covered?
Extracted Keypoints : 4
True Answer: 4 years
EM: 0
F1: 0.6666666666666666


Insights : What is the sub-limit for maternity benefits?
Extracted Keypoints : ₹50, 000
True Answer: ₹50,000
EM: 0
F1: 0


Insights : How can claims be made?
Extracted Keypoints : through a cashless facility
True Answer: Through a cashless facility at network hospitals
EM: 0
F1: 0.6666666666666666




In [8]:
# Premium-plan policy text with five direct questions and short reference answers.
context = """
The premium health insurance plan provides coverage for critical illnesses like stroke, kidney failure, and organ transplants.
It includes a sum insured of ₹15 lakhs and offers a no-claim bonus of 15% for each year without claims, up to a maximum of 50%.
There is a waiting period of 2 years for pre-existing conditions. The policy also covers domiciliary hospitalization for up to 10 days.
Policyholders can avail cashless claims at over 6,000 network hospitals across the country.
"""

queries = ["What is the sum insured in this policy?",
           "What is the no-claim bonus percentage per year?",
           "What is the maximum no-claim bonus percentage limit?",
           "What is the waiting period for pre-existing conditions?",
           "How long does the policy cover domiciliary hospitalization?"
          ]

answers = ["₹15 lakhs",
           "15%",
           "50%",
           "2 years",
           "10 days"
          ]

# zip() stops at the shorter list, so a missing answer would silently drop queries from the evaluation.
assert len(queries) == len(answers), "queries and answers must pair up one-to-one"

for q,a in zip(queries,answers):
  give_an_answer(context,q,a)


Insights : What is the sum insured in this policy?
Extracted Keypoints : ₹15 lakhs
True Answer: ₹15 lakhs
EM: 1
F1: 1.0


Insights : What is the no-claim bonus percentage per year?
Extracted Keypoints : 15 %
True Answer: 15%
EM: 1
F1: 1.0


Insights : What is the maximum no-claim bonus percentage limit?
Extracted Keypoints : 50 %
True Answer: 50%
EM: 1
F1: 1.0


Insights : What is the waiting period for pre-existing conditions?
Extracted Keypoints : 2 years
True Answer: 2 years
EM: 1
F1: 1.0


Insights : How long does the policy cover domiciliary hospitalization?
Extracted Keypoints : up to 10 days
True Answer: 10 days
EM: 0
F1: 0.6666666666666666




In [9]:
# Comprehensive-plan policy text, queried with fully formed questions and short reference answers.
context = """
The comprehensive health insurance plan is designed to provide extensive coverage for individuals and families.
The policy includes coverage for a wide range of medical expenses such as hospitalization, day-care procedures,
critical illness, and even mental health treatments. It has a sum insured of ₹20 lakhs, with an option to increase the coverage
by opting for a top-up plan. The policyholder is required to serve a waiting period of 3 years for pre-existing conditions.
Additionally, there is no age limit for policy renewals, ensuring lifelong coverage. Maternity benefits are provided with
a coverage limit of ₹75,000, and newborn baby coverage is available from day one. The policy allows cashless claims
at more than 8,000 hospitals across the country, with a co-payment clause of 5% applicable only for claims made by senior citizens
aged 65 and above. An optional add-on cover for critical illnesses like cancer and liver disease can be added to the policy
at an additional premium. The no-claim bonus increases the sum insured by 20% for each claim-free year, up to a maximum
of 100% of the sum insured.
"""

queries = ["What is the sum insured under the comprehensive health insurance plan?",
           "What is the waiting period for pre-existing conditions?",
           "What is the co-payment percentage for senior citizens?",
           "How much coverage is provided for maternity benefits?",
           "What is the maximum no-claim bonus percentage limit?",
           "How many hospitals offer cashless claims under this policy?",
           "Is there any add-on cover for critical illnesses?"
          ]

answers = ["₹20 lakhs",
           "3 years",
           "5%",
           "₹75,000",
           "100%",
           "8,000 hospitals",
           "Yes, for cancer and liver disease"
          ]

# zip() stops at the shorter list, so a missing answer would silently drop queries from the evaluation.
assert len(queries) == len(answers), "queries and answers must pair up one-to-one"

for q,a in zip(queries,answers):
  give_an_answer(context,q,a)


Insights : What is the sum insured under the comprehensive health insurance plan?
Extracted Keypoints : ₹20 lakhs
True Answer: ₹20 lakhs
EM: 1
F1: 1.0


Insights : What is the waiting period for pre-existing conditions?
Extracted Keypoints : 3 years
True Answer: 3 years
EM: 1
F1: 1.0


Insights : What is the co-payment percentage for senior citizens?
Extracted Keypoints : 5 %
True Answer: 5%
EM: 1
F1: 1.0


Insights : How much coverage is provided for maternity benefits?
Extracted Keypoints : ₹75, 000
True Answer: ₹75,000
EM: 0
F1: 0


Insights : What is the maximum no-claim bonus percentage limit?
Extracted Keypoints : 100 %
True Answer: 100%
EM: 1
F1: 1.0


Insights : How many hospitals offer cashless claims under this policy?
Extracted Keypoints : more than 8, 000
True Answer: 8,000 hospitals
EM: 0
F1: 0


Insights : Is there any add-on cover for critical illnesses?
Extracted Keypoints : an optional add - on cover for critical illnesses like cancer and liver disease can be added to 

In [10]:
# Longer EliteCare policy text with ten questions and their reference answers.
context = """
The EliteCare health insurance policy is designed for high-net-worth individuals and offers extensive coverage for critical and non-critical illnesses,
as well as advanced medical treatments. The policy has a base sum insured of ₹50 lakhs, which can be increased up to ₹1 crore with a super top-up option.
Policyholders are eligible for global coverage, allowing them to seek treatments in any hospital across the world, with expenses reimbursed as per policy terms.
The policy includes a 1-year waiting period for specified diseases and a 4-year waiting period for pre-existing conditions. It also covers alternative
treatments like Ayurveda, Homeopathy, and Unani, up to 20% of the sum insured. Maternity and newborn baby benefits are included with a sub-limit of ₹1 lakh,
while coverage for genetic disorders and advanced treatments like robotic surgery is available without any sub-limits. Additionally, policyholders can
access wellness programs that provide regular health checkups, fitness tracking, and teleconsultations with specialists. A 10% co-payment clause is applicable
for claims made outside the country. The no-claim bonus accumulates at 25% per claim-free year, capped at a maximum of 200% of the sum insured.
Furthermore, policyholders can avail unlimited restoration benefits, which allow them to restore the full sum insured in case the coverage is exhausted
due to multiple claims within the policy year. EliteCare also offers a personalized concierge service that helps with hospitalization arrangements,
claim processing, and post-hospitalization follow-ups. The policy offers cashless treatment at over 10,000 network hospitals in India and unlimited
teleconsultations with doctors globally.
"""

queries = ["What is the base sum insured in the EliteCare policy?",
           "What is the maximum sum insured if the super top-up option is chosen?",
           "What is the waiting period for pre-existing conditions under this policy?",
           "Does the policy cover alternative treatments? If yes, up to what percentage?",
           "What is the sub-limit for maternity and newborn baby benefits?",
           "Is global coverage included in the EliteCare policy?",
           "How does the no-claim bonus accumulate, and what is its maximum limit?",
           "What benefits are offered under the unlimited restoration clause?",
           "What co-payment percentage applies to claims made outside the country?",
           "How many hospitals offer cashless treatment under this policy?"
          ]

answers = ["₹50 lakhs",
           "₹1 crore",
           "4 years",
           "Yes, up to 20% of the sum insured",
           "₹1 lakh",
           "Yes, global coverage is included",
           "25% per claim-free year, up to a maximum of 200%",
           "Unlimited restoration of the sum insured if exhausted due to multiple claims",
           "10%",
           "10,000 hospitals"
          ]

# zip() stops at the shorter list, so a missing answer would silently drop queries from the evaluation.
assert len(queries) == len(answers), "queries and answers must pair up one-to-one"

for q,a in zip(queries,answers):
  give_an_answer(context,q,a)


Insights : What is the base sum insured in the EliteCare policy?
Extracted Keypoints : ₹50 lakhs
True Answer: ₹50 lakhs
EM: 1
F1: 1.0


Insights : What is the maximum sum insured if the super top-up option is chosen?
Extracted Keypoints : ₹1 crore
True Answer: ₹1 crore
EM: 1
F1: 1.0


Insights : What is the waiting period for pre-existing conditions under this policy?
Extracted Keypoints : 4 - year
True Answer: 4 years
EM: 0
F1: 0.5


Insights : Does the policy cover alternative treatments? If yes, up to what percentage?
Extracted Keypoints : 20 %
True Answer: Yes, up to 20% of the sum insured
EM: 0
F1: 0.25


Insights : What is the sub-limit for maternity and newborn baby benefits?
Extracted Keypoints : ₹1 lakh
True Answer: ₹1 lakh
EM: 1
F1: 1.0


Insights : Is global coverage included in the EliteCare policy?
Extracted Keypoints : policyholders are eligible for global coverage
True Answer: Yes, global coverage is included
EM: 0
F1: 0.3636363636363636


Insights : How does the no-clai