In [81]:
!pip install transformers
!pip install datasets



In [82]:
import operator
import pandas as pd
import tensorflow as tf
import transformers
from datasets import load_dataset
from tensorflow import keras
from transformers import AutoTokenizer
from transformers import pipeline
from transformers import TFAutoModelForQuestionAnswering

In [83]:
from transformers import BertTokenizer, BertForQuestionAnswering

In [84]:
model_name = "dmis-lab/biobert-base-cased-v1.1-squad"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForQuestionAnswering.from_pretrained(model_name)

Some weights of the model checkpoint at dmis-lab/biobert-base-cased-v1.1-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [85]:
qa = pipeline("question-answering")

No model was supplied, defaulted to distilbert/distilbert-base-cased-distilled-squad and revision 564e9b5 (https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


In [86]:
model_name = "dmis-lab/biobert-base-cased-v1.1-squad"
tokenizer = BertTokenizer.from_pretrained(model_name)

In [87]:
qa_model = pipeline("question-answering", model="bert-large-uncased-whole-word-masking-finetuned-squad")

Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu


In [88]:
questions = {
    "age": "How old is the patient?",
    "complaints": "Does the patient have any complaints?",
    "reason": "What is the reason for this consultation?",
    "gender": "Gender of Patient?",
    "other symptom" :"What other symptoms does the patient have?"
}

In [89]:
text_paragraph = """
"context": "PROCEDURES:, Cystourethroscopy and transurethral resection of prostate.,COMPLICATIONS:, None.,
ADMITTING DIAGNOSIS:, Difficulty voiding.,HISTORY:, This 67-year old Hispanic male patient was admitted
 because of enlarged prostate and symptoms of bladder neck obstruction.  Physical examination revealed
normal heart and lungs.  Abdomen was negative for abnormal findings.  ,LABORATORY DATA:, BUN 19 and
creatinine 1.1.  Blood group was A, Rh positive, Hemoglobin 13, Hematocrit 32.1, Prothrombin time 12.6
seconds, PTT 37.1.  Discharge  hemoglobin 11.4, and hematocrit 33.3.  Chest x-ray calcified old
granulomatous disease, otherwise normal.  EKG was normal.  ,COURSE IN THE HOSPITAL:, The patient had a
cysto and TUR of the prostate.  Postoperative course was uncomplicated. The pathology report is pending
at the time of dictation.  He is being discharged in satisfactory condition with a good urinary stream,
minimal hematuria, and on Bactrim DS one a day for ten days with a standard postprostatic surgery
instruction sheet.  ,DISCHARGE DIAGNOSIS: , Enlarged prostate with benign bladder neck obstruction.
 ,To be followed in my office in one week and by Dr. ABC next available as an outpatient.",
"""

In [90]:
# run this code if you use text paragraph
def extract_information(text, questions):
    results = {}
    for key, question in questions.items():
        answer = qa_model(question=question, context=text)
        results[key] = answer["answer"]
    return results

extracted_info = extract_information(text_paragraph, questions)

In [91]:
# run this code if you use text paragraph
print("Extracted Information:")
for key, value in extracted_info.items():
    print(f"{key.capitalize()}: {value}")

Extracted Information:
Age: 67-year old
Complaints: COMPLICATIONS:, None
Reason: enlarged prostate and symptoms of bladder neck obstruction
Gender: Hispanic male
Other symptom: bladder neck obstruction


In [201]:
# if your data in json file  then use this code and make sure that data in json file
# not too much otherwise this code will take too much time. for test.json file it will
# take approx 4 minute and for train.json it will take approx 8-9 minutes.
file_path = '/content/test.json'   # --> if you use any other json file please  update  the path and also upload the json file  before running the code
squad_dataset = load_dataset('json', data_files={'test': file_path})

import json
with open("test.json", "r") as file:
    squad_data = json.load(file)

In [202]:
# run this code if you use json file
def extract_information(transcription, questions):
    results = {}
    for key, question in questions.items():
        if "paragraphs" in transcription and transcription["paragraphs"]:
            context = transcription["paragraphs"][0]["context"]
            answer = qa_model(question=question, context=context)
            results[key] = answer["answer"]
        else:
            results[key] = "not given"
    return results

In [203]:
# run this code if you use json file
extracted_info_list = []
for transcription in squad_data["data"]:
    extracted_info_list.append(extract_information(transcription, questions))

In [204]:
# run this code if you use json file
for idx, extracted_info in enumerate(extracted_info_list):
    print(f"\nTranscription {idx + 1} - Extracted Information:")
    for key, value in extracted_info.items():
        print(f"{key.capitalize()}: {value}")


Transcription 1 - Extracted Information:
Age: 66-year-old
Complaints: Denies chest pain, shortness of breath or any rashes or lesions
Reason: Urinary retention
Gender: male
Other symptom: benign prostatic hypertrophy

Transcription 2 - Extracted Information:
Age: 34
Complaints: No further problems have been noted upon his discharge and treatment
Reason: to pursue further allergy evaluation and treatment
Gender: 34-year-old male
Other symptom: acute angioedema or allergic reaction or sensation of impending allergic reaction

Transcription 3 - Extracted Information:
Age: 27-year-old
Complaints: no complaints
Reason: Anesthesia and pain
Gender: 27-year-old woman
Other symptom: intractable nausea, vomiting, and abdominal pain

Transcription 4 - Extracted Information:
Age: 68-year-old
Complaints: No lesion seen within the left lung
Reason: Patient needs staging CT of chest with contrast
Gender: white male
Other symptom: Supraclavicular adenopathy


In [92]:
!pip install seqeval



In [93]:
from seqeval.metrics import f1_score as seqeval_f1_score
import numpy as np

In [205]:
ground_truths1 = [
    {
        "age": "66-year-old",
        "complaints": "Denies chest pain, any rashes",
        "reason": "Urinary retention",
        "gender": "Male",
        "other symptom": "Symptoms of benign prostatic hypertrophy"
    },
    {
        "age": "34-year-old",
        "complaints": "Allergy",
        "reason": "Further allergy evaluation and treatment",
        "gender": "Male",
        "other symptom": "cute angioedema or allergic reaction"  # Impossible
    },
    {
        "age": "27-year-old",
        "complaints": "No complaint",
        "reason": "Gastric bypass",
        "gender": "Woman",
        "other symptom": "intractable nausea,vomiting and abdominal pain"  # Impossible
    },
    {
        "age": "68-year-old",
        "complaints": "lesion seen within the left lung",
        "reason": "Patient needs staging CT of chest with contrast",  # Impossible
        "gender": "Male",
        "other symptom": "Supraclavicular"  # Impossible
    }
]


In [206]:
def calculate_f1_score(predictions, ground_truths):
    f1_scores = {}

    # Normalize keys to lowercase for consistent comparison
    predictions = {key.lower(): value for key, value in predictions.items()}
    ground_truths = {key.lower(): value for key, value in ground_truths.items()}

    for field in predictions.keys():
        pred = predictions.get(field, "")
        truth = ground_truths.get(field, "")

        # Normalize integer fields to strings
        if isinstance(truth, int):
            truth = str(truth)
        if isinstance(pred, int):
            pred = str(pred)

        # Special handling for age field to normalize format
        if field == "age":
            pred = f"{pred}-year old" if pred.isdigit() else pred

        if truth is None or pred is None:
            f1_scores[field] = 0.0
            continue

        # Convert to lowercase for case-insensitive comparison
        pred_tokens = pred.lower().split()
        truth_tokens = truth.lower().split()

        # Create binary labels for matching tokens
        true_positive = len(set(pred_tokens) & set(truth_tokens))
        precision = true_positive / len(pred_tokens) if pred_tokens else 0
        recall = true_positive / len(truth_tokens) if truth_tokens else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

        f1_scores[field] = f1

    return f1_scores


# Example data
ground_truths = {
    "age": "67-year old",
    "complaints": "Difficulty.",
    "reason": "Enlarged prostate with benign bladder neck obstruction.",
    "gender": "Hispanic male",
    "other symptom": "bladder neck obstruction"
}

extracted_info = {
    "age": 67,
    "complaints": "COMPLICATIONS:, None",
    "reason": "enlarged prostate and symptoms of bladder neck obstruction",
    "gender": "hispanic Male",
    "other symptom": "bladder neck obstruction"
}
f1_scores = calculate_f1_score(extracted_info, ground_truths)
print(f1_scores)


{'age': 1.0, 'complaints': 0, 'reason': 0.5333333333333333, 'gender': 1.0, 'other symptom': 1.0}


In [207]:
f1_scores = []
for ground_truth, extracted_info in zip(ground_truths1, extracted_info_list):
    f1_scores.append(calculate_f1_score(extracted_info, ground_truth))

In [208]:
for lis in f1_scores:
  print(lis)

{'age': 1.0, 'complaints': 0.625, 'reason': 1.0, 'gender': 1.0, 'other symptom': 0.7499999999999999}
{'age': 0, 'complaints': 0, 'reason': 0.8333333333333333, 'gender': 0.6666666666666666, 'other symptom': 0.5000000000000001}
{'age': 1.0, 'complaints': 0.5, 'reason': 0, 'gender': 0.6666666666666666, 'other symptom': 0.7272727272727272}
{'age': 1.0, 'complaints': 0.923076923076923, 'reason': 1.0, 'gender': 0.6666666666666666, 'other symptom': 0.6666666666666666}


In [209]:
def calculate_average_f1(scores_list):
    # Initialize a dictionary to store cumulative F1 scores and counts
    cumulative_f1_scores = {}
    counts = {}

    # Iterate through the list of F1 score dictionaries
    for scores in scores_list:
        for key, value in scores.items():
            if key not in cumulative_f1_scores:
                cumulative_f1_scores[key] = 0
                counts[key] = 0
            cumulative_f1_scores[key] += value
            counts[key] += 1

    # Calculate average F1 scores
    average_f1_scores = {key: cumulative_f1_scores[key] / counts[key] for key in cumulative_f1_scores}

    return average_f1_scores



In [186]:
average_f1_scores = calculate_average_f1(f1_scores)
print(average_f1_scores)

{'age': 0.75, 'complaints': 0.5120192307692307, 'reason': 0.7083333333333333, 'gender': 0.7499999999999999, 'other symptom': 0.6609848484848484}
