In [1]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from ollama import chat
from ollama import ChatResponse
import pandas as pd
from tqdm import tqdm
from rouge_score import rouge_scorer
import sacrebleu
from bert_score import score as bert_score
from medcon import medcon
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


# Prompt Templates

### Zero Shot Prompting

In [2]:
# ---------------- Zero-Shot Prompt Templates ----------------
finding_zero_shot = """
Expertise: You are an expert medical professional.

Instruction:
Summarize the radiology report findings into an impression with minimal text.
"""

summerization_zero_shot = """
Expertise: You are a helpful medical assistant.

Instruction:
Summarize the patient health query into one question of 15 words or less.
"""

note_zero_shot = """
Expertise: You are an expert clinician.

Instruction:
Based on the progress note, generate a list of 3–7 problems (a few words each) 
ranked in order of importance.
"""

dialogue_zero_shot = """
Expertise: You are an expert physician.

Instruction:
Summarize the patient/doctor dialogue into an assessment and plan.
"""

# example = radiology_zero_shot.format(radiology_findings_text="CT scan shows left lung mass with mediastinal shift.")
# print(example)


### Few Shot Prompting

In [3]:
import pandas as pd

# df = pd.read_csv('/home/tahlilmahfuz/NLP Project/train/findings.csv')
# df = pd.read_csv('/home/tahlilmahfuz/NLP Project/train/summarization.csv')
# df = pd.read_csv('/home/tahlilmahfuz/NLP Project/train/notes.csv')
df = pd.read_csv('/home/tahlilmahfuz/NLP Project/train/dialogues.csv')
df.head(5)

Unnamed: 0.1,Unnamed: 0,dialogue,assessment
0,0,"hi , martha . how are you ? i'm doing okay . ...",CHIEF COMPLAINT Annual exam. HISTORY OF PRESEN...
1,1,"hi , andrew , how are you ? hi . good to see ...",CHIEF COMPLAINT Joint pain. HISTORY OF PRESENT...
2,2,"hi , john . how are you ? hey . well , relati...",CHIEF COMPLAINT Back pain. HISTORY OF PRESENT ...
3,3,"hi , james , how are you ? hey , good to see ...",CHIEF COMPLAINT Back pain. HISTORY OF PRESENT ...
4,4,"hey , ms. hill . nice to see you . hi , dr. j...",CC: Right middle finger pain. HPI: Ms. Hill is...


In [4]:
# ---------------- Few-Shot Prompt Templates ----------------
finding_few_shot = """
Expertise: You are an expert medical professional.

Instruction:
Summarize the radiology report findings into an impression with minimal text.

Examples
Use the examples to guide word choice.

finding 1: Cardiomediastinal silhouette is normal. There is no pleural effusion or pneumothorax. There is no focal lung consolidation.
impression 1: No radiographic explanation for chest pain.
##
finding 2: The lungs are relatively well-expanded and clear. The cardiomediastinal silhouette is unremarkable. The hilar pleural surfaces are within normal limits. Mild degenerative changes are noted throughout the thoracic spine.
impression 2: No acute cardiopulmonary process.
##

"""

summerization_few_shot = """
Expertise
You are a helpful medical assistant.

Instruction (task-specific)
Summarize the patient health query into one question of 15 words or less.

Examples
Use the examples to guide word choice.

prompt 1: 
Task: Convert verbose patient questions into short medical questions.
Input: acute constipation Sir, 1. I wish to know if severe constipation/non clearence of bowels is also related to Diabetes in old age people. 2. Request if you may provide any medical paper associated to my query.

response 1: Does diabetes affect bowel movements?
##
prompt 2: 
Task: Convert verbose patient questions into short medical questions.
Input: I was trying get more information on Pain associated with Polycythemia Vera and medications usually used to treat pain in Polycythemia vera. Thank you, [NAME]

response 2: What treatments are available for pain associated with Polycythemia Vera?
##
"""

note_few_shot = """
Expertise
You are an expert clinician.

Instruction (task-specific)
Based on the progress note, generate a list of 3–7 problems (a few words each) 
ranked in order of importance.

Examples
Use the examples to guide word choice.

note 1: 
**History** The patient reported experiencing a cough, primarily occurring in the morning, which has persisted for several days. Additionally, they described a scraping pain in the windpipe, particularly noticeable during deep breaths and coughing episodes. The patient has a history of smoking, which may exacerbate respiratory symptoms. They have not experienced dyspnea or fever. The patient mentioned that the cough is productive at times with occasional slightly discolored sputum. **Physical Examination** Upon examination, the patient's lung sounds were clear with no wheezes, rales, or rhonchi. There was no evidence of respiratory distress, and oxygen saturation was within the normal range. Chest symmetry observed with no use of accessory muscles noted. Throat examination revealed mild erythema but no significant abnormalities.
problem 1:
**History** Pt reports morning cough for several days with scraping pain in windpipe, esp. on deep breaths and coughing. HX: smoker. No dyspnea/fever. Occasionally, cough is productive with slightly discolored sputum. **Physical Examination** Lungs: CTA, no wheezes/rales/rhonchi. No respiratory distress; O2 sat WNL; chest symm., no accessory muscle use. Throat: mild erythema, otherwise unremarkable.
##
note 2: 
**History** Patient reports experiencing burning pain localized to the trachea. The onset of symptoms began approximately three days ago. The patient describes the pain as constant with varying degrees of intensity throughout the day, and it worsens when breathing deeply. There are no associated nasal symptoms such as congestion or runny nose. The patient denies experiencing any dyspnea, wheezing, or shortness of breath. No prior history of similar pain episodes. **Physical Examination** Vital signs are within normal limits. Inspection of the oropharynx reveals no abnormalities. Respiratory examination indicates normal breath sounds with no wheezing or crackles auscultated. No tracheal deviation or palpable tracheal tenderness noted. Cardiovascular examination is unremarkable. The patient's overall physical examination does not reveal any overt cause for the burning tracheal pain.
problem 2:
**History** Pt c/o burning pain localized to trachea x3 days. Pain constant, varies in intensity, worse with deep breaths. No assoc. nasal sx (congestion, runny nose). Pt denies dyspnea, wheezing, sob. No prior similar episodes. **Physical Examination** VS WNL. Oropharynx nl. Resp exam: nl breath sounds, no wheezing/crackles. No tracheal deviation or tenderness. CVS exam unremarkable. Overall, PE does not reveal overt cause for burning tracheal pain.
##
"""

dialogue_few_shot = """
Expertise: You are an expert radiologist.

Instruction (task-specific)
Summarize the patient/doctor dialogue into an assessment and plan.

Examples
Use the examples to guide word choice.

dialogue 1: 
hi , martha . how are you ?  i'm doing okay . how are you ?  i'm doing okay . so , i know the nurse told you about dax . i'd like to tell dax a little bit about you , okay ?  okay .  martha is a 50-year-old female with a past medical history significant for congestive heart failure , depression and hypertension who presents for her annual exam . so , martha , it's been a year since i've seen you . how are you doing ?  i'm doing well . i've been traveling a lot recently since things have , have gotten a bit lighter . and i got my , my vaccine , so i feel safer about traveling . i've been doing a lot of hiking . uh , went to washington last weekend to hike in northern cascades, like around the mount baker area .  nice . that's great . i'm glad to hear that you're staying active , you know . i , i just love this weather . i'm so happy the summer is over . i'm definitely more of a fall person .  yes , fall foliage is the best .  yeah . um , so tell me , how are you doing with the congestive heart failure ? how are you doing watching your diet ? i know we've talked about watching a low sodium diet . are you doing okay with that ?  i've been doing well with that . i resisted , as much , as i could , from the tater tots , you know , the soft pretzels , the salty foods that i , i love to eat . and i've been doing a really good job .  okay , all right . well , i'm glad to hear that . and you're taking your medication ?  yes .  okay , good . and any symptoms like chest pains , shortness of breath , any swelling in your legs ?  no , not that i've noticed .  okay , all right . and then in terms of your depression , i know that we tried to stay off of medication in the past because you're on medications for your other problems . how are you doing ? and i know that you enrolled into therapy . is that helping ? or-  yeah , it's been helping a lot . i've been going every week , um , for the past year since my last annual exam . and that's been really helpful for me .  okay . so , no , no issues , no feelings of wanting to harm yourself or hurt others ?  no , nothing like that .  okay , all right . and then in terms of your high blood pressure , i know that you and i have kind of battled in the past with you remembering to take some of your blood pressure medications . how are you doing with that ?  i'm still forgetting to take my blood pressure medication . and i've noticed when work gets more stressful , my blood pressure goes up .  okay . and , and so how has work going for you ?  it's been okay . it's been a lot of long hours , late nights . a lot of , um , you know , fiscal year end data that i've been having to pull . so , a lot of responsibility , which is good . but with the responsibility comes the stress .  yeah , okay , all right . i understand . um , all right . well , i know that you did a review of system sheet when you checked in with the nurse . i know that you were endorsing some nasal congestion from some of the fall pollen and allergies . any other symptoms , nausea or vomiting , abdominal pain , anything like that ?  no , nothing like that .  no , okay , all right . well , i'm gon na go ahead and do a quick physical exam , okay ?  okay .  hey , dragon , show me the blood pressure . so , yeah , looking at your blood pressure today here in the office , it is a little elevated . you know , it could just , you could just be nervous . uh , let's look at some of the past readings . hey , dragon , show me the blood pressure readings . hey , dragon , show me the blood pressure readings . here we go . uh , so they are running on the higher side . um , y- you know , i , i do think that , you know , i'd like to see you take your medication a little bit more , so that we can get that under control a little bit better , okay ?  okay .  so , i'm just gon na check out your heart and your lungs . and you know , let you know what i find , okay ?  okay .  okay . so , on your physical examination , you know , everything looks good . on your heart exam , i do appreciate a three out of six systolic ejection murmur , which i've heard in the past , okay ? and on your lower extremities , i do appreciate one plus pitting edema , so you do have a little bit of fluid in your legs , okay ?  okay .  let's go ahead , i wan na look at some of your results , okay ? hey , dragon , show me the echocardiogram . so , this is the result of the echocardiogram that we did last year . it showed that you have that low-ish pumping function of your heart at about 45 % . and it also sh- shows some mitral regurgitation , that's that heart murmur that i heard , okay ?  um , hey , dragon , show me the lipid panel . so , looking at your lipid panel from last year , you know , everything , your cholesterol was like , a tiny bit high . but it was n't too , too bad , so i know you're trying to watch your diet . so , we'll repeat another one this year , okay ?  okay .  um , so i wan na just go over a little bit about my assessment and my plan for you , okay ? so , for your first problem your congestive heart failure , um , i wan na continue you on your current medications . but i do wan na increase your lisinopril to 40 milligrams a day , just because your blood pressure's high . and you know , you are retaining a little bit of fluid . i also wan na start you on some lasix , you know , 20 milligrams a day . and have you continue to watch your , your diet , okay ?  okay .  i also wan na repeat another echocardiogram , okay ?  all right .  hey , dragon , order an echocardiogram . from a depression standpoint , it sounds like you're doing really well with that . so , i'm , i'm really happy for you . i'm , i'm glad to see that you're in therapy and you're doing really well . i do n't feel the need to start you on any medications this year , unless you feel differently .  no , i feel the same way .  okay , all right . and then for your last problem your hypertension , you know , again i , i , i think it's out of control . but we'll see , i think , you know , i'd like to see you take the lisinopril as directed , okay ? uh , i want you to record your blood pressures within the patient , you know , take your blood pressure every day . record them to me for like , about a week , so i have to see if we have to add another agent , okay ? 'cause we need to get that under better control for your heart failure to be more successful , okay ?  okay .  do you have any questions ? , and i forgot . for your annual exam , you're due for a mammogram , so we have to schedule for that , as well , okay ?  okay .  okay . do you have any questions ?  can i take all my pills at the same time ?  yeah .  'cause i've been trying to take them at different times of the day , 'cause i did n't know if it was bad to take them all at once or i should separate them . i do n't know .  yeah . you can certainly take them , you know , all at the same time , as long , as yeah , they're all one scale . you can take them all at the same time . just set an alarm-  okay .  . some time during the day to take them , okay ?  that might help me remember better .  all right . that sounds good . all right , well , it's good to see you .  good seeing you too .  hey , dragon , finalize the note .
assessment 1:
CHIEF COMPLAINT Annual exam. HISTORY OF PRESENT ILLNESS Martha Collins is a 50-year-old female with a past medical history significant for congestive heart failure, depression, and hypertension who presents for her annual exam. It has been a year since I last saw the patient. The patient has been traveling a lot recently since things have gotten a bit better. She reports that she got her COVID-19 vaccine so she feels safer about traveling. She has been doing a lot of hiking. She reports that she is staying active. She has continued watching her diet and she is doing well with that. The patient states that she is avoiding salty foods that she likes to eat. She has continued utilizing her medications. The patient denies any chest pain, shortness of breath, or swelling in her legs. Regarding her depression, she reports that she has been going to therapy every week for the past year. This has been really helpful for her. She denies suicidal or homicidal ideation. The patient reports that she is still forgetting to take her blood pressure medication. She has noticed that when work gets more stressful, her blood pressure goes up. She reports that work has been going okay, but it has been a lot of long hours lately. She endorses some nasal congestion from some of the fall allergies. She denies any other symptoms of nausea, vomiting, abdominal pain. REVIEW OF SYSTEMS • Ears, Nose, Mouth and Throat: Endorses nasal congestion from allergies. • Cardiovascular: Denies chest pain or dyspnea on exertion. • Respiratory: Denies shortness of breath. • Gastrointestinal: Denies abdominal pain, nausea, or vomiting. • Psychiatric: Endorses depression. Denies suicidal or homicidal ideations. PHYSICAL EXAMINATION • Cardiovascular: Grade 3/6 systolic ejection murmur. 1+ pitting edema of the bilateral lower extremities. VITALS REVIEWED • Blood Pressure: Elevated. RESULTS Echocardiogram demonstrates decreased ejection fraction of 45%. Mitral regurgitation is present. Lipid panel: Elevated cholesterol. ASSESSMENT AND PLAN Martha Collins is a 50-year-old female with a past medical history significant for congestive heart failure, depression, and hypertension who presents for her annual exam. Congestive heart failure. • Medical Reasoning: She has been compliant with her medication and dietary modifications. Her previous year's echocardiogram demonstrated a reduced ejection fraction of 45%, as well as some mitral regurgitation. Her cholesterol levels were slightly elevated on her lipid panel from last year. • Additional Testing: We will order a repeat echocardiogram. We will also repeat a lipid panel this year. • Medical Treatment: She will continue with her current medications. We will increase her lisinopril to 40 mg daily and initiate Lasix 20 mg daily. • Patient Education and Counseling: I encouraged her to continue with dietary modifications. Depression. • Medical Reasoning: She is doing well with weekly therapy. Hypertension. • Medical Reasoning: She has been compliant with dietary modifications but has been inconsistent with the use of her medication. She attributes elevations in her blood pressure to increased stress. • Medical Treatment: We will increase her lisinopril to 40 mg daily as noted above. • Patient Education and Counseling: I encouraged the patient to take her lisinopril as directed. I advised her to monitor her blood pressures at home for the next week and report them to me. Healthcare maintenance. • Medical Reasoning: The patient is due for her routine mammogram. • Additional Testing: We will order a mammogram and have this scheduled for her. Patient Agreements: The patient understands and agrees with the recommended medical treatment plan.
Plan: Adjust medications, recommend dietary changes, follow-up in 2 weeks
##
dialogue 2: 
hi , andrew , how are you ?  hi . good to see you .  it's good to see you as well . so i know that the nurse told you about dax . i'd like to tell dax a little bit about you .  sure .  okay ? so , andrew is a 62-year-old male with a past medical history significant for a kidney transplant , hypothyroidism , and arthritis , who presents today with complaints of joint pain . andrew , what's going on with your joint ? what happened ?  uh , so , over the the weekend , we've been moving boxes up and down our basements stairs , and by the end of the day my knees were just killing me .  okay . is , is one knee worse than the other ?  equally painful .  okay .  both of them .  and did you , did you injure one of them ?  um , uh , i've had some knee problems in the past but i think it was just the repetition and the weight of the boxes .  okay . all right . and , and what have you taken for the pain ?  a little tylenol . i iced them for a bit . nothing really seemed to help , though .  okay . all right . um , and does it prevent you from doing , like , your activities of daily living , like walking and exercising and things like that ?  uh , saturday night it actually kept me up for a bit . they were pretty sore .  mm-hmm . okay . and any other symptoms like fever or chills ?  no .  joint pain ... i mean , like muscle aches ?  no .  nausea , vomiting , diarrhea ?  no .  anything like that ?  no .  okay . all right . now , i know that you've had the kidney transplant a few years ago for some polycystic kidneys .  mm-hmm .  um , how are you doing with that ? i know that you told dr. gutierrez-  mm .  . a couple of weeks ago .  yes .  everything's okay ?  so far , so good .  all right . and you're taking your immunosuppressive medications ?  yes , i am .  okay . all right . um , and did they have anything to say ? i have n't gotten any reports from them , so ...  no , n- nothing out of the ordinary , from what they reported .  okay . all right . um , and in terms of your hyperthyroidism , how are you doing with the synthroid ? are you doing okay ?  uh , yes , i am .  you're taking it regularly ?  on the clock , yes .  yes . okay . and any fatigue ? weight gain ? anything like that that you've noticed ?  no , nothing out of the ordinary .  okay . and just in general , you know , i know that we've kind of battled with your arthritis .  mm-hmm .  you know , it's hard because you ca n't take certain medications 'cause of your kidney transplant .  sure .  so other than your knees , any other joint pain or anything like that ?  every once in a while , my elbow , but nothing , nothing out of the ordinary .  okay . all right . now i know the nurse did a review of systems sheet when you checked in . any other symptoms i might have missed ?  no .  no headaches ?  no headaches .  anything like that w- ... okay . all right . well , i wan na go ahead and do a quick physical exam , all right ? hey , dragon , show me the vital signs . so here in the office , your vital signs look good . you do n't have a fever , which is good .  mm-hmm .  your heart rate and your , uh , blood pressure look fine . i'm just gon na check some things out , and i'll let you know what i find , okay ?  perfect .  all right . does that hurt ?  a little bit . that's tender .  okay , so on physical examination , on your heart exam , i do appreciate a little two out of six systolic ejection murmur-  mm-hmm .  . which we've heard in the past . okay , so that seems stable . on your knee exam , there is some edema and some erythema of your right knee , but your left knee looks fine , okay ? um , you do have some pain to palpation of the right knee and some decreased range of motion , um , on exam , okay ? so what does that mean ? so we'll go ahead and we'll see if we can take a look at some of these things . i know that they did an x-ray before you came in , okay ?  mm-hmm .  so let's take a look at that .  sure .  hey , dragon , show me the right knee x-ray . so here's the r- here's your right knee x-ray . this basically shows that there's good bony alignment . there's no acute fracture , which is not surprising , based on the history .  mm-hmm .  okay ? hey , dragon , show me the labs . and here , looking at your lab results , you know , your white blood cell count is not elevated , which is good . you know , we get concerned about that in somebody who's immunocompromised .  mm-hmm .  and it looks like your kidney function is also very good . so i'm , i'm very happy about that .  yeah .  okay ? so i just wan na go over a little bit about my assessment and my plan for you .  mm-hmm .  so for your knee pain , i think that this is an acute exacerbation of your arthritis , okay ? so i wan na go ahead and if ... and prescribe some ultram 50 milligrams every six hours as needed .  okay .  okay ? i also wan na go ahead and just order an autoimmune panel , okay ? hey , dragon , order an autoimmune panel . and you know , i , i want , i want you to just take it easy for right now , and if your symptoms continue , we'll talk about further imaging and possibly referral to physical therapy , okay ?  you got it .  for your second problem , your hypothyroidism , i wan na go ahead and continue you on this ... on the synthroid , and i wan na go ahead and order some thyroid labs , okay ?  sure .  hey , dragon , order a thyroid panel . and then for your last problem , the arthritis , you know , we just kinda talked about that . you know , it's gon na be a struggle for you because again , you ca n't take some of those anti-inflammatory medications because of your kidney transplant , so ...  mm-hmm .  you know , let's see how we do over the next couple weeks , and again , we'll refer you to physical therapy if we need to , okay ?  you got it .  you have any questions ?  not at this point .  okay . hey , dragon , finalize the note .
assessment 2:
CHIEF COMPLAINT Joint pain. HISTORY OF PRESENT ILLNESS Andrew Perez is a 62-year-old male with a past medical history significant for a kidney transplant, hypothyroidism, and arthritis. He presents today with complaints of joint pain. The patient reports that over the weekend, he was moving boxes up and down the basement stairs. By the end of the day, his knees were very painful. The pain is equal in the bilateral knees. He states that he has had some knee problems in the past, but he believes that it was due to the repetition and the weight of the boxes. He states that the pain does not prevent him from doing his activities of daily living. By the end of the day on Saturday, his knee soreness interrupted his sleep. The patient has taken Tylenol and iced his knees for a short period of time, but nothing really seemed to help. The patient states that he had a kidney transplant a few years ago for some polycystic kidneys. He notes that he saw Dr. Gutierrez a couple of weeks ago, and everything was normal at that time. The patient continues to utilize his immunosuppressant medications. Regarding his hypothyroidism, the patient states that he is doing well. He has continued to utilize Synthroid regularly. In regards to his arthritis, the patient states that occasionally he has pain in his elbow, but nothing out of the ordinary. He denies any other symptoms such as fever, chills, muscle aches, nausea, vomiting, diarrhea, fatigue, and weight loss. REVIEW OF SYSTEMS • Constitutional: Denies fevers, chills, or weight loss. • Musculoskeletal: Denies muscle pain. Endorses joint pain in the bilateral knees. • Neurological: Denies headaches. PHYSICAL EXAMINATION • Cardiovascular: 2/6 systolic ejection murmur, stable. • Musculoskeletal: There is edema and erythema of the right knee with pain to palpation. Range of motion is decreased. Left knee exam is normal. RESULTS X-ray of the right knee is unremarkable. Good bony alignment. No acute fractures. Labs: Within normal limits. White blood cell count is within normal limits. Kidney function is normal. ASSESSMENT AND PLAN Andrew Perez is a 62-year-old male with a past medical history significant for a kidney transplant, hypothyroidism, and arthritis. He presents today with complaints of joint pain. Arthritis. • Medical Reasoning: The patient reports increased joint pain in his bilateral knees over the past weekend. Given that his right knee x-ray was unremarkable, I believe this is an acute exacerbation of his arthritis. • Additional Testing: We will order an autoimmune panel for further evaluation. • Medical Treatment: Initiate Ultram 50 mg every 6 hours as needed. • Patient Education and Counseling: I advised the patient to rest his knees. If his symptoms persist, we can consider further imaging and possibly a referral to physical therapy. Hypothyroidism. • Medical Reasoning: The patient is doing well on Synthroid and is asymptomatic at this time. • Additional Testing: We will order a thyroid panel. • Medical Treatment: Continue Synthroid. Status post renal transplant. • Medical Reasoning: He is doing well and has been compliant with his immunosuppressive medications. On recent labs, his white blood cell count was within a normal limits and his kidney function is stable. • Medical Treatment: Continue current regimen. Patient Agreements: The patient understands and agrees with the recommended medical treatment plan.
Plan: Start physiotherapy, prescribe NSAIDs, encourage weight management
##
"""

# In Context Learning

### Data preprocessing

In [5]:
# import pandas as pd
# import os

# if not os.path.exists('datasets'):
#     os.makedirs('dataset/train')
#     os.makedirs('dataset/validate')

# # training set
# df = pd.read_csv(f'{os.getcwd()}/train/dialogues.csv')
# df = df.dropna()
# df.to_csv(f'dataset/train/dialogues.csv', index=False)

# df = pd.read_csv(f'{os.getcwd()}/train/findings.csv')
# df = df.dropna()
# df.to_csv(f'dataset/train/findings.csv', index=False)

# df = pd.read_csv(f'{os.getcwd()}/train/notes.csv')
# df = df.dropna()
# df.to_csv(f'dataset/train/notes.csv', index=False)
# df = pd.read_csv(f'{os.getcwd()}/train/summarization.csv')
# df = df.dropna()
# df.to_csv(f'dataset/train/summarization.csv', index=False)

# # validation set
# df = pd.read_csv(f'{os.getcwd()}/validate/dialogues.csv')
# df = df.dropna()
# df.to_csv(f'dataset/validate/dialogues.csv', index=False)

# df = pd.read_csv(f'{os.getcwd()}/validate/findings.csv')
# df = df.dropna()
# df.to_csv(f'dataset/validate/findings.csv', index=False)

# df = pd.read_csv(f'{os.getcwd()}/validate/notes.csv')
# df = df.dropna()
# df.to_csv(f'dataset/validate/notes.csv', index=False)
# df = pd.read_csv(f'{os.getcwd()}/validate/summarization.csv')
# df = df.dropna()
# df.to_csv(f'dataset/validate/summarization.csv', index=False)

### Rag Pipeline

In [6]:
import pandas as pd
import os

train_dialogues = pd.read_csv(f'{os.getcwd()}/dataset/train/dialogues.csv')
train_findings = pd.read_csv(f'{os.getcwd()}/dataset/train/findings.csv')
train_notes = pd.read_csv(f'{os.getcwd()}/dataset/train/notes.csv')
train_summarization = pd.read_csv(f'{os.getcwd()}/dataset/train/summarization.csv')

validate_dialogues = pd.read_csv(f'{os.getcwd()}/dataset/validate/dialogues.csv')
validate_findings = pd.read_csv(f'{os.getcwd()}/dataset/validate/findings.csv')
validate_notes = pd.read_csv(f'{os.getcwd()}/dataset/validate/notes.csv')
validate_summarization = pd.read_csv(f'{os.getcwd()}/dataset/validate/summarization.csv')

#### Findings(Radiology)

In [7]:
df = train_findings
df = df.dropna(subset=['finding', 'impression'])
finding = df['finding'].tolist()
impressions = df['impression'].tolist()

In [8]:
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(finding, convert_to_tensor=False, show_progress_bar=True)

Batches: 100%|██████████| 3181/3181 [00:27<00:00, 115.87it/s]


In [9]:
index = faiss.IndexFlatIP(embeddings.shape[1])
faiss.normalize_L2(embeddings)
index.add(embeddings)

In [10]:
def retrieve_findings(query, k=5):
    query_emb = model.encode(query, convert_to_numpy=True, normalize_embeddings=True)
    faiss.normalize_L2(query_emb.reshape(1, -1))  # normalize the query
    D, I = index.search(query_emb.reshape(1, -1), k)
    results = []
    for idx in I[0]:
        if idx < 0 or idx >= len(finding):
            continue  # skip invalid indices
        results.append({
            'finding': finding[idx],
            'impression': impressions[idx]
        })
    return results

In [None]:
# Validation using zero shot
validate_df = validate_findings
generated = []
for idx, row in tqdm(validate_df.iterrows(), total=len(validate_df), desc="Validating"):
    query = row['finding']
    results = retrieve_findings(query, k=3)
    retrived_chunks = "\n\n".join([f"Finding: {res['finding']}\nImpression: {res['impression']}" for res in results])
    system_prompt = finding_zero_shot
    user_prompt = f"Finding: {query}\n\nRelevant Examples:\n{retrived_chunks}\n\nGenerate a concise impression based on the finding above."
    response = chat(
        model="llama3.1:8b",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
    )
    # print(query)
    # print("Ollama Response:\n", response['message']['content'], "\n")
    generated_impression = response['message']['content'].strip()
    generated.append({'finding': query, 'validation_impression': row['impression'], 'generated_impression': generated_impression})
    if idx >= 2000:  # Limit to first 2000
        break

Validating:   8%|▊         | 2000/25516 [18:20<3:35:40,  1.82it/s]


In [13]:
import os
os.makedirs("results_llama", exist_ok=True)
zeroShotFindingLlama = pd.DataFrame(generated)
zeroShotFindingLlama.to_csv("results_llama/zeroShotFindingLlama.csv", index=False)
print("✅ Saved results_llama/zeroShotFindingLlama.csv")

✅ Saved results_llama/zeroShotFindingLlama.csv


In [None]:
# Convert to DataFrame
gen_df = pd.DataFrame(generated)

# Prepare references and candidates
references = gen_df['validation_impression'].tolist()
candidates = gen_df['generated_impression'].tolist()

# BLEU
bleu = sacrebleu.corpus_bleu(candidates, [references])
print("BLEU:", bleu.score)

# ROUGE-L
rouge = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
rouge_scores = [rouge.score(ref, cand)['rougeL'].fmeasure for ref, cand in zip(references, candidates)]
print("ROUGE-L (mean):", sum(rouge_scores)/len(rouge_scores))

# BERTScore
P, R, F1 = bert_score(candidates, references, lang="en", rescale_with_baseline=True)
print("BERTScore F1 (mean):", float(F1.mean()))

# MedCon
# medcon_scores = [medcon_score(ref, cand) for ref, cand in zip(references, candidates)]
# print("MedCon (mean):", sum(medcon_scores)/len(medcon_scores))

BLEU: 9.35084228553478
ROUGE-L (mean): 0.3188730420583639


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BERTScore F1 (mean): 0.32010746002197266


In [11]:
# Validation using few shot
validate_df = validate_findings
generated = []
for idx, row in tqdm(validate_df.iterrows(), total=len(validate_df), desc="Validating"):
    query = row['finding']
    results = retrieve_findings(query, k=3)
    retrived_chunks = "\n\n".join([f"Finding: {res['finding']}\nImpression: {res['impression']}" for res in results])
    system_prompt = finding_few_shot
    user_prompt = f"Finding: {query}\n\nRelevant Examples:\n{retrived_chunks}\n\nGenerate a concise impression based on the finding above."
    response = chat(
        model="llama3.1:8b",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
    )
    # print(query)
    # print("Ollama Response:\n", response['message']['content'], "\n")
    generated_impression = response['message']['content'].strip()
    generated.append({'finding': query, 'validation_impression': row['impression'], 'generated_impression': generated_impression})
    if idx >= 2000:  # Limit to first 2000
        break

Validating:   8%|▊         | 2000/25516 [19:45<3:52:16,  1.69it/s]


In [13]:
import os
os.makedirs("results_llama", exist_ok=True)
fewShotFindingLlama = pd.DataFrame(generated)
fewShotFindingLlama.to_csv("results_llama/fewShotFindingLlama.csv", index=False)
print("✅ Saved results_llama/fewShotFindingLlama.csv")

✅ Saved results_llama/fewShotFindingLlama.csv


In [12]:
# Convert to DataFrame
gen_df = pd.DataFrame(generated)

# Prepare references and candidates
references = gen_df['validation_impression'].tolist()
candidates = gen_df['generated_impression'].tolist()

# BLEU
bleu = sacrebleu.corpus_bleu(candidates, [references])
print("BLEU:", bleu.score)

# ROUGE-L
rouge = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
rouge_scores = [rouge.score(ref, cand)['rougeL'].fmeasure for ref, cand in zip(references, candidates)]
print("ROUGE-L (mean):", sum(rouge_scores)/len(rouge_scores))

# BERTScore
P, R, F1 = bert_score(candidates, references, lang="en", rescale_with_baseline=True)
print("BERTScore F1 (mean):", float(F1.mean()))

# MedCon
# medcon_scores = [medcon_score(ref, cand) for ref, cand in zip(references, candidates)]
# print("MedCon (mean):", sum(medcon_scores)/len(medcon_scores))

BLEU: 9.084616258345454
ROUGE-L (mean): 0.3216817302158076


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BERTScore F1 (mean): 0.3005770146846771


#### Summerizations(Patient Questions)

In [None]:
df = train_summarization
df = df.dropna(subset=['prompt', 'response'])
prompts = df['prompt'].tolist()
responses = df['response'].tolist()

In [None]:
embeddings = model.encode(prompts, convert_to_tensor=False, show_progress_bar=True)
index = faiss.IndexFlatIP(embeddings.shape[1])
faiss.normalize_L2(embeddings)
index.add(embeddings)

In [None]:
def retrieve_summaries(query, k=5):
    query_emb = model.encode(query, convert_to_numpy=True, normalize_embeddings=True)
    faiss.normalize_L2(query_emb.reshape(1, -1))  # normalize the query
    D, I = index.search(query_emb.reshape(1, -1), k)
    results = []
    for idx in I[0]:
        if idx < 0 or idx >= len(prompts):
            continue  # skip invalid indices
        results.append({
            'prompt': prompts[idx],
            'response': responses[idx]
        })
    return results

In [None]:
validate_df = validate_summarization
generated = []
for idx, row in tqdm(validate_df.iterrows(), total=len(validate_df), desc="Validating"):
    query = row['prompt']
    if pd.isna(query):
        continue  # skip rows where prompt is NaN
    results = retrieve_summaries(query, k=3)
    retrived_chunks = "\n\n".join([f"Prompt: {res['prompt']}\nResponse: {res['response']}" for res in results])
    system_prompt = summerization_zero_shot
    user_prompt = f"Prompt: {query}\n\nRelevant Examples:\n{retrived_chunks}\n\nGenerate a concise question based on the prompt above."
    response = chat(
        model="llama3.1:8b",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
    )
    # print(query)
    # print("Ollama Response:\n", response['message']['content'], "\n")
    generated_response = response['message']['content'].strip()
    generated.append({'prompt': query, 'validation_response': row['response'], 'generated_response': generated_response})
    if idx >= 2000:  # Limit to first 2000
        break

In [None]:
import os
os.makedirs("results_llama", exist_ok=True)
zeroShotSummerizationLlama = pd.DataFrame(generated)
zeroShotSummerizationLlama.to_csv("results_llama/zeroShotSummerizationLlama.csv", index=False)
print("✅ Saved results_llama/zeroShotSummerizationLlama.csv")

In [None]:
# Convert to DataFrame
gen_df = pd.DataFrame(generated)

# Prepare references and candidates
references = gen_df['validation_impression'].tolist()
candidates = gen_df['generated_impression'].tolist()

# BLEU
bleu = sacrebleu.corpus_bleu(candidates, [references])
print("BLEU:", bleu.score)

# ROUGE-L
rouge = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
rouge_scores = [rouge.score(ref, cand)['rougeL'].fmeasure for ref, cand in zip(references, candidates)]
print("ROUGE-L (mean):", sum(rouge_scores)/len(rouge_scores))

# BERTScore
P, R, F1 = bert_score(candidates, references, lang="en", rescale_with_baseline=True)
print("BERTScore F1 (mean):", float(F1.mean()))

# MedCon
# medcon_scores = [medcon_score(ref, cand) for ref, cand in zip(references, candidates)]
# print("MedCon (mean):", sum(medcon_scores)/len(medcon_scores))

In [None]:
validate_df = validate_summarization
generated = []
for idx, row in tqdm(validate_df.iterrows(), total=len(validate_df), desc="Validating"):
    query = row['prompt']
    if pd.isna(query):
        continue  # skip rows where prompt is NaN
    results = retrieve_summaries(query, k=3)
    retrived_chunks = "\n\n".join([f"Prompt: {res['prompt']}\nResponse: {res['response']}" for res in results])
    system_prompt = summerization_few_shot
    user_prompt = f"Prompt: {query}\n\nRelevant Examples:\n{retrived_chunks}\n\nGenerate a concise question based on the prompt above."
    response = chat(
        model="llama3.1:8b",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
    )
    # print(query)
    # print("Ollama Response:\n", response['message']['content'], "\n")
    generated_response = response['message']['content'].strip()
    generated.append({'prompt': query, 'validation_response': row['response'], 'generated_response': generated_response})
    if idx >= 2000:  # Limit to first 2000
        break

In [None]:
import os
os.makedirs("results_llama", exist_ok=True)
fewShotSummerizationLlama = pd.DataFrame(generated)
fewShotSummerizationLlama.to_csv("results_llama/fewShotSummerizationLlama.csv", index=False)
print("✅ Saved results_llama/fewShotSummerizationLlama.csv")

In [None]:
# Convert to DataFrame
gen_df = pd.DataFrame(generated)

# Prepare references and candidates
references = gen_df['validation_impression'].tolist()
candidates = gen_df['generated_impression'].tolist()

# BLEU
bleu = sacrebleu.corpus_bleu(candidates, [references])
print("BLEU:", bleu.score)

# ROUGE-L
rouge = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
rouge_scores = [rouge.score(ref, cand)['rougeL'].fmeasure for ref, cand in zip(references, candidates)]
print("ROUGE-L (mean):", sum(rouge_scores)/len(rouge_scores))

# BERTScore
P, R, F1 = bert_score(candidates, references, lang="en", rescale_with_baseline=True)
print("BERTScore F1 (mean):", float(F1.mean()))

# MedCon
# medcon_scores = [medcon_score(ref, cand) for ref, cand in zip(references, candidates)]
# print("MedCon (mean):", sum(medcon_scores)/len(medcon_scores))