In [2]:
!pip install transformers
!pip install langchain
!pip install langchain_community

Collecting transformers
  Downloading transformers-4.46.2-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.23.2 (from transformers)
  Downloading huggingface_hub-0.26.2-py3-none-any.whl.metadata (13 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2024.11.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.5/40.5 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
Collecting safetensors>=0.4.1 (from transformers)
  Downloading safetensors-0.4.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Collecting tokenizers<0.21,>=0.20 (from transformers)
  Downloading tokenizers-0.20.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading transformers-4.46.2-py3-none-any.whl (10.0 MB)


In [3]:
!pip install peft
!pip install pandas
!pip install faiss-gpu
!pip install faiss-cpu
!pip install sentence_transformers

Collecting peft
  Downloading peft-0.13.2-py3-none-any.whl.metadata (13 kB)
Collecting accelerate>=0.21.0 (from peft)
  Downloading accelerate-1.1.1-py3-none-any.whl.metadata (19 kB)
Downloading peft-0.13.2-py3-none-any.whl (320 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.7/320.7 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading accelerate-1.1.1-py3-none-any.whl (333 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m333.2/333.2 kB[0m [31m29.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: accelerate, peft
Successfully installed accelerate-1.1.1 peft-0.13.2
[0mCollecting pandas
  Downloading pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2024.2-py2.py3-none-any.whl.metadata (1.4 kB)
Downlo

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.schema import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import FAISS
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from peft import PeftModel, PeftConfig
import pandas as pd
import faiss

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Step 2: Load and Preprocess a Subset of the CSV Data (first 10 records)
data_path = 'model_df_balanced.csv'  # Path to the CSV file
df = pd.read_csv(data_path).head(3000)  # Limit to the first 10 records

# Convert each row in the 'TEXT' column to a Document object
documents = [Document(page_content=text) for text in df['TEXT'].tolist()]

# Split documents into chunks of 1000 characters with 250 characters overlap
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=250)
splits = text_splitter.split_documents(documents)

In [6]:
# Step 3: Initialize BAAI Embeddings on CPU
embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en",
    model_kwargs={"device": "cuda"},  # Use CPU instead of GPU
    encode_kwargs={"normalize_embeddings": True}
)

In [7]:
# Step 4: Initialize FAISS and Store Embeddings on CPU
# Prepare the text content for each split
texts = [doc.page_content for doc in splits]


In [8]:
# Use FAISS.from_texts to initialize the FAISS vector store with LangChain
vector_db = FAISS.from_texts(texts, embeddings)


import torch
torch.cuda.empty_cache()

import pickle 

with open('vector_db_llama3.1.pkl', 'wb') as f:
    pickle.dump(vector_db, f)

In [9]:
# Step 5: Load PEFT Model with Base Model
peft_model_name = "bhsai2709/T7_llama3.1_readmission_prediction"
base_model_name = "NousResearch/Meta-Llama-3.1-8B-Instruct"
config = PeftConfig.from_pretrained(peft_model_name)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
base_model = AutoModelForCausalLM.from_pretrained(base_model_name)
model = PeftModel.from_pretrained(base_model, peft_model_name)

Downloading shards: 100%|██████████| 4/4 [06:21<00:00, 95.38s/it] 
Loading checkpoint shards: 100%|██████████| 4/4 [00:06<00:00,  1.51s/it]


In [10]:
# Step 6: Wrap Model in HuggingFacePipeline with Optimizations
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=100, device=0)  # Set max_new_tokens and use CPU if needed
llm = HuggingFacePipeline(pipeline=pipe)

prompt_template = """

Context: {context}
Question: {question}

Answer:
"""
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
# Step 7: Define Prompt and RetrievalQA Chain with Fewer Retrieved Documents
retriever = vector_db.as_retriever(search_kwargs={"k": 1})  # Retrieve only the top document
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": prompt}
)

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'GraniteForCausalLM', 'GraniteMoeForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'Mamba2ForCausalLM', 'MarianForCausalLM', 'MBartForCausa

In [11]:
query = "A 58-year-old male patient with a history of Type 2 diabetes and hypertension. The patient was admitted with complaints of shortness of breath and chest discomfort. He was treated for acute congestive heart failure and showed improvement with diuretics and lifestyle counseling. Hospital stay: 5 days. Does the patient get readmitted?"
response = qa_chain.run(query)
print("Answer:", response)

  response = qa_chain.run(query)
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer: 

Context: previous discharge summary wasstarted neurology team upon readmission on2109111 patient transferred medicineservice 2109226 present summary reflects thehospital 228 hospital course since transfer medicineservice neurology service 2109226pt wastransferred acute renal failure1 cardiovascular patient episodes shortness ofbreath consistent congestive heart failure goal ofkeeping systolic blood pressure 150 maintaincerebral perfusion unfortunately resulted myocardialischemia secondary increased oxygen demand causing rise inby cardiology recommended medical management given thepatients comorbidities ongoing urinary tract infectionand acute renal failure thought cardiaccatheterization management patient wasseverely limited secondary poor general medical statusand high likelihood multivessel coronary arterydisease would amenable would otherwisevery high risk intervention patient continued onlopressorand nitrates hydralazine started decreaseoxygen demand andreduce afterload r

In [12]:
query = "45-year-old female with obesity and chronic kidney disease (stage 3) presented with symptoms of fatigue and fluid retention. She was admitted for treatment, including IV diuretics and dietary adjustments. Hospital stay: 7 days. Will this patient be readmitted?"
response = qa_chain.run(query)
print("Answer:", response)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer: 

Context: response plan ms known lastname 406 24 year old woman sle ckd v multiple admissions labile hypertension transferred micu hypotension decreased alertness patient originally admitted hospital1 54 2141918 due hypertensive urgency right leghip pain recently discharged 2141914 also recently micu 97 hypertensive urgency evening noted nursing staff less arousable bp routine vitals 80s systolic responsive voice per report time medical floor team evaluated patient due concern narcosis received naloxone 04 mg x 2 little improvement first dose naloxone patient arousable second dose abg demonstrated 72639185 potassium abg 68 hemolyzed ekg demonstrate change baselineshe receive treatment hyperkalemia prior transfer cxr also done notable cardiomegaly retrocardiac opacity though similar prior 911 sleeping easily arousable alert oriented times three passing loose stool frequently doctor first name exelate overnight renal involved may try start pd today pt complaints acute abdominal 

In [13]:
query = "A 72-year-old male with a history of COPD and recent pneumonia was admitted after experiencing persistent cough and fever. Antibiotics and supportive oxygen therapy were administered, leading to gradual recovery. Hospital stay: 10 days. Is readmission likely?"
response = qa_chain.run(query)
print("Answer:", response)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer: 

Context: consider followup chest ct examination 1230 months following completion appropriate antibiotic therapy order determine resolution stability progression nodule admission date 2160323 discharge date 2160328date birth 2092111 sex mservice company 191history present illness 68yearold male witha history chronic obstructive pulmonary diseasebronchiectasis hypertension hypercholesterolemia whowas admitted medical intensive care unit 2160323 chronic obstructive pulmonary diseaseexacerbationthe patient recent admissions chronic obstructivepulmonary disease exacerbations 21591127 in21591228 usual state health untilthree days prior admission developed acuteshortness breath cough hemoptysis orfevers chills nausea vomiting chest pain abdominal painorthopnea paroxysmal nocturnal dyspnea aproductive cough brownish sputumin emergency department saturations 70 roomair 90 7 liters oxygen given solumedroland levofloxacin well combivent nebulizers andtransferred medical intensive care u

In [14]:
query = "A 30-year-old female with a known history of anemia was admitted with severe fatigue and dizziness. After thorough evaluation, she was treated with iron supplements and dietary recommendations. Hospital stay: 3 days. Will this patient get readmitted?"
response = qa_chain.run(query)
print("Answer:", response)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer: 

Context: two three weeks patients tsh levelwas 98 free t4 145 hematology patient noted irondeficientwith iron level 50 ferritin 89 givenapproximately 118 units red blood cells herhospital stay started lowdose oral iron 325 qd herhematocrit remained stable greater 30 remainderof hospital stay receiving blood products6 gastrointestinal patient occasionalconstipation intermittent abdominal pain abdominalpain thought likely secondary small amountof ascites gas pain serial kubs done wereall negative signs obstruction given anaggressive bowel regimen helped improve symptomsand also put po lasix also helped improve hersymptoms patient poor po intake herhospital stay felt secondary poor appetite givenboost supplements occasionally tolerated wasalso given simethicone gas paindisposition patient seen physical therapy waswalked exercised regular basis thehospital floor tolerated many exercising wasable walk greater 100 feet flight stairsthe patient transferred hospital 3668 evaluation c

In [15]:
query = "A 67-year-old male patient with coronary artery disease and a prior myocardial infarction was admitted for chest pain and underwent a successful stent placement. He received anticoagulation therapy and cardiac rehabilitation. Hospital stay: 6 days. Will he be readmitted?"
response = qa_chain.run(query)
print("Answer:", response)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer: 

Context: hour 629 patientwent st hospital 107 hospital hospital1 189 chest pain astress test stopped due chest pain stelevation patient admitted coronary careunit started aggrastat dripthe patient transferred hospital1 188 catheterization laboratory 630 interventionthe catheterization revealed 70 proximal left anteriordescending mid90 left circumflex subtotal rightcoronary artery stented due st elevations thepatient transferred floor right arterialsheath patient continued chest pains stelevations treated intravenous nitroglycerin planwas transfer patient coronary care unit forcloser monitoring management preoperative coronaryartery bypass graftpast medical history patients past medical historyincludes hypertension hypercholesterolemia status postmyocardial infarction unstable angina patient aformer smokerpast surgical history status post appendectomyallergies known drug allergiesmedications aspirin 325 mg daily lipitor 10 mg oncedaily lopressor 50 mg twice day xanax needed ze

In [16]:
query = "A 64-year-old female with a history of atrial fibrillation and Type 2 diabetes was admitted due to worsening fatigue, mild chest pain, and difficulty breathing. Upon evaluation, she was found to have a rapid ventricular response. She was treated with beta-blockers and anticoagulation therapy, showing gradual improvement before discharge. Her hospital stay was 6 days. Is there a chance of readmission?"
response = qa_chain.run(query)
print("Answer:", response)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer: 

Context: atrial fibrillation rapid ventricular responseup 200s initial response lopressorthus started diltiazem drip transferred tohospital ward name 19024 intense monitoringhe seen cardiology recommended employingantiarrhythmics time rather increasing doseof nadolol stopping diltiazem donehowever patient became hypotensive several hours beinggiven beta blocker required fluid boluses andtransient dopamine transferred back theintensive care unit overnightpatient rapidly weaned dopamine although rateremained difficult control without beta blockers heremained diltiazem diltiazem drip underwentdc cardioversion prior discharge remainedin normal sinus rhythm throughout course stay5 renal patient creatinine 18 admissionwhich improved 12 hydration creatininefluctuated course hospital stay depending onhis fluid status time discharge creatinineagain 18 following brisk diuresis torelieve ascites felt patientscreatinine would turn back towards baseline postdiuresis6 pulmonary patient ade

In [18]:
query = "A 40-year-old male with a past history of multiple substance use and depression was admitted following an episode of acute pancreatitis, likely linked to alcohol use. He was stabilized with IV fluids, pain control, and counseling for substance cessation. His hospital stay was 5 days. Is he likely to be readmitted?"
response = qa_chain.run(query)
print("Answer:", response)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer: 

Context: againstmedical advice psychiatry deem competent tomake decisionthe patient given extensive list detoxificationcenters could potentially go weekendhowever free care beds available time thepatients departure hospital also given alist telephone numbers centers related themanagement alcohol abuse including alcohol anonymous andalcohol drug referral hot lines patient said thatshe plans attend detoxification facility comingmondayshe left medical advice boyfriend theevening 723 full explanation risks ofleaving hospital given current medical problems2 pancreatitis issues patient presented elevatedlipase levels however extent previousadmission continue decrease hospitalstay tolerate full meals without nauseavomiting abdominal pain 723 patient reportednever abdominal symptoms despite evidence ofpancreatitis related extensive alcohol abuse3 alcoholic hepatitis issues patient history ofelevated transaminases trended toward normalrange still mildly elevated 723 deniedany history 

In [19]:
query = "A 78-year-old female with osteoporosis and a history of multiple falls was admitted with a hip fracture. She underwent successful surgical repair and was started on physical therapy postoperatively. Her hospital stay lasted 8 days. What are the chances of readmission?"
response = qa_chain.run(query)
print("Answer:", response)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer: 

Context: recoveryby postoperative day 3 patient transitioned alloral medications remained complete bedrest given hisfractured right hip temporary pacing wires removedon postoperative day 4 postoperative day 5 wasscheduled go operating room orif righthip however day inadvertently breakfastand surgery delayed 1 day therefore on323 patient brought operating room atwhich time underwent repair fractured right hipby dr last name stitle please see report full details insummary patient orif fractured right hiphe remained hemodynamically stable throughout procedurewas transferred operating room postanesthesia recovery unit following recovery fromanesthesia patient transferred 525 forcontinuing postoperative care rehabilitationover next 2 days patient uneventfulpostoperative course activity level increased withthe assistance nursing staff well physicaltherapy staff postoperative days 7 1 wascleared transfer rehabilitation ongoingpostoperative careat time dictation patients physicalexam

In [20]:
query = "A 60-year-old male with a background of poorly controlled hypertension and recent transient ischemic attack (TIA) was admitted for severe headaches and elevated blood pressure. He was stabilized on antihypertensive medication and educated on lifestyle modifications. Hospital stay: 4 days. Is there a chance he will be readmitted?"
response = qa_chain.run(query)
print("Answer:", response)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer: 

Context: response decrease pain plan cont monitor hypertension malignant hypertensive crisis hypertensive emergency assessment bp 200100 arrival clonodine patch noted back per pt3mg placed wednesday action last name titles 5791 gtt continued titrated currently held response bp trending 160180 90120 plan cont last name titles 5791 gtt titrate goal sbp 180100 likely resuming home bp meds tolerated knowledge deficit assessment sleepy arrival slept throughout night stated performing pd w 25 solution111 x day sometimes daily sometimes every day action sleeping overnight response plan pd education add unable draw labs phlebotomy paged x3 md aware 24yof w esrd htn started od week presented w 1 day hx acute onset nv diarrhea sharp abd pain last pt presented ed 1011 w hypotension sbp 80s approx 15 l dialysis setting taking po antihtns received 5l ivf labs wnl dcd home pt reports last nigt woke w sever sharp abd pain 6 episodes frothy emesis 10 yellowish bms wo melena brbpr pt unable k

In [21]:
query = "A 55-year-old male with chronic liver disease and portal hypertension was admitted after presenting with signs of jaundice, abdominal distension, and confusion. He was diagnosed with hepatic encephalopathy and treated with lactulose and diuretics. After stabilization, he was discharged following a 9-day stay. Should we anticipate a potential readmission?"
response = qa_chain.run(query)
print("Answer:", response)

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer: 

Context: hematocrit didremain stable remainder hospital stay wascontinued protonpump inhibitor7 regard cirrhosis documented fromabdominal ultrasound 2119 patient evidence ofchronic lft elevation coagulopathy haveongoing transaminitis therefore right upper quadrantultrasound obtained 327 patient treatedinitially lactulose hospital course whichwas discontinued due persistent diarrhea8 fluids electrolytes nutrition patient giventhiamine folate multivitamin due history ofalcohol abuse poor nutrition electrolytes weremonitored closely transient hypernatremiawhich resolve iv fluid administration wasevaluated speech swallow service 325 whocleared soft solids thin liquids poorswallow likely secondary intubationdischarge condition afebrile hemodynamically stable alertand oriented times three improved respiratory statusdischarge status rehabilitationdischarge diagnoses1 chest pain evidence myocardial damage2 cocaine intoxication3 alcohol withdrawal4 delirium5 gastrointestinal bleed6 he