In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.schema import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import FAISS
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from peft import PeftModel, PeftConfig
import pandas as pd
import faiss

In [2]:
# Step 2: Load and Preprocess a Subset of the CSV Data (first 10 records)
data_path = 'hospital_data.csv'  # Path to the CSV file
df = pd.read_csv(data_path).head(10)  # Limit to the first 10 records

# Convert each row in the 'TEXT' column to a Document object
documents = [Document(page_content=text) for text in df['TEXT'].tolist()]

# Split documents into chunks of 1000 characters with 250 characters overlap
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=250)
splits = text_splitter.split_documents(documents)


In [3]:
# Step 3: Initialize BAAI Embeddings on CPU
embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en",
    model_kwargs={"device": "cpu"},  # Use CPU instead of GPU
    encode_kwargs={"normalize_embeddings": True}
)




In [4]:
# Step 4: Initialize FAISS and Store Embeddings on CPU
# Prepare the text content for each split
texts = [doc.page_content for doc in splits]


In [5]:
# Use FAISS.from_texts to initialize the FAISS vector store with LangChain
vector_db = FAISS.from_texts(texts, embeddings)


In [6]:
# Step 5: Load PEFT Model with Base Model
peft_model_name = "bhsai2709/T7_Llama3_readmission_prediction"
base_model_name = "NousResearch/Meta-Llama-3-8B-Instruct"
config = PeftConfig.from_pretrained(peft_model_name)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
base_model = AutoModelForCausalLM.from_pretrained(base_model_name)
model = PeftModel.from_pretrained(base_model, peft_model_name)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [13]:
# Step 6: Wrap Model in HuggingFacePipeline for LangChain Compatibility
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_length=50, device=0)
llm = HuggingFacePipeline(pipeline=pipe)

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'MptForCausalLM', 'MusicgenForCausalL

In [14]:

# Step 7: Define Prompt and RetrievalQA Chain
prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Answer:
"""
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

retriever = vector_db.as_retriever(search_kwargs={"k": 1})
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": prompt}
)


In [15]:
# Step 7: Test the RAG System with a Sample Query
query = "A person got a throat infection and is coughing blood rarely; he also smokes cigarettes; he was in hospital for 4 days and will he be readmidded again?"
inputs = tokenizer(query, return_tensors="pt")
outputs = qa_chain.run(query)

# Decode and print the response
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Answer:", response)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


ValueError: Input length of input_ids is 50, but `max_length` is set to 50. This can lead to unexpected behavior. You should consider increasing `max_length` or, better yet, setting `max_new_tokens`.

In [17]:
# Step 6: Wrap Model in HuggingFacePipeline with Optimizations
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=100, device=-1)  # Set max_new_tokens and use CPU if needed
llm = HuggingFacePipeline(pipeline=pipe)

# Step 7: Define Prompt and RetrievalQA Chain with Fewer Retrieved Documents
retriever = vector_db.as_retriever(search_kwargs={"k": 1})  # Retrieve only the top document
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": prompt}
)

# Step 8: Run a Query
query = "A person got a throat infection and is coughing blood rarely; he also smokes cigarettes; he was in hospital for 4 days and will he be readmitted again?"
response = qa_chain.run(query)
print("Answer:", response)


The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'MptForCausalLM', 'MusicgenForCausalL

Answer: Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: upright prior study probably post surgical extensive subcutaneous emphysema otherwise unchanged admission date 21701112 discharge date 21701123date birth 208648 sex fservice cardiothoracicallergiesinfluenza virus vaccineattendingfirst name3 lf 5790chief complaintdysnpea dysphagiamajor surgical invasive procedure21701114 left thoracotomy repair proximal left main stembronchus lacerationintercostal muscle flap buttress drainageofhemothorax21701114 rigid bronchoscopy flexible bronchoscopy21701112 placement 18french chest tube lefthemithoraxhistory present illness84f sp tevar ruptured thoracic aortic aneurysm 21701030she discharged 2170115 good condition presents1112 four day history weakness low grade feversshe developed worsening cough last 3 daysassociated shortness breath dysphagiaodynophagiathe cough mostly