In [10]:
!pip install -q chromadb
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
!pip install -q transformers
!pip install -q langchain
!pip install -U -q langchain-community
!pip install -q bitsandbytes
!pip install -q sentence_transformers
!pip install -q unstructured
!pip install -q accelerate

In [2]:
!transformers-cli env


Copy-and-paste the text below in your GitHub issue and FILL OUT the two last points.

- `transformers` version: 4.41.2
- Platform: Windows-10-10.0.22621-SP0
- Python version: 3.11.9
- Huggingface_hub version: 0.23.2
- Safetensors version: 0.4.3
- Accelerate version: not installed
- Accelerate config: not found
- PyTorch version (GPU?): 2.3.0+cu121 (True)
- Tensorflow version (GPU?): not installed (NA)
- Flax version (CPU?/GPU?/TPU?): not installed (NA)
- Jax version: not installed
- JaxLib version: not installed
- Using GPU in script?: <fill in>
- Using distributed or parallel set-up in script?: <fill in>



In [1]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#################################################################
# Tokenizer
#################################################################

model_name='meta-llama/Meta-Llama-3-8B-Instruct'

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [3]:
#################################################################
# bitsandbytes parameters
#################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "bfloat16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

# Device partition
device_map = "auto"

In [4]:
#################################################################
# Set up quantization config
#################################################################
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

In [5]:
#################################################################
# Load pre-trained config
#################################################################
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    low_cpu_mem_usage = True,
    device_map = device_map
)

Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████| 4/4 [00:07<00:00,  1.75s/it]


### LangChain config

In [6]:
CHROMA_PATH = "chroma" # Where to store the database
DATA_PATH = "data" # Path to the documents

In [7]:
from bs4 import BeautifulSoup
import os

# Load all the documents in a dictionary and add XML tags as fields inside the dictionary
def XMLLoader(folder_path):
    reportData_list = []

    for filename in os.listdir(folder_path):
        if filename.endswith('.xml'):
            file_path = os.path.join(folder_path, filename)
            with open(file_path, 'r') as file:
                content = file.read()

            soup = BeautifulSoup(content, 'xml')

            reportData = {}
            for tag in soup.find_all():
                if tag.name != "report":
                    reportData[tag.name] = tag.text.strip()

            reportData["filename"] = filename
            reportData_list.append(reportData)

    return reportData_list

In [8]:
data =  XMLLoader("data")

In [9]:
from langchain.docstore.document import Document

# Create a "Document" class from LangChain and add all the tags as document metadata
documents = []
for doc in data:

    document = Document(page_content=doc["report_text"], metadata={"source": doc["filename"],
                                                                   "subtype": doc["subtype"],
                                                                   "type": doc["type"],
                                                                   "chief_complaint": doc["chief_complaint"],
                                                                   "admit_diagnosis": doc["admit_diagnosis"],
                                                                   "discharge_diagnosis": doc["discharge_diagnosis"],
                                                                   "year": doc["year"],
                                                                   "downlaod_time": doc.get("download_time", ""), # Algunos no tiemem este campo
                                                                   "deid": doc["deid"]})
    documents.append(document)

In [10]:
#print(documents)

### Parent document retriever

In [11]:
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.chroma import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings

parent_docs =  documents

embedding_model = "BAAI/bge-small-en-v1.5"

# https://huggingface.co/sentence-transformers
# https://huggingface.co/spaces/mteb/leaderboard
embeddings = SentenceTransformerEmbeddings(model_name=embedding_model)

  warn_deprecated(


### Text splitter

In [12]:
chunk_size = 400

child_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
    tokenizer,
    chunk_size=chunk_size,
    chunk_overlap=40,
    add_start_index=True
)

#chunks = text_splitter.split_documents(documents)
#print(f"Split {len(documents)} documents into {len(chunks)} chunks.")

### Database

In [13]:
import shutil

# Clear out the database first if already exists.
if os.path.exists(CHROMA_PATH):
    shutil.rmtree(CHROMA_PATH)

store = InMemoryStore()
vectorstore = Chroma(embedding_function=embeddings, persist_directory=CHROMA_PATH)

parent_document_retriever = ParentDocumentRetriever(
    vectorstore=vectorstore, # For child chunks
    docstore=store, # For main documents
    child_splitter=child_splitter,
    search_kwargs={"k": 1}
)

parent_document_retriever.add_documents(parent_docs, ids=None)

  attn_output = torch.nn.functional.scaled_dot_product_attention(


In [14]:
print(f"Parent chunks (Number of reports): {len(list(store.yield_keys()))}")
print(f"Child chunks: {len(parent_document_retriever.vectorstore.get()['ids'])}")

Parent chunks (Number of reports): 41
Child chunks: 217


In [15]:
# DB keys
#parent_document_retriever.vectorstore.get()

In [16]:
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate

def load_model():
    text_generation_pipeline = pipeline(
        model=model,
        tokenizer=tokenizer,
        task="text-generation",
        temperature=0.1,
        repetition_penalty=1.1,
        return_full_text=True,
        max_new_tokens=300,
        do_sample=True
    )

    hf = HuggingFacePipeline(pipeline=text_generation_pipeline)
    return hf

In [17]:
llm = load_model()

  warn_deprecated(


### Prompt template

In [25]:
# RAG

from langchain_core.prompts import ChatPromptTemplate

TEMPLATE = """
You are an expert doctor. Use the information in the medical report provided below to answer the query as a doctor would do it.

If the information is not in the report, just say that you don't know. Don't try to make up an answer.

Medical report:
{context}


Query:
{question}

"""

TEMPLATE2 = """
    <|start_header_id|>system<|end_header_id|> 
    You are an expert doctor. Use the information in the medical report provided below to answer the query.
    If the information is not in the report, just say that you don't know. Don't try to make up an answer.
    
    Medical report:
    {context}
    
    <|eot_id|>
    <|start_header_id|>user<|end_header_id|> Query: {question}<|eot_id|>"""

rag_prompt = ChatPromptTemplate.from_template(TEMPLATE2)

In [26]:
#sub_docs = vectorstore.similarity_search("Medications of Jane Doe", k=2)

In [27]:
#print(sub_docs)

In [28]:
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser

setup_and_retrieval = RunnableParallel({"question": RunnablePassthrough(), "context": parent_document_retriever })
output_parser = StrOutputParser()

# LCEL language to create the chain (de izquierda a derecha el output de uno se pasa al otro)
parent_retrieval_chain = setup_and_retrieval | rag_prompt | llm | output_parser

In [22]:
out = parent_retrieval_chain.invoke("Hello Doctor, my name is Andrew ying and my head hurts. Do you know why?")
print(out)

Human: 
    <|start_header_id|>system<|end_header_id|> 
    You are an expert doctor. Use the information in the medical report provided below to answer the query as a doctor would do it.
    If the information is not in the report, just say that you don't know. Don't try to make up an answer.
    
    Medical report:
    [Document(page_content="[Report de-identified (Safe-harbor compliant) by De-ID v.7.01.24.0]\n\n**INSTITUTION\nEMERGENCY DEPARTMENT\nPATIENT NAME: Andrew Ying\nACCOUNT #: 789012\nDATE OF SERVICE: May 24 2024\nPRIMARY CARE PHYSICIAN: Dr. Jake Williams\nATTENDING PHYSICIAN ADDENDUM:\nThis patient was seen in conjunction with the Trauma Service as a level two \ntrauma. I was present for the patient's initial evaluation by the Trauma \nService. I did review the trauma flow sheet and agreed with its content \nunless noted otherwise below.   \nThis is a 17-year-old female patient who was the unrestrained driver in a \nmotor vehicle collision. The vehicle reportedly flipped a

In [31]:
out = parent_retrieval_chain.invoke("My name is Oliver Wharton and i forgot my medications. Can you help me?")
print(out)

Human: 
    <|start_header_id|>system<|end_header_id|> 
    You are an expert doctor. Use the information in the medical report provided below to answer the query.
    If the information is not in the report, just say that you don't know. Don't try to make up an answer.
    
    Medical report:
    [Document(page_content='[Report de-identified (Safe-harbor compliant) by De-ID v.9.17.03.0]\n\nDISCHARGE SUMMARY\nINSTITUTIONHopeville, TX 78566\n\nPATIENT NAME: Oliver Wharton\nID NUMBER: 123456\n\nDATE OF ADMISSION:      Dec 02 2019\n\nDATE OF DISCHARGE:      Dec 04 2019\n\nDISCHARGE DIAGNOSES:\n\nFebrile seizure.\nDehydration.\nSummary:  The patient is a 2-year-old male who was admitted to the emergency room with a fever of 102.5 F and a history of a witnessed seizure.  He had been ill for 2 days with runny nose, cough, and poor oral intake.  In the emergency room, he was given fluids and fever reducers.  His seizure activity ceased and he remained afebrile for over 24 hours.\n\nHOSPITAL 