In [1]:
!pip install -q chromadb
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
!pip install -q transformers
!pip install -q langchain
!pip install -q langchain-community==0.2.1 langchain-core==0.2.1
!pip install -q -U bitsandbytes
!pip install -q sentence_transformers
!pip install -q -U accelerate
!pip install -q unstructured
!pip install -q langchain-huggingface

In [2]:
!transformers-cli env


Copy-and-paste the text below in your GitHub issue and FILL OUT the two last points.

- `transformers` version: 4.41.2
- Platform: Windows-10-10.0.22621-SP0
- Python version: 3.11.9
- Huggingface_hub version: 0.23.3
- Safetensors version: 0.4.3
- Accelerate version: 0.30.1
- Accelerate config: 	not found
- PyTorch version (GPU?): 2.3.1+cu121 (True)
- Tensorflow version (GPU?): not installed (NA)
- Flax version (CPU?/GPU?/TPU?): not installed (NA)
- Jax version: not installed
- JaxLib version: not installed
- Using GPU in script?: <fill in>
- Using distributed or parallel set-up in script?: <fill in>



In [1]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#################################################################
# Tokenizer
#################################################################

model_name='meta-llama/Meta-Llama-3-8B-Instruct'

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [3]:
#################################################################
# bitsandbytes parameters
#################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "bfloat16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

# Device partition
device_map = "auto"

In [4]:
#################################################################
# Set up quantization config
#################################################################
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

In [5]:
#################################################################
# Load pre-trained config
#################################################################
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    low_cpu_mem_usage = True,
    device_map = "auto"
)

Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████| 4/4 [00:06<00:00,  1.68s/it]


## LangChain

In [6]:
CHROMA_PATH = "chroma"
DATA_PATH = "data"

In [7]:
from bs4 import BeautifulSoup
import os

def XMLLoader(folder_path):
    reportData_list = []

    for filename in os.listdir(folder_path):
        if filename.endswith('.xml'):
            file_path = os.path.join(folder_path, filename)
            with open(file_path, 'r') as file:
                content = file.read()

            soup = BeautifulSoup(content, 'xml')

            reportData = {}
            for tag in soup.find_all():
                if tag.name != "report":
                    reportData[tag.name] = tag.text.strip()

            reportData["filename"] = filename
            reportData_list.append(reportData)

    return reportData_list

In [8]:
data =  XMLLoader("data")

In [9]:
from langchain.docstore.document import Document

documents = []
for doc in data:

    document = Document(page_content=doc["report_text"], metadata={"source": doc["filename"],
                                                                   "subtype": doc["subtype"],
                                                                   "type": doc["type"],
                                                                   "chief_complaint": doc["chief_complaint"],
                                                                   "admit_diagnosis": doc["admit_diagnosis"],
                                                                   "discharge_diagnosis": doc["discharge_diagnosis"],
                                                                   "year": doc["year"],
                                                                   "downlaod_time": doc.get("download_time", ""), # Algunos no tiemem este campo
                                                                   "deid": doc["deid"]})
    documents.append(document)

In [10]:
#documents[1].metadata["subtype"]="TEST"
#print(documents[7])

In [11]:
from langchain.embeddings import SentenceTransformerEmbeddings

embedding_model = "BAAI/bge-small-en-v1.5"

# https://huggingface.co/sentence-transformers
# https://huggingface.co/spaces/mteb/leaderboard
embeddings = SentenceTransformerEmbeddings(model_name=embedding_model)



In [12]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

chunk_size = 200

# LLama3 - Ventana de contexto de 8000 tokens
text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
    tokenizer,
    chunk_size=chunk_size,
    chunk_overlap=0,
    add_start_index=True
)

chunks = text_splitter.split_documents(documents)
print(f"Split {len(documents)} documents into {len(chunks)} chunks.")

Split 41 documents into 357 chunks.


In [13]:
from langchain.vectorstores.chroma import Chroma

vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings)

  attn_output = torch.nn.functional.scaled_dot_product_attention(


In [14]:
from langchain_huggingface.llms import HuggingFacePipeline

# Pipeline for inference
def load_model():
    text_generation_pipeline = pipeline(
        model=model, # Model loaded in the first part
        tokenizer=tokenizer, # Tokenizer loaded in the first part
        task="text-generation",
        temperature=0.1,
        repetition_penalty=1.1,
        return_full_text=True,
        max_new_tokens=500,
        do_sample=True
    )

    hf = HuggingFacePipeline(pipeline=text_generation_pipeline, model_id="meta-llama/Meta-Llama-3-8B-Instruct")
    return hf

In [15]:
llm = load_model()

### MultiQueryRetriever

Mediante un LLM se generan múltiples queries (prompts) desde distintas perspectivas para un prompt introducido por el user. Por cada query hace un retrieval de los documentos reelevantes para obtener un set mas grande de posibles documentos con mayor variedad.

In [16]:
from langchain.retrievers.multi_query import MultiQueryRetriever

multi_query_retriever = MultiQueryRetriever.from_llm(
    retriever = vectorstore.as_retriever(), 
    llm=llm,
)

In [17]:
import logging

logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)

In [18]:
# Default retriever (No custom prompt template)

#uestion = "Information about the patient John Smith"
#out = multi_query_retriever.invoke(question)

In [19]:
#len(out)

### Custom prompt template

In [20]:
from langchain_core.prompts import PromptTemplate

template = """
    <|start_header_id|>system<|end_header_id|> 
    You are an AI language model assistant. Your task is to generate three (3) different versions of the given user question to retrieve relevant documents from a vector database. By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of the distance-based similarity search.
    Provide these alternative questions separated by newlines.
    <|eot_id|>
    <|start_header_id|>user<|end_header_id|> Original question: {question}<|eot_id|>"""

In [21]:
PROMPT = PromptTemplate(
    template=template, input_variables=["question"]
)

retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=vectorstore.as_retriever(), llm=llm, prompt=PROMPT
)

In [22]:
#docs = retriever_from_llm.get_relevant_documents("Information about John Smith")
#len(docs)

## Full Chain

In [22]:
from langchain.prompts import PromptTemplate

prompt_template = """
    <|start_header_id|>system<|end_header_id|> Answer as an expert doctor. Analyse the context prvided below to answer the query. 
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    Context:
    {context}
    <|eot_id|>
    <|start_header_id|>user<|end_header_id|> This is the Query: {question}<|eot_id|>"""

promptTemplate = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template
)

In [23]:
from langchain.chains import LLMChain

llm_chain = LLMChain(llm=llm, prompt=promptTemplate)

  warn_deprecated(


In [24]:
from langchain_core.runnables import RunnablePassthrough

rag_chain = (
 {"context": retriever_from_llm, "question": RunnablePassthrough()}
    | llm_chain
)

In [25]:
answer = rag_chain.invoke("Medications of patients with Cholecystectomy")

INFO:langchain.retrievers.multi_query:Generated queries: ['<|start_header_id|>system<|end_header_id|> ', '    You are an AI language model assistant. Your task is to generate three (3) different versions of the given user question to retrieve relevant documents from a vector database. By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of the distance-based similarity search.', '    Provide these alternative questions separated by newlines.', '    <|eot_id|>', '    <|start_header_id|>user<|end_header_id|> Original question: Medications of patients with Cholecystectomy<|eot_id|>assistant', '', 'Here are three alternative versions of the original question:', '', '1. What medications were prescribed to patients who underwent cholecystectomy surgery?', '2. Can you provide information on the medication regimens for individuals who have undergone gallbladder removal (cholecystectomy)?', '3. What are the common medications u

In [26]:
print(answer['text'])


    <|start_header_id|>system<|end_header_id|> Answer as an expert doctor. Analyse the context prvided below to answer the query. 
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    Context:
    [Document(page_content='[Report de-identified (Safe-harbor compliant) by De-ID v.6.22.07.0]', metadata={'admit_diagnosis': '562.10', 'chief_complaint': 'DIVERTICULITIS', 'deid': 'v.6.22.07.0', 'discharge_diagnosis': '584.9,518.0,403.91,428.0,486,276.2,562.11,585.4,518.3,244.9,414.01,250.80,791.0,250.50,362.01,285.29,', 'downlaod_time': '', 'source': 'report10.xml', 'start_index': 0, 'subtype': 'CONSULT', 'type': 'HP', 'year': '2007'}), Document(page_content='[Report de-identified (Safe-harbor compliant) by De-ID v.6.22.07.0]', metadata={'admit_diagnosis': '789.00', 'chief_complaint': 'PANCREATITIS', 'deid': 'v.6.22.07.0', 'discharge_diagnosis': '577.0,428.0,496,250.02,599.0,244.9,272.4,729.1,414.00,V45.81,530.81,278.01,E933.1,714.0,715.89,721.3,

In [29]:
answer = rag_chain.invoke("What symtomps devloped patients who smoke?")

INFO:langchain.retrievers.multi_query:Generated queries: ['<|start_header_id|>system<|end_header_id|> ', '    You are an AI language model assistant. Your task is to generate three (3) different versions of the given user question to retrieve relevant documents from a vector database. By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of the distance-based similarity search.', '    Provide these alternative questions separated by newlines.', '    <|eot_id|>', '    <|start_header_id|>user<|end_header_id|> Original question: What symtomps devloped patients who smoke?<|eot_id|>assistant', '', 'Here are three alternative versions of the original question:', '', 'What symptoms did patients with smoking habits experience?', '', 'Which health issues were more common in individuals who smoked regularly?', '', 'What medical conditions were associated with a history of tobacco use among patients?', '', 'These alternative quest

In [30]:
print(answer['text'])


    <|start_header_id|>system<|end_header_id|> Answer as an expert doctor. Analyse the context prvided below to answer the query. 
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    Context:
    [Document(page_content='[Report de-identified (Safe-harbor compliant) by De-ID v.6.22.07.0]', metadata={'admit_diagnosis': '562.10', 'chief_complaint': 'DIVERTICULITIS', 'deid': 'v.6.22.07.0', 'discharge_diagnosis': '584.9,518.0,403.91,428.0,486,276.2,562.11,585.4,518.3,244.9,414.01,250.80,791.0,250.50,362.01,285.29,', 'downlaod_time': '', 'source': 'report10.xml', 'start_index': 0, 'subtype': 'CONSULT', 'type': 'HP', 'year': '2007'}), Document(page_content='[Report de-identified (Safe-harbor compliant) by De-ID v.6.22.07.0]', metadata={'admit_diagnosis': '789.00', 'chief_complaint': 'PANCREATITIS', 'deid': 'v.6.22.07.0', 'discharge_diagnosis': '577.0,428.0,496,250.02,599.0,244.9,272.4,729.1,414.00,V45.81,530.81,278.01,E933.1,714.0,715.89,721.3,

In [31]:
len(answer['context'])

34

In [32]:
answer = rag_chain.invoke("Give me information about Oliver Wharton")

INFO:langchain.retrievers.multi_query:Generated queries: ['<|start_header_id|>system<|end_header_id|> ', '    You are an AI language model assistant. Your task is to generate three (3) different versions of the given user question to retrieve relevant documents from a vector database. By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of the distance-based similarity search.', '    Provide these alternative questions separated by newlines.', '    <|eot_id|>', '    <|start_header_id|>user<|end_header_id|> Original question: Give me information about Oliver Wharton<|eot_id|>assistant', '', 'Here are three alternative versions of the original question:', '', "Give me information about Oliver Wharton's life and achievements.", '', "What can you tell me about Oliver Wharton's contributions to society?", '', "Can you provide details about Oliver Wharton's biography, including his early life, career, and notable accomplishm

In [33]:
print(answer['text'])


    <|start_header_id|>system<|end_header_id|> Answer as an expert doctor. Analyse the context prvided below to answer the query. 
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    Context:
    [Document(page_content='[Report de-identified (Safe-harbor compliant) by De-ID v.6.22.07.0]', metadata={'admit_diagnosis': '562.10', 'chief_complaint': 'DIVERTICULITIS', 'deid': 'v.6.22.07.0', 'discharge_diagnosis': '584.9,518.0,403.91,428.0,486,276.2,562.11,585.4,518.3,244.9,414.01,250.80,791.0,250.50,362.01,285.29,', 'downlaod_time': '', 'source': 'report10.xml', 'start_index': 0, 'subtype': 'CONSULT', 'type': 'HP', 'year': '2007'}), Document(page_content='[Report de-identified (Safe-harbor compliant) by De-ID v.6.22.07.0]', metadata={'admit_diagnosis': '789.00', 'chief_complaint': 'PANCREATITIS', 'deid': 'v.6.22.07.0', 'discharge_diagnosis': '577.0,428.0,496,250.02,599.0,244.9,272.4,729.1,414.00,V45.81,530.81,278.01,E933.1,714.0,715.89,721.3,

In [35]:
answer = rag_chain.invoke("Medications of Oliver Wharton")
print(answer['text'])

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
INFO:langchain.retrievers.multi_query:Generated queries: ['<|start_header_id|>system<|end_header_id|> ', '    You are an AI language model assistant. Your task is to generate three (3) different versions of the given user question to retrieve relevant documents from a vector database. By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of the distance-based similarity search.', '    Provide these alternative questions separated by newlines.', '    <|eot_id|>', '    <|start_header_id|>user<|end_header_id|> Original question: Medications of Oliver Wharton<|eot_id|>assistant', '', 'Here are three alternative versions of the original question:', '', '1. What medications have been prescribed to Oliver Wharton?', "2. Can you provide information about Oliver Wharton's medication history?", "3. What records exist regar