In [1]:
#%pip install pandas
#%pip install numpy
#%pip install langchain
#%pip install langchain-community
#%pip install langchain-groq
#%pip install sentence-transformers
#%pip install -U langchain-community faiss-cpu langchain-openai tiktoken

In [2]:
#D6253A5CE371EA- example with gastrointestinal bleed should continue PPI 
#DFAFED1811B871- example with a documented history of a GI bleed should continue PPI
#D4865B8BBB294E- example with GI bleeding that occurred during inpatient stay should continue PPI
#D00F3A8D5F43B2- example with symptoms of heartburn (upper GI symptoms) evaluate for endoscopy to determine if PPI is needed
#D8017C77BA15FA- 14d h pylori treatment if pt treated for 2wks and asymptomatic, stop PPI
#DA15CCF42ACF8B- Barrett's esophagus, continue PPI
#D45728A2EFD315- example with esophagitis, continue PPI
#DFB07E6B8F0957- example with chronic NSAID use and pharmacist deprescribing note to evaluate for PPI
#D1BD3665C06499- example where pt should be on due to history of surgery and chronic GERD symptoms
#D068E26FFF9F43- example where pt came in on and no clear reason to continue
#D0E44FD0BBD96F- example where pt came in on and no clear reason to continue
#D17AFAD1D7BB68- should stop PPI based on Fundoplication being performed while pt was in surgery
#DF7052FF1CE151- example where the pt has GERD and has been marked to continue PPI


In [3]:
import pandas as pd
import numpy as np
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
import os 
import json
from langchain_core.runnables import RunnablePassthrough

#define groq key 
groq_key = os.environ['groqkey']


In [4]:
#model variables
llama_70 = 'llama3-70b-8192'
llama_31 = 'llama-3.1-70b-versatile'
mixtral = "mixtral-8x7b-32768"
gemma_2 = 'gemma2-9b-it'
llama_tool_70 = 'llama3-groq-70b-8192-tool-use-preview'

gemma = ChatGroq(
    temperature=0,
    model=gemma_2,
    api_key=groq_key 
)

mixtral = ChatGroq(
    temperature=0,
    model=mixtral,
    api_key=groq_key 
)

llama3 = ChatGroq(
    temperature=0,
    model=llama_70,
    api_key=groq_key 
)

llama_tool = ChatGroq(
    temperature=0,
    model=llama_70,
    api_key=groq_key 
)

llama_3_1 = ChatGroq(
    temperature=0,
    model=llama_31,
    api_key=groq_key 
)

In [5]:
system = "You are a knowledgeable medical provider who specializes in medication management."
human = "{text}"
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", human)])

chain = prompt | llama_3_1
chain.invoke({"text": "Explain what should be evaluated for PPI deprescribing for a patient at discharge."})

AIMessage(content="When considering PPI (Proton Pump Inhibitor) deprescribing for a patient at discharge, several factors should be evaluated to ensure safe and effective discontinuation. Here are the key considerations:\n\n1. **Indication for PPI use**: Review the original reason for starting the PPI. If the indication was for a short-term condition, such as gastroesophageal reflux disease (GERD) or peptic ulcer disease, and the condition has resolved, deprescribing may be appropriate.\n2. **Duration of PPI use**: Assess the length of time the patient has been taking the PPI. Patients who have been on PPIs for less than 8 weeks are generally considered low-risk for rebound acid hypersecretion and may be more likely to tolerate discontinuation.\n3. **Dose and frequency**: Evaluate the dose and frequency of the PPI. Patients on higher doses or more frequent dosing may require a more gradual taper to minimize rebound symptoms.\n4. **Symptom control**: Assess the patient's current symptom

In [6]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
#embeddings = HuggingFaceEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
text_splitter = RecursiveCharacterTextSplitter()

In [7]:
#from transformers import AutoTokenizer, AutoModel
#import torch
#
#class ClinicalBERTEmbeddings:
#    def __init__(self, model_name="emilyalsentzer/Bio_ClinicalBERT"):
#        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
#        self.model = AutoModel.from_pretrained(model_name)
#
#    def embed(self, text):
#        inputs = self.tokenizer(text, return_tensors="pt", truncation=True, padding=True)
#        with torch.no_grad():
#            outputs = self.model(**inputs)
#            embeddings = outputs.last_hidden_state[:, 0, :]  # Use the [CLS] token's embedding
#        return embeddings

# Example usage
#embedding_model = ClinicalBERTEmbeddings()

In [8]:
embeddings = HuggingFaceEmbeddings(model_name="emilyalsentzer/Bio_ClinicalBERT")

  embeddings = HuggingFaceEmbeddings(model_name="emilyalsentzer/Bio_ClinicalBERT")
  from tqdm.autonotebook import tqdm, trange
No sentence-transformers model found with name emilyalsentzer/Bio_ClinicalBERT. Creating a new one with mean pooling.


In [9]:
def extract_json_from_content(content):
    # Find the JSON part within the content
    start_index = content.find('{')
    end_index = content.rfind('}') + 1
    json_str = content[start_index:end_index]

    # Parse the JSON string
    parsed_json = json.loads(json_str)
    
    return parsed_json

In [10]:
base_path = "F:/LangChain/data/"
#noteConcepts = pd.read_csv(base_path + 'noteConcepts.txt', sep="|")
encounters = pd.read_csv(base_path + 'encounters.txt', sep="|")
#orders = pd.read_csv(base_path + 'orders.txt', sep="|")
#hospitalAcquiredDx = pd.read_csv(base_path + 'hospitalAcquiredDx.txt', sep="|")
#noteText = pd.read_csv(base_path + 'noteText.txt', sep="|")
#presentOnAdmitDx = pd.read_csv(base_path + 'presentOnAdmitDx.txt', sep="|")
#select random EncounterKey
noteText = pd.read_csv(base_path + 'noteText.csv')
encounterKey = 'DA15CCF42ACF8B'
#encounterKey = encounters.sample(1)['EncounterKey'].values[0]

In [11]:
#select 30 random encounterKeys

encounterKeys = encounters.sample(30)['EncounterKey'].values

In [12]:
#filter noteText for encounterKey
noteText = noteText[noteText['EncounterKey'].isin(encounterKeys)]

In [11]:
#filter all dfs to the selected EncounterKey
#noteConcepts = noteConcepts[noteConcepts['EncounterKey'] == encounterKey]
#orders = orders[orders['EncounterKey'] == encounterKey]
#hospitalAcquiredDx = hospitalAcquiredDx[hospitalAcquiredDx['EncounterKey'] == encounterKey]
#noteText = noteText[noteText['EncounterKey'] == encounterKey]
#presentOnAdmitDx = presentOnAdmitDx[presentOnAdmitDx['EncounterKey'] == encounterKey]
#encounters = encounters[encounters['EncounterKey'] == encounterKey]

In [15]:
noteText.describe()
#noteText.head()

Unnamed: 0,deid_note_key,EncounterKey,NoteDate,DepartmentSpecialty,ProviderSpecialty,ProviderType,NoteText
count,1364,1364,1364,1364,938,1364,1364
unique,1364,30,51,2,39,8,1253
top,D001B80289FDF7,D671A9D60DED51,2023-09-19,Inpatient Nursing,UCSF,Physician,The following orders were created for panel or...
freq,1,160,87,1346,317,426,31


In [14]:
#filter noteTest to only include ProviderType in ['Resident', 'Physician', 'Registered Nurse', 'Nurse Practitioner',  'Physician Assistant', 'Pharmacist', 'Licensed Vocational Nurse', 'Medical Student', 'Pharmacy Student', 'Nursing Student', ’'Registered Dietitian', 'Dietetic Intern'
noteText = noteText[noteText['ProviderType'].isin(['Resident', 'Physician', 'Registered Nurse', 'Nurse Practitioner',  'Physician Assistant', 'Pharmacist', 'Licensed Vocational Nurse', 'Medical Student', 'Pharmacy Student', 'Nursing Student', 'Registered Dietitian', 'Dietetic Intern'])]

In [16]:
from langchain_community.document_loaders import DataFrameLoader
loader = DataFrameLoader(
 data_frame = noteText,
 page_content_column = "NoteText",
 engine="pandas",
)

documents = loader.load_and_split()

In [17]:
print(documents[10])
print(len(documents))


page_content='Advance Care Planning     Discussed with: ***** ***** ***** ***** ***** *****    Summary of discussion:   In the setting of his overall stable illness Mr. ***** shared that he would be interested in all life-saving measures such as chest compressions and intubation. He shares that although this has been a challenging year given his pneumonia and car accident he leads a happy life overall. In particular he enjoys walking along a ***** with a view of the ***** ***** ***** at *****'s End. He shares that he has a positive relationship with his ***** year-old son who lives with him part-time and his first wife whom he trusts to make medical decisions for him should he be unable to. When asked about life-prolonging measures he asks that we do ""anything that gives me five more years.""    Surrogate decision maker:   Identified as:   Name: ***** ***** (*****-*****-*****)  Relationship to patient: Former wife  Surrogate is aware that they are designated surrogate? Unknown    ****

In [18]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS


vector_store = FAISS.from_documents(documents, embeddings)
#vecor_store = FAISS.from_documents(documents, embedding_model)

In [50]:
results = vector_store.similarity_search_with_score("Does this patient have severe esophagitis?",k=10)
    #,search_type="similarity_score_threshold",
    #search_kwargs={'score_threshold': 0.99})

In [51]:
for doc, score in results: 
    print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")

* [SIM=30.535692] that would be treated by IV antibiotics at this point recommend stopping antibiotics and monitoring. It sounds like her encephalopathy began with lack of access to lactulose and of course this would set her up for aspiration.    #Leukocytosis now resolved    Dx  - follow up " [{'deid_note_key': 'DABCCB7FDDF21B', 'EncounterKey': 'D671A9D60DED51', 'NoteDate': '2023-09-01', 'DepartmentSpecialty': 'Inpatient Nursing', 'ProviderSpecialty': 'Hospital Medicine', 'ProviderType': 'Physician'}]
* [SIM=32.456718] headache meningismus sinus pain fevers and hypotension concerning for infectious meningitis. Source could be bacterial viral or fungal given recent liver transplant and immunosuppression. Could also have overlapping bile leak and infected biloma. Differential also " [{'deid_note_key': 'DD0D1854C965E2', 'EncounterKey': 'D8D46ABC7D3697', 'NoteDate': '2023-09-04', 'DepartmentSpecialty': 'Inpatient Nursing', 'ProviderSpecialty': 'General Surgery', 'ProviderType': 'Physician

In [23]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [24]:
retriever = vector_store.as_retriever(search_type="similarity", k=10)

In [25]:
system = "You are a knowledgeable medical provider who specializes in medication management. Given a list of diagnosis and some snippets from patients notes {context}, answer if the patient notes contain any of the diagnosis."
prompt = ChatPromptTemplate.from_messages(
        [
        ("system", system), ("human", "{input}")
        ])
prompt.pretty_print()


You are a knowledgeable medical provider who specializes in medication management. Given a list of diagnosis and some snippets from patients notes [33;1m[1;3m{context}[0m, answer if the patient notes contain any of the diagnosis.


[33;1m[1;3m{input}[0m


In [26]:
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [38]:
rag_chain = (
        RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
        | prompt 
        | llama_3_1
        | StrOutputParser()
)


retrieve_docs =  {"context": retriever | format_docs,
                "input": RunnablePassthrough()}

retrieve_docs = (lambda x: x["input"]) | retriever

chain = RunnablePassthrough.assign(context=retrieve_docs).assign(
    answer=rag_chain
)

In [47]:
result = chain.invoke({"input":"""Based on the information from the note context, if a patient has a clear indication to continue a proton pump inhibitor PPI."""})
print(result["answer"])

Based on the information provided in the patient notes, there are several indications that suggest the patient may benefit from continuing a proton pump inhibitor (PPI). These include:

1. Gastritis: The patient has a history of gastritis, which is a condition characterized by inflammation of the stomach lining. PPIs are commonly used to treat gastritis by reducing stomach acid production.
2. LGI bleed: The patient has a history of a lower gastrointestinal (LGI) bleed, which may be related to gastrointestinal ulcers or other conditions that can be managed with PPIs.
3. Cholecystitis s/p cholecystectomy: The patient has a history of cholecystitis (inflammation of the gallbladder) and has undergone a cholecystectomy (removal of the gallbladder). PPIs may be used to manage symptoms of bile reflux or other gastrointestinal conditions that may occur after cholecystectomy.

Given these indications, it is likely that the patient would benefit from continuing a PPI to manage their gastrointest

In [48]:
print(result.keys())

dict_keys(['input', 'context', 'answer'])


In [49]:
result["context"]

[Document(metadata={'deid_note_key': 'DABCCB7FDDF21B', 'EncounterKey': 'D671A9D60DED51', 'NoteDate': '2023-09-01', 'DepartmentSpecialty': 'Inpatient Nursing', 'ProviderSpecialty': 'Hospital Medicine', 'ProviderType': 'Physician'}, page_content='that would be treated by IV antibiotics at this point recommend stopping antibiotics and monitoring. It sounds like her encephalopathy began with lack of access to lactulose and of course this would set her up for aspiration.    #Leukocytosis now resolved    Dx  - follow up "'),
 Document(metadata={'deid_note_key': 'D35A9A9003CFEB', 'EncounterKey': 'D079332799772C', 'NoteDate': '2023-09-15', 'DepartmentSpecialty': 'Inpatient Nursing', 'ProviderSpecialty': 'Nurse Practitioner', 'ProviderType': 'Nurse Practitioner'}, page_content='When you presented to the emergency department you were found to have an acute cardiac event called an ST elevation myocardial infarction and went to the cardiac catheterization lab.  There you had a left heart catheteri