In [1]:
#%pip install pandas
#%pip install numpy
#%pip install langchain
#%pip install langchain-community
#%pip install langchain-groq
#%pip install sentence-transformers
#%pip install -U langchain-community faiss-cpu langchain-openai tiktoken

In [None]:
#D6253A5CE371EA- example with gastrointestinal bleed should continue PPI 
#DFAFED1811B871- example with a documented history of a GI bleed should continue PPI
#D4865B8BBB294E- example with GI bleeding that occurred during inpatient stay should continue PPI
#D00F3A8D5F43B2- example with symptoms of heartburn (upper GI symptoms) evaluate for endoscopy to determine if PPI is needed
#D8017C77BA15FA- 14d h pylori treatment if pt treated for 2wks and asymptomatic, stop PPI
#DA15CCF42ACF8B- Barrett's esophagus, continue PPI
#D45728A2EFD315- example with esophagitis, continue PPI
#DFB07E6B8F0957- example with chronic NSAID use and pharmacist deprescribing note to evaluate for PPI
#D1BD3665C06499- example where pt should be on due to history of surgery and chronic GERD symptoms
#D068E26FFF9F43- example where pt came in on and no clear reason to continue
#D0E44FD0BBD96F- example where pt came in on and no clear reason to continue
#D17AFAD1D7BB68- should stop PPI based on Fundoplication being performed while pt was in surgery

In [1]:
import pandas as pd
import numpy as np
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
import os 
import json
from langchain_core.runnables import RunnablePassthrough

#define groq key 
groq_key = os.environ['groqkey']


In [2]:
#model variables
llama_70 = 'llama3-70b-8192'
llama_31 = 'llama-3.1-70b-versatile'
mixtral = "mixtral-8x7b-32768"
gemma_2 = 'gemma2-9b-it'
llama_tool_70 = 'llama3-groq-70b-8192-tool-use-preview'

gemma = ChatGroq(
    temperature=0,
    model=gemma_2,
    api_key=groq_key 
)

mixtral = ChatGroq(
    temperature=0,
    model=mixtral,
    api_key=groq_key 
)

llama3 = ChatGroq(
    temperature=0,
    model=llama_70,
    api_key=groq_key 
)

llama_tool = ChatGroq(
    temperature=0,
    model=llama_70,
    api_key=groq_key 
)

llama_3_1 = ChatGroq(
    temperature=0,
    model=llama_31,
    api_key=groq_key 
)

In [3]:
system = "You are a knowledgeable medical provider who specializes in medication management."
human = "{text}"
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", human)])

chain = prompt | llama_3_1
chain.invoke({"text": "Explain what should be evaluated for PPI deprescribing for a patient at discharge."})

AIMessage(content="When considering PPI (Proton Pump Inhibitor) deprescribing for a patient at discharge, several factors should be evaluated to ensure safe and effective discontinuation. Here are the key considerations:\n\n1. **Indication for PPI use**: Review the original reason for starting the PPI. If the indication was for a short-term condition, such as gastroesophageal reflux disease (GERD) or peptic ulcer disease, and the condition has resolved, deprescribing may be appropriate.\n2. **Duration of PPI use**: Assess the length of time the patient has been taking the PPI. Patients who have been on PPIs for less than 8 weeks are generally considered low-risk for rebound acid hypersecretion and may be more likely to tolerate discontinuation.\n3. **Dose and frequency**: Evaluate the dose and frequency of the PPI. Patients on lower doses or less frequent dosing regimens may be more likely to tolerate discontinuation.\n4. **Symptom control**: Assess the patient's current symptoms. If s

In [4]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
embeddings = HuggingFaceEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
text_splitter = RecursiveCharacterTextSplitter()

  embeddings = HuggingFaceEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
  from tqdm.autonotebook import tqdm, trange


In [5]:
def extract_json_from_content(content):
    # Find the JSON part within the content
    start_index = content.find('{')
    end_index = content.rfind('}') + 1
    json_str = content[start_index:end_index]

    # Parse the JSON string
    parsed_json = json.loads(json_str)
    
    return parsed_json

In [6]:
base_path = "F:/LangChain/data/"
#noteConcepts = pd.read_csv(base_path + 'noteConcepts.txt', sep="|")
encounters = pd.read_csv(base_path + 'encounters.txt', sep="|")
#orders = pd.read_csv(base_path + 'orders.txt', sep="|")
#hospitalAcquiredDx = pd.read_csv(base_path + 'hospitalAcquiredDx.txt', sep="|")
#noteText = pd.read_csv(base_path + 'noteText.txt', sep="|")
#presentOnAdmitDx = pd.read_csv(base_path + 'presentOnAdmitDx.txt', sep="|")
#select random EncounterKey
noteText = pd.read_csv(base_path + 'noteText.csv')
encounterKey = 'DA15CCF42ACF8B'
#encounterKey = encounters.sample(1)['EncounterKey'].values[0]

In [7]:
#filter all dfs to the selected EncounterKey
#noteConcepts = noteConcepts[noteConcepts['EncounterKey'] == encounterKey]
#orders = orders[orders['EncounterKey'] == encounterKey]
#hospitalAcquiredDx = hospitalAcquiredDx[hospitalAcquiredDx['EncounterKey'] == encounterKey]
noteText = noteText[noteText['EncounterKey'] == encounterKey]
#presentOnAdmitDx = presentOnAdmitDx[presentOnAdmitDx['EncounterKey'] == encounterKey]
#encounters = encounters[encounters['EncounterKey'] == encounterKey]

In [8]:
print(noteText.describe())

noteText.head()

         deid_note_key    EncounterKey    NoteDate DepartmentSpecialty  \
count               28              28          28                  28   
unique              28               1           5                   1   
top     D01E5762A11718  DA15CCF42ACF8B  2023-09-04   Inpatient Nursing   
freq                 1              28          10                  28   

        ProviderSpecialty ProviderType  \
count                  13           27   
unique                  3           10   
top     Hospital Medicine    Physician   
freq                    7            8   

                                                 NoteText  
count                                                  28  
unique                                                 28  
top           CASE MANAGEMENT DISCHARGE     CASE MANAG...  
freq                                                    1  


Unnamed: 0,deid_note_key,EncounterKey,NoteDate,DepartmentSpecialty,ProviderSpecialty,ProviderType,NoteText
276,D01E5762A11718,DA15CCF42ACF8B,2023-09-05,Inpatient Nursing,,Case Manager,CASE MANAGEMENT DISCHARGE CASE MANAG...
639,D04D08F4BD461D,DA15CCF42ACF8B,2023-09-01,Inpatient Nursing,Emergency Medicine,Physician,"XR KNEE 3 VIEWS LEFT 09/01/2023 9:51 PM"" ..."
665,D0501204CCC1D5,DA15CCF42ACF8B,2023-09-02,Inpatient Nursing,,Nurse Practitioner,GERIATRICS ED CONSULT NOTE The *****/GED (...
870,D06A180EC8A930,DA15CCF42ACF8B,2023-09-04,Inpatient Nursing,,Physical Therapist,PHYSICAL THERAPY INITIAL EVALUATION PT rele...
5537,D2D253B7F254E0,DA15CCF42ACF8B,2023-09-05,Inpatient Nursing,Hospital Medicine,Physician,Dear ***** ***** You were admitted for Pulm...


In [9]:
from langchain_community.document_loaders import DataFrameLoader
loader = DataFrameLoader(
 data_frame = noteText,
 page_content_column = "NoteText",
 engine="pandas",
)

documents = loader.load_and_split()

In [10]:
print(documents[10])
print(len(documents))


page_content='Patient Demographics     Address  ***** *****. ***** ***** ***** ***** *****  ***** ***** ***** ***** Phone  *****-*****-***** (Home) *****Preferred*****  *****-*****-***** (Mobile) E-mail Address  *****@*****.*****      Cover Sheet      Discharge Attending *****  ***** Attending: ***** ***** ***** MD  Office Phone: *****-*****-*****  Pager: ***** ***** ***** Md  Note: These phone numbers may not be answered or checked 24/7. If you do not receive a response please call *****-*****-***** and ask the operator to page ***** ***** ***** MD or the covering physician.  The patient's Primary Care Physician may also be able to assist with questions regarding long term management. Primary Care Provider  Name: ***** ***** ***** *****  Address:No address on file  Phone:None  Fax:415-353-2703     Emergency Contact  Extended Emergency Contact Information  Primary Emergency Contact: **********  ***** Phone: *****-*****-*****  Relation: Mother   Discharge Case Manager  Name:  ***** ****

In [11]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS


vector_store = FAISS.from_documents(documents, embeddings)

In [12]:
vector_store.similarity_search_with_score("how old is the patient?", search_type="similarity", k=5)

[(Document(metadata={'deid_note_key': 'D0501204CCC1D5', 'EncounterKey': 'DA15CCF42ACF8B', 'NoteDate': '2023-09-02', 'DepartmentSpecialty': 'Inpatient Nursing', 'ProviderSpecialty': nan, 'ProviderType': 'Nurse Practitioner'}, page_content='GERIATRICS ED CONSULT NOTE     The *****/GED (Age-friendly emergency department or Geriatric ED) Consult Service assessed this patient as part of our older adult care pathway. The following is a summary and the details can be found in the A/P. This note will be routed to the PCP and any care recommendations will be shared with ED providers and staff and inpatient services as appropriate.    My date of service is 09/02/2023.    Geriatrics ED consult requested by ***** ***** ***** of the Emergency Medicine service for evaluation of geriatric syndromes.    AGE FRIENDLY Emergency Department Consult Note (Geriatrics)  Assessment and Plan Summary    The *****/GED (Age-friendly emergency department or Geriatric ED) Consult Service evaluated the patient at **

In [13]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [14]:
retriever = vector_store.as_retriever(search_type="similarity", k=5)

In [15]:
system = "You are a knowledgeable medical provider who specializes in medication management. Given a list of diagnosis and some snippets from patients notes {context}, answer if the patient notes contain any of the diagnosis."
prompt = ChatPromptTemplate.from_messages(
        [
        ("system", system), ("human", "{input}")
        ])
prompt.pretty_print()


You are a knowledgeable medical provider who specializes in medication management. Given a list of diagnosis and some snippets from patients notes [33;1m[1;3m{context}[0m, answer if the patient notes contain any of the diagnosis.


[33;1m[1;3m{input}[0m


In [16]:
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [17]:
rag_chain = (
        {"context": retriever | format_docs,
         "input": RunnablePassthrough()}
        | prompt 
        | llama_3_1
        | StrOutputParser()
)


In [18]:
response = rag_chain.invoke("""Based on the information from the note context, does the patient have any of the following:
              1. Mild to moderate esoophagitis
              2. GERD 
              3. Peptic Ulcer Disease
              4. Upper GI symptoms
              5. ICU Stress Ulcer Prophylaxis
              6. Barrett's Esophagus
              7. Chronic NSAID use with bleeding risk
              8. Severe esophagitis
              9. Documented history of bleeding GI ulcer
              10. Explain the reasoning for your answer
            Return the answer for each of these as a formatted JSON object with the key being the condition and the value being a boolean value for the first 9.  For the final question, return a string with the reasoning for your answer.""")
print(response)

```json
{
  "1. Mild to moderate esoophagitis": false,
  "2. GERD": true,
  "3. Peptic Ulcer Disease": false,
  "4. Upper GI symptoms": false,
  "5. ICU Stress Ulcer Prophylaxis": false,
  "6. Barrett's Esophagus": true,
  "7. Chronic NSAID use with bleeding risk": false,
  "8. Severe esophagitis": false,
  "9. Documented history of bleeding GI ulcer": false,
  "10. Reasoning": "The patient's medical history indicates that they have GERD (gastroesophageal reflux disease) and Barrett's esophagus, which is a complication of GERD. However, there is no mention of mild to moderate esophagitis, peptic ulcer disease, upper GI symptoms, ICU stress ulcer prophylaxis, severe esophagitis, or a documented history of bleeding GI ulcer. Additionally, there is no indication of chronic NSAID use with bleeding risk."
}
```

Note: The patient is taking omeprazole (PRILOSEC) which is a proton pump inhibitor (PPI) commonly used to treat GERD and other acid-related disorders. This further supports the pres

In [47]:
rag_chain = (
        RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
        | prompt 
        | llama_3_1
        | StrOutputParser()
)


retrieve_docs =  {"context": retriever | format_docs,
                "input": RunnablePassthrough()}

retrieve_docs = (lambda x: x["input"]) | retriever

chain = RunnablePassthrough.assign(context=retrieve_docs).assign(
    answer=rag_chain
)

In [48]:
result = chain.invoke({"input":"""Based on the information from the note context, does the patient have any of the following:
              1. Mild to moderate esophagitis
              2. GERD 
              3. Peptic Ulcer Disease
              4. Upper GI symptoms
              5. ICU Stress Ulcer Prophylaxis
              6. Barrett's Esophagus
              7. Chronic NSAID use with bleeding risk
              8. Severe esophagitis
              9. Documented history of bleeding GI ulcer
              10. Explain the reasoning for your answer
            Return the answer for each of these as a formatted JSON object with the key being the condition and the value being a boolean value for the first 9.  For the final question, return a string with the reasoning for your answer."""})
print(result)

{'input': "Based on the information from the note context, does the patient have any of the following:\n              1. Mild to moderate esoophagitis\n              2. GERD \n              3. Peptic Ulcer Disease\n              4. Upper GI symptoms\n              5. ICU Stress Ulcer Prophylaxis\n              6. Barrett's Esophagus\n              7. Chronic NSAID use with bleeding risk\n              8. Severe esophagitis\n              9. Documented history of bleeding GI ulcer\n              10. Explain the reasoning for your answer\n            Return the answer for each of these as a formatted JSON object with the key being the condition and the value being a boolean value for the first 9.  For the final question, return a string with the reasoning for your answer.", 'context': [Document(metadata={'deid_note_key': 'D4924D4A405D5E', 'EncounterKey': 'DA15CCF42ACF8B', 'NoteDate': '2023-09-02', 'DepartmentSpecialty': 'Inpatient Nursing', 'ProviderSpecialty': 'UCSF', 'ProviderType': 'N

In [49]:
print(result.keys())

dict_keys(['input', 'context', 'answer'])


In [50]:
extract_json_from_content(result["answer"])

{'1. Mild to moderate esoophagitis': False,
 '2. GERD': True,
 '3. Peptic Ulcer Disease': False,
 '4. Upper GI symptoms': False,
 '5. ICU Stress Ulcer Prophylaxis': False,
 "6. Barrett's Esophagus": True,
 '7. Chronic NSAID use with bleeding risk': False,
 '8. Severe esophagitis': False,
 '9. Documented history of bleeding GI ulcer': False,
 '10. Reasoning': "The patient's medical history indicates that they have GERD (gastroesophageal reflux disease) and Barrett's esophagus, which is a complication of GERD. However, there is no mention of mild to moderate esophagitis, peptic ulcer disease, upper GI symptoms, ICU stress ulcer prophylaxis, severe esophagitis, or a documented history of bleeding GI ulcer. Additionally, there is no indication of chronic NSAID use with bleeding risk."}

In [51]:
#from the result["answer"] extract only #10. Reasoning key value pair
reasoning = extract_json_from_content(result["answer"])["10. Reasoning"]
print(reasoning)

The patient's medical history indicates that they have GERD (gastroesophageal reflux disease) and Barrett's esophagus, which is a complication of GERD. However, there is no mention of mild to moderate esophagitis, peptic ulcer disease, upper GI symptoms, ICU stress ulcer prophylaxis, severe esophagitis, or a documented history of bleeding GI ulcer. Additionally, there is no indication of chronic NSAID use with bleeding risk.


In [52]:
result["context"]

[Document(metadata={'deid_note_key': 'D4924D4A405D5E', 'EncounterKey': 'DA15CCF42ACF8B', 'NoteDate': '2023-09-02', 'DepartmentSpecialty': 'Inpatient Nursing', 'ProviderSpecialty': 'UCSF', 'ProviderType': 'Nurse Practitioner'}, page_content='EMERGENCY DEPARTMENT PHYSICIAN NOTE      ED Attending(s): ***** ***** ***** 09/02/23 00:14; ***** ***** 09/02/23 01:02    Chief Complaint   Patient presents with    Knee Pain     Acute on  chronic left knee pain reports hx of dvt.        HISTORY     Interpreter used? (Optional):     ***** ***** is a ***** y.o. old male with hx of HTN SVT PE DVT on Eliquis DM total knee replacement ankylosing spondylitis on chronic prednisone presented here with left posterior leg pain worse around his knee symptom is acute on chronic worse the last couple days.  No new trauma or *****.  Also admits not taking his Eliquis for the last month as he ran out of prescription.  Recently moved to ***** ***** from ***** *****.  He was in the process of transitioning to warfa

In [63]:
system = f"""You are a knowledgeable medical provider who specializes in medication management. 
Given a list of note context, explain the reasoning with cited parts of the note that support this answer: {reasoning}.

An example response would be in this format:

The patient has severe esophagitis. This is supported by the following parts of the note:

"The patient has been experiencing severe heartburn for the past 3 weeks." from the note on 2022-01-01 by the Provider Type Resident.


"""
mstr_prompt = ChatPromptTemplate.from_messages(
        [
        ("system", system), ("human", "{input}")
        ])
mstr_prompt.pretty_print()


You are a knowledgeable medical provider who specializes in medication management. 
Given a list of note context, explain the reasoning with cited parts of the note that support this answer: The patient's medical history indicates that they have GERD (gastroesophageal reflux disease) and Barrett's esophagus, which is a complication of GERD. However, there is no mention of mild to moderate esophagitis, peptic ulcer disease, upper GI symptoms, ICU stress ulcer prophylaxis, severe esophagitis, or a documented history of bleeding GI ulcer. Additionally, there is no indication of chronic NSAID use with bleeding risk..

An example response would be in this format:

The patient has severe esophagitis. This is supported by the following parts of the note:

"The patient has been experiencing severe heartburn for the past 3 weeks." from the note on 2022-01-01 by the Provider Type Resident.





[33;1m[1;3m{input}[0m


In [64]:
print({"input":result["context"]})

{'input': [Document(metadata={'deid_note_key': 'D4924D4A405D5E', 'EncounterKey': 'DA15CCF42ACF8B', 'NoteDate': '2023-09-02', 'DepartmentSpecialty': 'Inpatient Nursing', 'ProviderSpecialty': 'UCSF', 'ProviderType': 'Nurse Practitioner'}, page_content='EMERGENCY DEPARTMENT PHYSICIAN NOTE      ED Attending(s): ***** ***** ***** 09/02/23 00:14; ***** ***** 09/02/23 01:02    Chief Complaint   Patient presents with    Knee Pain     Acute on  chronic left knee pain reports hx of dvt.        HISTORY     Interpreter used? (Optional):     ***** ***** is a ***** y.o. old male with hx of HTN SVT PE DVT on Eliquis DM total knee replacement ankylosing spondylitis on chronic prednisone presented here with left posterior leg pain worse around his knee symptom is acute on chronic worse the last couple days.  No new trauma or *****.  Also admits not taking his Eliquis for the last month as he ran out of prescription.  Recently moved to ***** ***** from ***** *****.  He was in the process of transitionin

In [65]:
mstr_chain = (
        {"input": RunnablePassthrough()}
        | mstr_prompt 
        | llama_3_1
        | StrOutputParser()
)

mstr_answer = mstr_chain.invoke({"input":result["context"]})
print(mstr_answer)

The patient does not have mild to moderate esophagitis, peptic ulcer disease, upper GI symptoms, ICU stress ulcer prophylaxis, severe esophagitis, or a documented history of bleeding GI ulcer. Additionally, there is no indication of chronic NSAID use with bleeding risk.

This is supported by the following parts of the note:

* "Gastrointestinal: Negative for blood in stool and *****." from the note on 2023-09-02 by the Provider Type Resident.
* "Abdominal: General: Abdomen is flat. Bowel sounds are normal. There is no distension. Palpations: Abdomen is soft. There is no mass. Tenderness: There is no abdominal tenderness. There is no guarding." from the note on 2023-09-02 by the Provider Type Nurse Practitioner.
* "Past Medical History: Diagnosis Date • GERD (gastroesophageal reflux disease) Barretts appearing mucosa but not dysplasia on path" from the note on 2023-09-02 by the Provider Type Resident, which indicates that the patient has GERD and Barrett's esophagus, but no mention of e