In [4]:
import pandas as pd
import numpy as np
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
import os
import json
from pathlib import Path


In [5]:
#from dotenv import load_dotenv

#load_dotenv()

groq_key = os.environ['groqkey']

data_path = Path(os.environ['data_path'])


In [6]:
from extraction import llmAgent
from query import DataLoader
from main import main

  embeddings = HuggingFaceEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
  from tqdm.autonotebook import tqdm, trange


In [7]:
encounter_key = "SyntheticPt1"

In [8]:
final_recommendation, final_reasoning, token_usage, search_history_so_far, token_count_history = main(
    groq_key=groq_key, data_path=data_path, encounter_key=encounter_key
)

In [9]:
final_recommendation

'stop'

In [10]:
final_reasoning 

'The recommendation is to stop the PPI due to evidence of a completed Helicobacter Pylori (H. Pylori) infection treatment with combination therapy, indicating that the PPI is no longer necessary for this condition.'

In [11]:
search_history_so_far

{'diagnosis_source_continue': {'diagnosis_boolean': '0',
  'explanation': "There is no evidence of the specific conditions in the provided information. The patient's diagnoses include Type 1 Diabetes Mellitus, H. Pylori Infection, and Chronic Migraines, but none of the conditions that would indicate a need to continue, reduce, or stop the PPI are present."},
 'encounters_source_continue': {'diagnosis_boolean': '0',
  'explanation': "There is no evidence in the provided information to support the presence of 'Barretts Esophagus or esophageal cell changes', 'Chronic Non-Steroidal Anti Inflammatory (NSAID) use or GI prophylaxis NSAID use', 'Severe esophagitis including bleeding esophagitis or esophageal ulcer', or 'History of gastrointestinal bleeding, gastric ulcer, upper GI bleed, or peptic ulcer hemorrhage'. The primary diagnosis is Diabetic Ketoacidosis, and there is no mention of any gastrointestinal conditions that would warrant the use of a PPI."},
 'diagnosis_source_stop': {'diagn

### test data retrieval

In [11]:
llm_agent = llmAgent(groq_key=groq_key, data_path=data_path)

In [12]:
diagnosis_data_dict = llm_agent.get_data(
    encounter_key=encounter_key, source="diagnosis"
)
diagnosis_data_dict

{'hospitalAcquiredDx': '[{"EncounterKey":"SyntheticPt2","DxName":"Pressure Ulcer"}]',
 'presentOnAdmitDx': '[{"EncounterKey":"SyntheticPt2","DxName":"Chronic Obstructive Pulmonary Disease"},{"EncounterKey":"SyntheticPt2","DxName":"Hypertension"},{"EncounterKey":"SyntheticPt2","DxName":"Hyperlipidemia"},{"EncounterKey":"SyntheticPt2","DxName":"Gastroesophageal Reflux Disease"}]'}

In [13]:
encounters_data_dict = llm_agent.get_data(
    encounter_key=encounter_key, source="encounters"
)
encounters_data_dict

'[{"EncounterKey":"SyntheticPt2","PatientKey":"SyntheticPt2","Sex":"M","Age_y":67,"BirthDate":"1956-09-15","PtAdmitDate":"2024-09-15","PtDischargeDate":"2024-09-20","DRG":"123","FinancialClass":"Medicare","AdmissionOrigin":"Emergency","AdmissionSource":"Referral","AdmissionType":"Emergency","PrimaryDx":"COPD exacerbation","PresentOnAdmissionDiagnosisComboKey":1,"HospitalAcquiredDiagnosisComboKey":0,"DischargeDisposition":"Home","DischargePatientClass":"Inpatient"}]'

In [14]:
noteText = llm_agent.get_data(encounter_key=encounter_key, source="notes")
noteText

Unnamed: 0,deid_note_key,EncounterKey,NoteDate,DepartmentSpecialty,ProviderSpecialty,ProviderType,NoteText,is_on_last_note_date,key,PtDischargeDate,discharge_text,llm_summary
1501,SyntheticPt2_Note1,SyntheticPt2,9/15/2024,Inpatient Nursing,Pulmonary Medicine,Resident,"Yes, based on the clinician note, I found the ...",0.0,SyntheticPt2,9/20/2024,,"Yes, based on the clinician note, I found the ..."
1504,SyntheticPt2_Note4,SyntheticPt2,9/18/2024,Inpatient Nursing,Pulmonary Medicine,Resident,"Yes, there is a potential match for the diagno...",0.0,SyntheticPt2,9/20/2024,,"Yes, there is a potential match for the diagno..."
1505,SyntheticPt2_Note5,SyntheticPt2,9/20/2024,Inpatient Nursing,Pulmonary Medicine,Resident,"Yes, based on the clinician note, there are a ...",1.0,SyntheticPt2,9/20/2024,,"Yes, based on the clinician note, there are a ..."


# Test extraction

In [10]:
diagnosis = "Barretts Esophagus"

In [14]:
from pydantic import BaseModel, Field
from langchain_core.output_parsers import JsonOutputParser


class DiagnosisSearchDict(BaseModel):
    diagnosis_boolean: str = Field(
        description="True if the diagnosis is found, else False"
    )
    explanation: str = Field(
        description="A concise explanation for how the determination of the diagnosis was made"
    )

In [15]:
parser = JsonOutputParser(pydantic_object=DiagnosisSearchDict)

In [16]:
from langchain_core.prompts import PromptTemplate

prompt = PromptTemplate(
    template="You are a knowledgeable medical provider who specializes in medication management. In the following case, your patient is prescribed a PPI (proton pump inhibitor) and need to make a decision to continue, reduce, or stop the PPI. Determine if there is evidence of the specific condition which will help determine whether to continue, reduce, or stop the medication on discharge.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

In [17]:
chain = prompt | llm_agent.llm | parser

In [18]:
output = chain.invoke(
    {
        "query": f"Based on the provided information here: {diagnosis_data_dict}, is there evidence of {diagnosis}? Do NOT assume a condition based on prescribed medication. We know all of these patients are prescribed a ppi, but we need to know why. Be very sure of a diagnosis."
    }
)

In [19]:
output

{'diagnosis_boolean': 'False',
 'explanation': "The patient's diagnoses include Gastrointestinal Bleeding (GIB), but there is no mention of Barrett's Esophagus."}

In [None]:
# output.usage_metadata['total_tokens']

487

In [11]:
llm_agent.extract_diagnosis(
    diagnosis_data_dict=diagnosis_data_dict, diagnosis=diagnosis
)

{'diagnosis_boolean': 'False',
 'explanation': "The patient has a diagnosis of Gastrointestinal Bleeding (GIB), but there is no evidence of Barrett's Esophagus in the provided information."}

In [12]:
llm_agent.extract_encounter(
    encounters_data_dict=encounters_data_dict, diagnosis=diagnosis
)

{'diagnosis_boolean': 'False',
 'explanation': "The primary diagnosis is gastrointestinal hemorrhage, unspecified, which does not provide direct evidence of Barrett's Esophagus."}

# Test notes extraction

In [None]:
llm_agent.extract_notes(noteText=noteText, diagnosis=diagnosis)

{'diagnosis_boolean': 'False',
 'explanation': "There is no evidence of Barrett's Esophagus in the provided information. The EGD and Colonoscopy results from 03/10/14 and the recent EGD results mention a normal esophagus, and there is no mention of Barrett's Esophagus or any other esophageal abnormalities that would suggest this diagnosis."}

In [38]:
from langchain.output_parsers import PydanticOutputParser
from langchain_community.document_loaders import DataFrameLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_groq import ChatGroq
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pydantic import BaseModel, Field


embeddings = HuggingFaceEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
text_splitter = RecursiveCharacterTextSplitter()

loader = DataFrameLoader(
    data_frame=noteText,
    page_content_column="NoteText",
    engine="pandas",
)

documents = loader.load_and_split()

vector_store = FAISS.from_documents(documents, embeddings)

retriever = vector_store.as_retriever(search_type="similarity", k=5)

parser = JsonOutputParser(pydantic_object=DiagnosisSearchDict)
# parser = PydanticOutputParser(pydantic_object=DiagnosisSearchDict)

In [39]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [40]:
from langchain_core.runnables import RunnablePassthrough

prompt = PromptTemplate(
    template="You are a knowledgeable medical provider who specializes in medication management. In the following case, your patient is prescribed a PPI (proton pump inhibitor) and need to make a decision to continue, reduce, or stop the PPI. Determine if there is evidence of the specific condition which will help determine whether to continue, reduce, or stop the medication on discharge.\n{format_instructions}\nUse this information for your answer: {context}\n{query}\n",
    input_variables=["context", "query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# rag_chain = (
#     RunnablePassthrough.assign(
#         context=(lambda x: llmAgent.format_docs(x["context"]))
#     )
#     | prompt
#     | llm_agent.llm
# )

rag_chain = (
    {"context": retriever | format_docs, "query": RunnablePassthrough()}
    | prompt
    | llm_agent.llm
    | parser
)


In [None]:
rag_chain.invoke(
    f"Is there evidence of {diagnosis}? Do NOT assume a condition based on prescribed medication. We know all of these patients are prescribed a ppi, but we need to know why. Be very sure of a diagnosis."
)

{'diagnosis_boolean': 'False',
 'explanation': "There is no evidence of Barrett's Esophagus in the provided information. The EGD and Colonoscopy results from 03/10/14 and the recent EGD results mention a normal esophagus, and there is no mention of Barrett's Esophagus or any other esophageal abnormalities that would suggest this diagnosis."}

In [None]:
retrieve_docs = (lambda x: x["query"]) | retriever
chain = RunnablePassthrough.assign(context=retrieve_docs).assign(answer=rag_chain)

RunnableAssign(mapper={
  context: RunnableLambda(lambda x: x['query'])
           | VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x3a5d84bf0>, search_kwargs={})
})
| RunnableAssign(mapper={
    answer: RunnableAssign(mapper={
              context: RunnableLambda(lambda x: llmAgent.format_docs(x['context']))
            })
            | PromptTemplate(input_variables=['query'], input_types={}, partial_variables={'format_instructions': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {

In [44]:
# chain = chain | parser

In [46]:
chain.invoke(
    {
        "query": "Based on the provided information here: {context}, is there evidence of {diagnosis}? Do NOT assume a condition based on prescribed medication. We know all of these patients are prescribed a ppi, but we need to know why. Be very sure of a diagnosis."
    }
)

{'query': 'Based on the provided information here: {context}, is there evidence of {diagnosis}? Do NOT assume a condition based on prescribed medication. We know all of these patients are prescribed a ppi, but we need to know why. Be very sure of a diagnosis.',
 'context': [Document(metadata={'deid_note_key': 'D345CFCA85F004', 'EncounterKey': 'D6253A5CE371EA', 'NoteDate': Timestamp('2023-09-18 00:00:00'), 'DepartmentSpecialty': 'Inpatient Nursing', 'ProviderSpecialty': 'UCSF', 'ProviderType': 'Resident', 'is_on_last_note_date': 0, 'key': 'D6253A5CE371EA', 'PtDischargeDate': Timestamp('2023-09-21 00:00:00'), 'discharge_text': None}, page_content='with quadruple therapy  - OK for AC and AP from ***** perspective - would reinitiate all indicated AC/AP under observation  - would hold oral iron - if he requires iron would prefer IV iron inpatient to allow for better assessment of stools for UGIB  - maintain 2 ***** while AC/AP reinitiated  - maintain active T&S  - agree with evaluation " fo

In [6]:
main(groq_key=groq_key, data_path=data_path, encounter_key=encounter_key)

final token count: 812
Recommendation: 
continue

Reasoning: 
The patient has a documented history of gastrointestinal bleeding (GIB) and a history of bleeding GI ulcer, which suggests they may be at risk for upper GI symptoms and bleeding GI ulcers. The patient's primary diagnosis is gastrointestinal hemorrhage, and their age and hospital admission for this condition may indicate a higher risk for certain conditions. However, there is no specific information about other conditions such as mild to moderate esophagitis, GERD, peptic ulcer disease, Barrett's esophagus, chronic NSAID use with bleeding risk, severe esophagitis, and H pylori infection. The patient's history of bleeding and anemia suggests a possible risk for future bleeding events.

Label df: 
              key                                             reason  \
0  D6253A5CE371EA  example with gastrointestinal bleed should con...   

  recommendation  
0       continue  


In [7]:
llm_agent = llmAgent(groq_key=groq_key, data_path=data_path)

In [8]:
diagnosis_json = llm_agent.extract_diagnosis(encounter_key=encounter_key)
diagnosis_json

({'Mild to moderate esophagitis': False,
  'GERD': False,
  'Peptic Ulcer Disease': False,
  'Upper GI symptoms': True,
  'ICU Stress Ulcer Prophylaxis': False,
  'Barretts Esophagus': False,
  'Chronic NSAID use with bleeding risk': False,
  'Severe esophagitis': False,
  'Documented history of bleeding GI ulcer': True,
  'H pylori infection': False,
  'Reasoning': 'The patient has a documented history of gastrointestinal bleeding (GIB), which suggests that they may have experienced upper GI symptoms and may have a history of bleeding GI ulcer. However, there is no specific information about the other conditions. The presence of GIB does not necessarily imply the presence of the other conditions, but it does suggest that the patient may be at risk for upper GI symptoms and bleeding GI ulcers.'},
 617)

In [9]:
encounter_json = llm_agent.extract_encounter_info(encounter_key=encounter_key)
encounter_json

({'Mild to moderate esophagitis': False,
  'GERD': False,
  'Peptic Ulcer Disease': False,
  'Upper GI symptoms': False,
  'ICU Stress Ulcer Prophylaxis': False,
  'Barretts Esophagus': False,
  'Chronic NSAID use with bleeding risk': False,
  'Severe esophagitis': False,
  'Documented history of bleeding GI ulcer': False,
  'H pylori infection': False,
  'Reasoning': "The primary diagnosis is listed as 'Gastrointestinal hemorrhage, unspecified', which suggests that the patient experienced bleeding in the gastrointestinal tract, but the exact cause or underlying condition is not specified. There is no direct mention of any of the listed conditions in the provided JSON information. Therefore, it is not possible to confirm the presence of any of these conditions based on the available data."},
 649)

In [10]:
notes = llm_agent.extract_notes(encounter_key=encounter_key)
notes

({'Mild to moderate esophagitis': False,
  'GERD': False,
  'Peptic Ulcer Disease': False,
  'Upper GI symptoms': False,
  'ICU Stress Ulcer Prophylaxis': False,
  'Barretts Esophagus': False,
  'Chronic NSAID use with bleeding risk': False,
  'Severe esophagitis': False,
  'Documented history of bleeding GI ulcer': True,
  'H pylori infection': True,
  'Reasoning': "The patient has a history of gastrointestinal bleeding and has been diagnosed with H. pylori infection. The patient's recent endoscopy results show mild antral erythema and a punctate area of submucosal erythema in the gastric body. The patient's symptoms and test results suggest a high risk for gastrointestinal bleeding, and therefore, the patient's H. pylori infection and history of bleeding GI ulcer are relevant to the diagnosis."},
 3996,
 [Document(metadata={'deid_note_key': 'D345CFCA85F004', 'EncounterKey': 'D6253A5CE371EA', 'NoteDate': Timestamp('2023-09-18 00:00:00'), 'DepartmentSpecialty': 'Inpatient Nursing', 'Pr

In [None]:
notes_json["answer"].content  # this is the json

'Based on the provided information, here is a JSON instance that conforms to the given schema:\n\n```\n{\n  "Mild_to_moderate_esophagitis": false,\n  "GERD": false,\n  "Peptic_Ulcer_Disease": false,\n  "Upper_GI_symptoms": true,\n  "ICU_Stress_Ulcer_Prophylaxis": false,\n  "Barretts_Esophagus": false,\n  "Chronic_NSAID_use_with_bleeding_risk": false,\n  "Severe_esophagitis": false,\n  "Documented_history_of_bleeding_GI_ulcer": true,\n  "H_pylori_infection": true,\n  "Reasoning": "The patient has a history of gastrointestinal bleeding and has been diagnosed with H. pylori infection. The patient\'s recent labs show a low hemoglobin level and a positive Helicobacter pylori IgG antibody test. The patient\'s endoscopy results show mild antral erythema and a punctate area of submucosal erythema in the gastric body. The patient\'s symptoms and test results suggest that they have upper GI symptoms and a documented history of bleeding GI ulcer."\n}\n```\n\nNote that some of the fields in the JS

In [12]:
notes_json

{'input': 'Based on the information from the note {context}, does the patient have any of the following:\n              1. Mild to moderate esophagitis\n              2. GERD \n              3. Peptic Ulcer Disease\n              4. Upper GI symptoms\n              5. ICU Stress Ulcer Prophylaxis\n              6. Barretts Esophagus\n              7. Chronic NSAID use with bleeding risk\n              8. Severe esophagitis\n              9. Documented history of bleeding GI ulcer\n              10. H pylori infection\n              11. Explain the reasoning for your answer\n            Return the answer for each of these as a formatted JSON object with the key being the condition and the value being a boolean value for the first 10.  For the final question, return a string with the reasoning for your answer.',
 'context': [Document(metadata={'deid_note_key': 'D345CFCA85F004', 'EncounterKey': 'D6253A5CE371EA', 'NoteDate': '2023-09-18', 'DepartmentSpecialty': 'Inpatient Nursing', 'Provid

In [None]:
def replace_underscores_in_keys(json_obj):
    if isinstance(json_obj, dict):
        new_obj = {}
        for key, value in json_obj.items():
            new_key = key.replace("_", " ")
            new_obj[new_key] = replace_underscores_in_keys(value)
        return new_obj
    elif isinstance(json_obj, list):
        return [replace_underscores_in_keys(item) for item in json_obj]
    else:
        return json_obj

In [None]:
notes_json = replace_underscores_in_keys(notes_json)
notes_json

In [None]:
import re


def extract_json_content(response):
    # Extract content between the curly braces
    match = re.search(r"\{(.*)\}", response, re.DOTALL)
    if match:
        return match.group(1).strip()
    else:
        raise ValueError("No JSON content found between curly braces.")


def convert_to_json(response):
    # Remove comments from the string
    cleaned_response = re.sub(r"//.*", "", response)
    # Add curly braces to form a valid JSON object
    json_string = "{" + cleaned_response.strip() + "}"
    # Parse the JSON string into a dictionary
    response_dict = json.loads(json_string)
    return response_dict


In [None]:
notes_json_parsed = extract_json_content(notes_json)
final_json = convert_to_json(notes_json_parsed)

In [None]:
system = (
    "You are a knowledgeable medical provider who specializes in medication management."
)
human = "{text}"
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", human)])

chain = prompt | llm_agent.llm
response = chain.invoke(
    {
        "text": f"""Based on the following json files, please provide a single explanation of the reasoning given by the 'Reasoning' key. Summarize given equal 
        weight to each. Do not add any additional information, only summarize what is given.
        {diagnosis_json}
        {encounter_json}
        {notes_json}"""
    }
)

In [None]:
response.pretty_print()

In [None]:
response.content

In [None]:
from typing import List
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field, validator


class NoteResponse(BaseModel):
    Mild_to_moderate_esoophagitis: bool = Field(
        description="Mild to moderate esoophagitis"
    )
    GERD: bool = Field(description="GERD")
    Peptic_Ulcer_Disease: bool = Field(description="Peptic Ulcer Disease")
    Upper_GI_symptoms: bool = Field(description="Upper GI symptoms")
    ICU_Stress_Ulcer_Prophylaxis: bool = Field(
        description="ICU Stress Ulcer Prophylaxis"
    )
    Barretts_Esophagus: bool = Field(description="Barrett's Esophagus")
    Chronic_NSAID_use_with_bleeding_risk: bool = Field(
        description="Chronic NSAID use with bleeding risk"
    )
    Severe_esophagitis: bool = Field(description="Severe esophagitis")
    Documented_history_of_bleeding_GI_ulcer: bool = Field(
        description="Documented history of bleeding GI ulcer"
    )
    Reasoning: str = Field(description="Explain the reasoning for your answer")

'{\n  "title": "NoteResponse",\n  "type": "object",\n  "properties": {\n    "Mild_to_moderate_esoophagitis": {\n      "title": "Mild To Moderate Esoophagitis",\n      "description": "Mild to moderate esoophagitis",\n      "type": "boolean"\n    },\n    "GERD": {\n      "title": "Gerd",\n      "description": "GERD",\n      "type": "boolean"\n    },\n    "Peptic_Ulcer_Disease": {\n      "title": "Peptic Ulcer Disease",\n      "description": "Peptic Ulcer Disease",\n      "type": "boolean"\n    },\n    "Upper_GI_symptoms": {\n      "title": "Upper Gi Symptoms",\n      "description": "Upper GI symptoms",\n      "type": "boolean"\n    },\n    "ICU_Stress_Ulcer_Prophylaxis": {\n      "title": "Icu Stress Ulcer Prophylaxis",\n      "description": "ICU Stress Ulcer Prophylaxis",\n      "type": "boolean"\n    },\n    "Barretts_Esophagus": {\n      "title": "Barretts Esophagus",\n      "description": "Barrett\'s Esophagus",\n      "type": "boolean"\n    },\n    "Chronic_NSAID_use_with_bleeding_r

In [None]:
final_dict = {}
for key in diagnosis_json.keys():
    # print(key)
    if not key == "Reasoning":
        diagnosis_bool = diagnosis_json[key]
        encounter_bool = encounter_json[key]
        notes_bool = notes_json[key]

        final_dict[key] = diagnosis_bool or encounter_bool or notes_bool

final_dict

In [9]:
# DL = DataLoader(data_path=base_path)

In [10]:
# DL.get_label(encounter_key=encounter_key)

In [11]:
# DL.get_diagnosis_data(encounter_key=encounter_key)

In [12]:
# DL.get_encounter_data(encounter_key=encounter_key)