In [1]:
import pandas as pd
import numpy as np
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
import os
import json
from pathlib import Path

In [2]:
# Read the Groq API key from the environment instead of committing it to the
# notebook. Set GROQ_API_KEY before starting the kernel (e.g. in your shell
# profile or a git-ignored .env file).
# SECURITY: the key that used to be hardcoded in this cell has been exposed in
# the notebook source and should be revoked/rotated in the Groq console.
groq_key = os.environ.get("GROQ_API_KEY", "").strip()
if not groq_key:
    # Fail here, with a clear message, rather than later inside an API call.
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it before running this notebook."
    )

In [3]:
from extraction import ExtractionAgent
from query import DataLoader
from main import main


  embeddings = HuggingFaceEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
  from tqdm.autonotebook import tqdm, trange


In [4]:
# Location of the project's Data directory. Override it per machine by setting
# the DEPRESCRIBER_DATA_DIR environment variable instead of editing this cell;
# when the variable is unset or empty, the original Windows checkout path is
# used so existing setups behave exactly as before.
# A macOS checkout previously used:
# "/Users/yarg/Library/CloudStorage/OneDrive-Personal/Documents/GitHub/DataSci210_MedicationDeprescriber/Data"
_DEFAULT_DATA_DIR = r"C:\Users\grays\OneDrive\Documents\GitHub\DataSci210_MedicationDeprescriber\Data"
data_path = Path(os.environ.get("DEPRESCRIBER_DATA_DIR") or _DEFAULT_DATA_DIR)

In [5]:
# Encounter identifier used for every lookup in this notebook: it is passed to
# main() and to each ExtractionAgent.get_data() call below.
encounter_key = "D6253A5CE371EA"

In [6]:
# Maps each PPI recommendation label ("continue", "stop", "deprescribe") to the
# list of conditions that are searched for under that label.
# Fixed two typos in the condition strings ("GUI ulcer" -> "GI ulcer",
# "NSAID used" -> "NSAID use"); these strings are meant to be read by an LLM,
# so spelling matters.
# NOTE(review): "Chronic NSAID use with bleeding risk" is listed under both
# "continue" and "stop" -- confirm which recommendation it should drive.
# NOTE(review): this dict is not passed to main() below; presumably main() has
# its own copy -- confirm the two agree.
recommendation_dict = {
    "continue": [
        "Barretts Esophagus",
        "Chronic NSAID use with bleeding risk",
        "Severe esophagitis",
        "Documented history of bleeding GI ulcer",
    ],
    "stop": [
        "Peptic Ulcer Disease",
        "Chronic NSAID use with bleeding risk",
        "ICU Stress Ulcer Prophylaxis",
        "H Pylori infection",
    ],
    "deprescribe": [
        "Mild to moderate esophagitis",
        "GERD",
    ],
}

In [7]:
# Run the end-to-end pipeline for one encounter. main() returns five values,
# unpacked here in order: the recommendation label, the reasoning text, the
# total token count, the per-step search results and the per-step token counts.
(
    recommendation_str,
    final_reasoning,
    token_usage,
    search_history_so_far,
    token_count_history,
) = main(groq_key=groq_key, data_path=data_path, encounter_key=encounter_key)

In [10]:
# Show the recommendation label returned by main().
recommendation_str

'deprescribe'

In [11]:
# Show the free-text reasoning main() returned alongside the recommendation.
final_reasoning

"The recommendation is to deprescribe the medication due to a lack of evidence supporting the presence of conditions that typically warrant its use, such as esophagitis or GERD, as the patient's EGD and colonoscopy results showed a normal esophagus with no ulcers, erosions, or other abnormalities."

In [12]:
# Show the token total returned by main().
token_usage

15995

In [13]:
# Show the per-step token counts returned by main().
token_count_history

{'diagnosis_source_continue': 573,
 'encounters_source_continue': 694,
 'diagnosis_source_stop': 589,
 'encounters_source_stop': 717,
 'diagnosis_source_deprescribe': 546,
 'encounters_source_deprescribe': 661,
 'notes_source_continue': 3560,
 'notes_source_stop': 3562,
 'notes_source_deprescribe': 3968,
 'final_summary': 1125}

In [14]:
# Show the per-step search results returned by main().
search_history_so_far

{'diagnosis_source_continue': {'diagnosis_boolean': '0',
  'explanation': "There is no evidence of Barrett's Esophagus, Chronic NSAID use with bleeding risk, Severe esophagitis, or Documented history of bleeding GI ulcer in the provided information. The patient has a diagnosis of GIB (gastrointestinal bleeding) which may be the reason for the PPI prescription, but it is not explicitly listed in the conditions to check for."},
 'encounters_source_continue': {'diagnosis_boolean': '0',
  'explanation': "Based on the provided information, there is no clear evidence of Barrett's Esophagus, Chronic NSAID use with bleeding risk, Severe esophagitis, or Documented history of bleeding GI ulcer. The primary diagnosis is listed as 'Gastrointestinal hemorrhage, unspecified', which does not provide enough information to confirm any of the specified conditions. Therefore, it is not possible to determine the reason for the PPI prescription with certainty."},
 'diagnosis_source_stop': {'diagnosis_boole

In [8]:
# Build the extraction agent directly so the individual steps can be walked
# through by hand below (data lookup via .get_data, the model via .llm).
llm_agent = ExtractionAgent(groq_key=groq_key, data_path=data_path)

In [9]:
# Fetch the "diagnosis" source for this encounter and display it.
diagnosis_data_dict = llm_agent.get_data(source="diagnosis", encounter_key=encounter_key)
diagnosis_data_dict

{'hospitalAcquiredDx': '[{"EncounterKey":"D6253A5CE371EA","DxName":"*Unspecified"}]',
 'presentOnAdmitDx': '[{"EncounterKey":"D6253A5CE371EA","DxName":"GIB (gastrointestinal bleeding)"}]'}

In [None]:
# Conditions checked in the manual walkthrough below; the list is interpolated
# verbatim into the LLM query. Fixed two typos that would otherwise reach the
# prompt: "NSAID used" -> "NSAID use" and "GUI ulcer" -> "GI ulcer".
diagnosis_list = [
    "Barretts Esophagus",
    "Chronic NSAID use with bleeding risk",
    "Severe esophagitis",
    "Documented history of bleeding GI ulcer",
]

In [None]:
from pydantic import BaseModel, Field
from langchain_core.output_parsers import JsonOutputParser


# Structured answer the LLM is asked to produce for one diagnosis search.
# Documented with `#` comments rather than a class docstring on purpose:
# pydantic copies a model docstring into the generated JSON schema, which would
# change the format instructions derived from this model.
class DiagnosisSearchDict(BaseModel):
    # Found / not-found flag, carried as the string "1" or "0".
    diagnosis_boolean: str = Field(description="1 if the diagnosis is found, else 0")

    # Short justification for the flag above.
    explanation: str = Field(
        description=(
            "A concise explanation for how the determination of the diagnosis was made"
        ),
    )

In [None]:
# JSON output parser tied to DiagnosisSearchDict. It is used twice below: its
# get_format_instructions() text is injected into the prompt, and its parse()
# turns the raw LLM reply into a Python object.
parser = JsonOutputParser(pydantic_object=DiagnosisSearchDict)

In [None]:
from langchain_core.prompts import PromptTemplate

# Prompt text for the structured-data check. `{format_instructions}` is filled
# in once from the parser; `{query}` is supplied on every invocation.
DIAGNOSIS_PROMPT_TEMPLATE = """You are a knowledgeable medical provider who specializes in medication management. In the following case, your patient is prescribed
    a PPI (proton pump inhibitor) and need to make a decision to continue, reduce, or stop the PPI. Determine if there is evidence of the specific
    condition which will help determine whether to continue, reduce, or stop the medication on discharge.
    # Response Format Instructions #
    {format_instructions}
    # Question #
    {query}"""

prompt = PromptTemplate(
    partial_variables={"format_instructions": parser.get_format_instructions()},
    input_variables=["query"],
    template=DIAGNOSIS_PROMPT_TEMPLATE,
)

In [None]:
# Pipe the prompt into the agent's LLM. The parser step is commented out, so the
# chain returns the raw model message -- presumably so that its
# response_metadata (token counts) can be inspected before parsing separately.
chain = prompt | llm_agent.llm # | parser

In [None]:
# Build the question from the diagnosis data and the condition list, then send
# it through the chain as the prompt's `query` variable.
diagnosis_query = f"Based on the provided information here: {diagnosis_data_dict}, is there evidence of {diagnosis_list}? Do NOT assume a condition based on prescribed medication. We know all of these patients are prescribed a ppi, but we need to know why. Be very sure of a diagnosis."
output = chain.invoke({"query": diagnosis_query})

In [None]:
# Total tokens reported in the response metadata for the call above.
output.response_metadata["token_usage"]["total_tokens"]

In [None]:
# Parse the raw reply text with the JSON parser and display the result.
pydantic_obj = parser.parse(output.content)
pydantic_obj

In [None]:
# Run the agent's own extract_diagnosis() on the same inputs -- presumably to
# compare against the hand-built chain above. Note that the whole condition
# list is passed as `diagnosis`.
llm_agent.extract_diagnosis(diagnosis_data_dict=diagnosis_data_dict, diagnosis=diagnosis_list)

In [None]:
# Fetch the "encounters" source for this encounter and display it.
encounters_data_dict = llm_agent.get_data(source="encounters", encounter_key=encounter_key)
encounters_data_dict

'[{"EncounterKey":"D6253A5CE371EA","PatientKey":"DE3B4E1AA899F6","Sex":"Male","Age_y":81,"BirthDate":"1942-01-31","PtAdmitDate":"2023-09-17","PtDischargeDate":"2023-09-21","DRG":"GASTROINTESTINAL HEMORRHAGE WITH MCC","FinancialClass":"Medicare Advantage HMO\\/Senior","AdmissionOrigin":"Transfer Center Admission","AdmissionSource":"Transfer - Acute Hospital","AdmissionType":"Urgent","PrimaryDx":"Gastrointestinal hemorrhage, unspecified","PresentOnAdmissionDiagnosisComboKey":1848960,"HospitalAcquiredDiagnosisComboKey":-1,"DischargeDisposition":"Home or Self Care","DischargePatientClass":"Inpatient"}]'

: 

In [None]:
# Fetch the "notes" source for this encounter. The result is handed to
# DataFrameLoader as `data_frame` further down, so it is treated as a pandas
# DataFrame here.
noteText = llm_agent.get_data(encounter_key=encounter_key, source="notes")
# Preview only the first rows: dumping every clinical note bloats the notebook
# and risks leaving patient text in a saved or shared copy.
noteText.head()

In [None]:
from langchain.output_parsers import PydanticOutputParser
from langchain_community.document_loaders import DataFrameLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_groq import ChatGroq
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pydantic import BaseModel, Field


# Build a similarity retriever over the encounter's notes:
# embed -> split -> index in FAISS -> expose as a retriever.
embeddings = HuggingFaceEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
text_splitter = RecursiveCharacterTextSplitter()

# Each row's "NoteText" column becomes the document body.
loader = DataFrameLoader(
    data_frame=noteText,
    page_content_column="NoteText",
    engine="pandas",
)

# Pass the splitter explicitly; it was previously created but never used.
documents = loader.load_and_split(text_splitter=text_splitter)

vector_store = FAISS.from_documents(documents, embeddings)

# The number of hits must go through `search_kwargs`. A bare `k=3` keyword is
# not forwarded to the similarity search, so the retriever was falling back to
# its default hit count.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

parser = JsonOutputParser(pydantic_object=DiagnosisSearchDict)
# parser = PydanticOutputParser(pydantic_object=DiagnosisSearchDict)

In [None]:
def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects that expose a ``page_content`` attribute.

    Returns:
        The ``page_content`` values separated by one blank line; an empty
        string when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

In [None]:
from langchain_core.runnables import RunnablePassthrough

# Prompt text for the notes (RAG) check. `{context}` receives the retrieved
# note text and `{query}` the question; `{format_instructions}` is filled in
# once from the parser.
NOTES_PROMPT_TEMPLATE = """You are a knowledgeable medical provider who specializes in medication management. In the following case, your patient is prescribed a PPI (proton pump inhibitor) and need to make a decision to continue, reduce, or stop the PPI. Determine if there is evidence of the specific condition which will help determine whether to continue, reduce, or stop the medication on discharge.
    Use the Context as information for your answer: 
    # Context #
    {context}
    Do NOT assume a condition based on prescribed medication. We know all of these patients are prescribed a ppi, but we need to know why. Be very sure of a diagnosis.
    # Format Instructions #
    {format_instructions}
    # Question #
    {query}
    """

prompt = PromptTemplate(
    partial_variables={"format_instructions": parser.get_format_instructions()},
    input_variables=["context", "query"],
    template=NOTES_PROMPT_TEMPLATE,
)

In [None]:
# Input stage: the incoming question is sent to the retriever (whose hits are
# flattened by format_docs) to produce `context`, and passed through unchanged
# as `query`.
rag_inputs = {"context": retriever | format_docs, "query": RunnablePassthrough()}

# Parser step intentionally left off; the chain returns the raw model message.
rag_chain = rag_inputs | prompt | llm_agent.llm  # | parser

In [None]:
# Ask the RAG chain about the condition list. The original cell interpolated
# `diagnosis`, a name that is never assigned in this notebook (NameError on a
# fresh kernel); `diagnosis_list` is the variable the earlier cells use.
temp_out = rag_chain.invoke(
    f"Is there evidence of {diagnosis_list}?"
)

In [None]:
# Total tokens reported in the response metadata for the RAG call above.
temp_out.response_metadata["token_usage"]["total_tokens"]

In [None]:
# Parse the raw RAG reply text with the JSON parser and display the result.
pydantic_obj_notes = parser.parse(temp_out.content)
pydantic_obj_notes

In [None]:
# Query the retriever directly to inspect which note chunks it returns. The
# original cell interpolated `diagnosis`, which is never assigned in this
# notebook; `diagnosis_list` is the variable the earlier cells use.
retriever_out = retriever.invoke(f"Is there evidence of {diagnosis_list}?")
# Keep at most three hits, however many the retriever hands back.
retriever_out = retriever_out[:3]
retriever_out

In [None]:
# Flatten the retrieved documents into one context string (page contents joined
# by blank lines) and display it.
retriever_out_format = format_docs(retriever_out)
retriever_out_format

In [None]:
from langchain_core.runnables import RunnablePassthrough

# Variant of the notes prompt for the "manual context" experiment: the
# retriever is not part of the chain here.
# NOTE(review): this reassigns `prompt` and `rag_chain`, shadowing the
# retriever-backed versions defined earlier in the notebook.
MANUAL_CONTEXT_PROMPT_TEMPLATE = "You are a knowledgeable medical provider who specializes in medication management. In the following case, your patient is prescribed a PPI (proton pump inhibitor) and need to make a decision to continue, reduce, or stop the PPI. Determine if there is evidence of the specific condition which will help determine whether to continue, reduce, or stop the medication on discharge.\n{format_instructions}\nUse this information for your answer: {context}\n{query}\n"

prompt = PromptTemplate(
    partial_variables={"format_instructions": parser.get_format_instructions()},
    input_variables=["context", "query"],
    template=MANUAL_CONTEXT_PROMPT_TEMPLATE,
)

# NOTE(review): both keys are plain passthroughs, so the single string given to
# invoke() is substituted for BOTH {context} and {query} -- the same text appears
# twice in the rendered prompt. Confirm this is intended.
manual_rag_inputs = {"context": RunnablePassthrough(), "query": RunnablePassthrough()}

# Parser step intentionally left off; the chain returns the raw model message.
rag_chain = manual_rag_inputs | prompt | llm_agent.llm  # | parser

In [None]:
# Display the composed chain to inspect its structure.
rag_chain

In [None]:
# Invoke the manual-context chain with the retrieved text embedded directly in
# the question. The original cell interpolated `diagnosis`, which is never
# assigned in this notebook; `diagnosis_list` is the variable the earlier cells
# use.
out = rag_chain.invoke(
    f"Is there evidence of {diagnosis_list}? Use the following context for information: {retriever_out_format}. The context has ended. Do NOT assume a condition based on prescribed medication. We know all of these patients are prescribed a ppi, but we need to know why. Be very sure of a diagnosis."
)
out

In [None]:
# Total tokens reported in the response metadata for the manual-context call.
out.response_metadata["token_usage"]["total_tokens"]

In [None]:
# Parse the manual-context reply with the JSON parser and display the result.
# This overwrites any earlier `pydantic_obj_notes` value.
pydantic_obj_notes = parser.parse(out.content)
pydantic_obj_notes

In [None]:
# Recommendation label -> conditions searched for under that label; iterated by
# the walkthrough loop that follows.
# Fixed two typos in the condition strings ("GUI ulcer" -> "GI ulcer",
# "NSAID used" -> "NSAID use").
# NOTE(review): this repeats a definition from near the top of the notebook;
# keep a single copy once the walkthrough is finalized.
# NOTE(review): "Chronic NSAID use with bleeding risk" is listed under both
# "continue" and "stop" -- confirm which recommendation it should drive.
recommendation_dict = {
    "continue": [
        "Barretts Esophagus",
        "Chronic NSAID use with bleeding risk",
        "Severe esophagitis",
        "Documented history of bleeding GI ulcer",
    ],
    "stop": [
        "Peptic Ulcer Disease",
        "Chronic NSAID use with bleeding risk",
        "ICU Stress Ulcer Prophylaxis",
        "H Pylori infection",
    ],
    "deprescribe": [
        "Mild to moderate esophagitis",
        "GERD",
    ],
}

In [None]:
# Initial state for the walkthrough loop that follows.
# NOTE(review): `token_usage` reuses the name of a value returned by main()
# earlier in the notebook and overwrites it here.
diagnosis_dict_dict = dict()
final_bool = False
token_usage = 0  # running count of tokens used

In [None]:
for recommendation_str, diagnosis_list in recommendation_dict.items():
    