In [1]:
import pandas as pd
import numpy as np
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
import os
import json
from pathlib import Path
import pprint

In [2]:
# SECURITY: never commit API keys to a notebook. The Groq key is read from the
# GROQ_API_KEY environment variable instead. The key that used to be hard-coded
# in this cell must be treated as compromised and revoked.
groq_key = os.environ.get("GROQ_API_KEY")
if not groq_key:
    # Surface the problem here rather than as an opaque authentication error later.
    print("GROQ_API_KEY is not set; export it before running the cells that call the Groq API.")

In [3]:
from extraction import llmAgent
from query import DataLoader
from main import main

  embeddings = HuggingFaceEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
  from tqdm.autonotebook import tqdm, trange


In [4]:
# Directory holding the project's data files.
# Set the DEPRESCRIBER_DATA_DIR environment variable to point at your own copy;
# when it is unset the previous hard-coded location is used, so existing setups
# keep working. Earlier Windows location, kept for reference:
# r"C:\Users\grays\OneDrive\Documents\GitHub\DataSci210_MedicationDeprescriber\Data"
DEFAULT_DATA_DIR = "/Users/yarg/Library/CloudStorage/OneDrive-Personal/Documents/GitHub/DataSci210_MedicationDeprescriber/Data"
data_path = Path(os.environ.get("DEPRESCRIBER_DATA_DIR", DEFAULT_DATA_DIR))

In [5]:
# Key of the encounter that every cell below operates on.
encounter_key: str = "SyntheticPt3"

In [6]:
# Run the project's `main` entry point for the selected encounter. It returns
# four values, unpacked below in the order they come back.
pipeline_outputs = main(
    groq_key=groq_key,
    data_path=data_path,
    encounter_key=encounter_key,
)
(
    final_explanation,
    token_usage,
    search_history_thus_far_list,
    token_count_history,
) = pipeline_outputs

In [7]:
# Pretty-print the first value returned by `main` so long text wraps readably.
pprint.pprint(final_explanation)

('**Medication Management Summary**\n'
 '\n'
 "Based on the patient's health information extracted from the patient "
 'diagnosis record, patient encounter record, and patient medical notes '
 "history, the following recommendations are made for the patient's current "
 'medication regimen:\n'
 '\n'
 '* **Continue**: The patient is receiving Pantoprazole 40 mg daily for stress '
 'ulcer prophylaxis due to hemodynamic instability in the ICU setting with '
 'acute myocardial infarction and atrial fibrillation (identified in the '
 'Patient notes history). This is consistent with standard ICU practice for '
 'patients with hemodynamic instability.\n'
 '* **Stop**: There is no direct evidence to support the use of a PPI for '
 'Peptic Ulcer Disease or Gastroduodenal ulcer, Upper GI Symptoms, ICU Stress '
 'Ulcer Prophylaxis, or Completed Heliobacter Pylori (H. Pylori) infection '
 '(identified in the Patient diagnosis record and Patient encounter record). '
 'However, the patient is receiv

In [8]:
# Display the token-usage figure returned by `main`.
token_usage

9029

In [9]:
# Display the token-count history returned by `main`.
token_count_history

{'diagnosis_source_continue': 692,
 'diagnosis_source_stop': 690,
 'diagnosis_source_deprescribe': 614,
 'encounters_source_continue': 724,
 'encounters_source_stop': 722,
 'encounters_source_deprescribe': 663,
 'notes_source_continue': 831,
 'notes_source_stop': 790,
 'notes_source_deprescribe': 769,
 'final_summary': 2534}

In [10]:
# Display the search history returned by `main`.
search_history_thus_far_list

[{'diagnosis_boolean': 'False',
  'explanation': "The provided information does not include any evidence of Barrett's Esophagus or esophageal cell changes, Chronic Non-Steroidal Anti Inflammatory (NSAID) use or GI prophylaxis NSAID use, Severe esophagitis including bleeding esophagitis or esophageal ulcer, or History of gastrointestinal bleeding, gastric ulcer, upper GI bleed, or peptic ulcer hemorrhage. The listed diagnoses include Urinary Tract Infection, Atrial Fibrillation, Hypertension, and Hyperlipidemia, which do not support the use of a PPI for the specified conditions.",
  'source': 'Patient diagnosis record',
  'recommendation considered': 'continue',
  'associated diagnosis list': ['Barretts Esophagus or esophageal cell changes',
   'Chronic Non-Steroidal Anti Inflammatory (NSAID) use or GI prophylaxis NSAID use',
   'Severe esophagitis including bleeding esophagitis or esophageal ulcer',
   'History of gastrointestinal bleeding, gastric ulcer, upper GI bleed, or peptic ulce

In [8]:
# Build the project's llmAgent directly so individual pipeline steps can be
# exercised by hand; later cells use its `get_data`, `extract_diagnosis` and `llm`.
llm_agent = llmAgent(groq_key=groq_key, data_path=data_path)

In [9]:
# Fetch the "diagnosis" source data for the selected encounter and display it.
diagnosis_data_dict = llm_agent.get_data(source="diagnosis", encounter_key=encounter_key)
diagnosis_data_dict

{'hospitalAcquiredDx': '[{"EncounterKey":"D6253A5CE371EA","DxName":"*Unspecified"}]',
 'presentOnAdmitDx': '[{"EncounterKey":"D6253A5CE371EA","DxName":"GIB (gastrointestinal bleeding)"}]'}

In [None]:
# Conditions searched for in the patient data. These strings are interpolated
# verbatim into the LLM query, so spelling matters: "GUI ulcer" is corrected to
# "GI ulcer" and "NSAID used" to "NSAID use".
diagnosis_list = [
    "Barretts Esophagus",
    "Chronic NSAID use with bleeding risk",
    "Severe esophagitis",
    "Documented history of bleeding GI ulcer",
]

In [None]:
from pydantic import BaseModel, Field
from langchain_core.output_parsers import JsonOutputParser


# Schema for the JSON object the LLM is asked to return for a diagnosis search.
# It is handed to the output parser, whose format instructions are injected
# into the prompts.
# NOTE: documented with `#` comments rather than a class docstring on purpose:
# pydantic puts a model's docstring into its generated JSON schema, so adding
# one would presumably change the format instructions sent to the model.
class DiagnosisSearchDict(BaseModel):
    # Found / not-found flag, carried as a string.
    # NOTE(review): the description asks for "1"/"0"; confirm this matches what
    # downstream code expects.
    diagnosis_boolean: str = Field(description="1 if the diagnosis is found, else 0")
    # Free-text justification for the flag above.
    explanation: str = Field(
        description="A concise explanation for how the determination of the diagnosis was made"
    )

In [None]:
# JSON output parser bound to the DiagnosisSearchDict schema; used both for the
# prompt's format instructions and to parse the model's reply.
parser = JsonOutputParser(pydantic_object=DiagnosisSearchDict)

In [None]:
from langchain_core.prompts import PromptTemplate

# Prompt for the structured-data search (no retrieval). The only runtime input
# is `query`; the format instructions are filled in once, here, from the parser.
diagnosis_prompt_template = """You are a knowledgeable medical provider who specializes in medication management. In the following case, your patient is prescribed
    a PPI (proton pump inhibitor) and need to make a decision to continue, reduce, or stop the PPI. Determine if there is evidence of the specific
    condition which will help determine whether to continue, reduce, or stop the medication on discharge.
    # Response Format Instructions #
    {format_instructions}
    # Question #
    {query}"""
format_instructions = parser.get_format_instructions()

prompt = PromptTemplate(
    template=diagnosis_prompt_template,
    input_variables=["query"],
    partial_variables={"format_instructions": format_instructions},
)

In [None]:
# Pipe the prompt into the agent's LLM. The parser step is deliberately left
# off so the raw response (including its metadata) can be inspected below.
chain = prompt | llm_agent.llm  # | parser

In [None]:
# Build the question first, then send it through the chain. Both the raw
# diagnosis data and the list of target conditions are interpolated as-is.
diagnosis_query = f"Based on the provided information here: {diagnosis_data_dict}, is there evidence of {diagnosis_list}? Do NOT assume a condition based on prescribed medication. We know all of these patients are prescribed a ppi, but we need to know why. Be very sure of a diagnosis."
output = chain.invoke({"query": diagnosis_query})

In [None]:
# Total token count reported in the response metadata of `output`.
output.response_metadata["token_usage"]["total_tokens"]

In [None]:
# Parse the raw model text with the JSON parser and display the result.
# NOTE(review): despite the variable name, `parser` is a JsonOutputParser, so
# this is presumably a plain dict rather than a pydantic object; confirm.
pydantic_obj = parser.parse(output.content)
pydantic_obj

In [None]:
# Call the agent's own `extract_diagnosis` with the same data and condition
# list as the hand-built chain; its return value is displayed as the cell output.
llm_agent.extract_diagnosis(
    diagnosis_data_dict=diagnosis_data_dict, diagnosis=diagnosis_list
)

In [None]:
# Fetch the "encounters" source data for the selected encounter and display it.
encounters_data_dict = llm_agent.get_data(source="encounters", encounter_key=encounter_key)
encounters_data_dict

'[{"EncounterKey":"D6253A5CE371EA","PatientKey":"DE3B4E1AA899F6","Sex":"Male","Age_y":81,"BirthDate":"1942-01-31","PtAdmitDate":"2023-09-17","PtDischargeDate":"2023-09-21","DRG":"GASTROINTESTINAL HEMORRHAGE WITH MCC","FinancialClass":"Medicare Advantage HMO\\/Senior","AdmissionOrigin":"Transfer Center Admission","AdmissionSource":"Transfer - Acute Hospital","AdmissionType":"Urgent","PrimaryDx":"Gastrointestinal hemorrhage, unspecified","PresentOnAdmissionDiagnosisComboKey":1848960,"HospitalAcquiredDiagnosisComboKey":-1,"DischargeDisposition":"Home or Self Care","DischargePatientClass":"Inpatient"}]'

: 

In [None]:
# Fetch the "notes" source data for the selected encounter and display it.
# It is passed to DataFrameLoader as `data_frame` with a "NoteText" column in a
# later cell, so it is presumably a pandas DataFrame; confirm.
noteText = llm_agent.get_data(encounter_key=encounter_key, source="notes")
noteText

In [None]:
from langchain.output_parsers import PydanticOutputParser
from langchain_community.document_loaders import DataFrameLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_groq import ChatGroq
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pydantic import BaseModel, Field


# Build a FAISS-backed retriever over the clinical notes.
# Embedding model used to vectorise the note chunks.
embeddings = HuggingFaceEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
text_splitter = RecursiveCharacterTextSplitter()

# One document per row, with the "NoteText" column as the page content.
loader = DataFrameLoader(
    data_frame=noteText,
    page_content_column="NoteText",
    engine="pandas",
)

# Pass the splitter explicitly: it was previously created but never used.
# `load_and_split` falls back to a default RecursiveCharacterTextSplitter when
# none is given, so the chunks are unchanged and the dependency is now visible.
documents = loader.load_and_split(text_splitter=text_splitter)

vector_store = FAISS.from_documents(documents, embeddings)

# The number of results must be supplied through `search_kwargs`. A bare `k=3`
# keyword is not a retriever setting, so the library default result count was
# used instead of the intended 3.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

parser = JsonOutputParser(pydantic_object=DiagnosisSearchDict)
# parser = PydanticOutputParser(pydantic_object=DiagnosisSearchDict)

In [None]:
def format_docs(docs):
    """Concatenate the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line. An empty iterable yields an empty string.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

In [None]:
from langchain_core.runnables import RunnablePassthrough

# Prompt for the retrieval-augmented search over the notes. `context` and
# `query` are supplied at invoke time; the format instructions are filled in
# once, here, from the parser.
notes_prompt_template = """You are a knowledgeable medical provider who specializes in medication management. In the following case, your patient is prescribed a PPI (proton pump inhibitor) and need to make a decision to continue, reduce, or stop the PPI. Determine if there is evidence of the specific condition which will help determine whether to continue, reduce, or stop the medication on discharge.
    Use the Context as information for your answer: 
    # Context #
    {context}
    Do NOT assume a condition based on prescribed medication. We know all of these patients are prescribed a ppi, but we need to know why. Be very sure of a diagnosis.
    # Format Instructions #
    {format_instructions}
    # Question #
    {query}
    """
format_instructions = parser.get_format_instructions()

prompt = PromptTemplate(
    template=notes_prompt_template,
    input_variables=["context", "query"],
    partial_variables={"format_instructions": format_instructions},
)

In [None]:
# Input stage: the invoke argument goes to the retriever, whose documents are
# joined by `format_docs` to form `context`, and is also passed through
# unchanged as `query`.
rag_inputs = {"context": retriever | format_docs, "query": RunnablePassthrough()}

# The parser step is left off so the raw LLM response can be inspected.
rag_chain = rag_inputs | prompt | llm_agent.llm  # | parser

In [None]:
# Ask the RAG chain about the target conditions.
# The original referenced `diagnosis`, a name never defined in this notebook
# (NameError on a fresh kernel). `diagnosis_list` is what the earlier
# structured-data query and `extract_diagnosis` call use, so it is used here too.
temp_out = rag_chain.invoke(f"Is there evidence of {diagnosis_list}?")

In [None]:
# Total token count reported in the response metadata of `temp_out`.
temp_out.response_metadata["token_usage"]["total_tokens"]

In [None]:
# Parse the raw RAG response text with the JSON parser and display the result.
pydantic_obj_notes = parser.parse(temp_out.content)
pydantic_obj_notes

In [None]:
# Query the retriever on its own to inspect which note chunks it returns.
# The original referenced `diagnosis`, a name never defined in this notebook
# (NameError on a fresh kernel); `diagnosis_list` is the list of conditions
# used by the other queries.
retriever_out = retriever.invoke(f"Is there evidence of {diagnosis_list}?")
# Keep at most the first three documents.
retriever_out = retriever_out[:3]
retriever_out

In [None]:
# Join the retrieved documents' text into a single context string and display it.
retriever_out_format = format_docs(retriever_out)
retriever_out_format

In [None]:
from langchain_core.runnables import RunnablePassthrough

# Variant of the notes prompt in which the caller supplies the context by hand
# instead of going through the retriever.
manual_context_template = "You are a knowledgeable medical provider who specializes in medication management. In the following case, your patient is prescribed a PPI (proton pump inhibitor) and need to make a decision to continue, reduce, or stop the PPI. Determine if there is evidence of the specific condition which will help determine whether to continue, reduce, or stop the medication on discharge.\n{format_instructions}\nUse this information for your answer: {context}\n{query}\n"
format_instructions = parser.get_format_instructions()

prompt = PromptTemplate(
    template=manual_context_template,
    input_variables=["context", "query"],
    partial_variables={"format_instructions": format_instructions},
)

# NOTE(review): both keys are RunnablePassthrough(), so `context` and `query`
# each receive the same invoke argument and it appears twice in the rendered
# prompt. Confirm this duplication is intended.
passthrough_inputs = {"context": RunnablePassthrough(), "query": RunnablePassthrough()}

# The parser step is left off so the raw LLM response can be inspected.
rag_chain = passthrough_inputs | prompt | llm_agent.llm  # | parser

In [None]:
# Display the composed chain to check its structure.
rag_chain

In [None]:
# Invoke the chain with the question and the pre-formatted retrieved context
# embedded in a single string.
# The original referenced `diagnosis`, a name never defined in this notebook
# (NameError on a fresh kernel); `diagnosis_list` is the list of conditions
# used by the other queries.
out = rag_chain.invoke(
    f"Is there evidence of {diagnosis_list}? Use the following context for information: {retriever_out_format}. The context has ended. Do NOT assume a condition based on prescribed medication. We know all of these patients are prescribed a ppi, but we need to know why. Be very sure of a diagnosis."
)
out

In [None]:
# Total token count reported in the response metadata of `out`.
out.response_metadata["token_usage"]["total_tokens"]

In [None]:
# Parse the raw response text with the JSON parser and display the result.
# This rebinds `pydantic_obj_notes`, replacing any value assigned earlier.
pydantic_obj_notes = parser.parse(out.content)
pydantic_obj_notes

In [None]:
# Maps each candidate recommendation to the conditions whose presence would
# support it. The strings are interpolated into LLM queries, so spelling
# matters: "GUI ulcer" is corrected to "GI ulcer" and "NSAID used" to "NSAID use".
# NOTE(review): "Chronic NSAID use with bleeding risk" is listed under both
# "continue" and "stop"; confirm which recommendation it should support.
recommendation_dict = {
    "continue": [
        "Barretts Esophagus",
        "Chronic NSAID use with bleeding risk",
        "Severe esophagitis",
        "Documented history of bleeding GI ulcer",
    ],
    "stop": [
        "Peptic Ulcer Disease",
        "Chronic NSAID use with bleeding risk",
        "ICU Stress Ulcer Prophylaxis",
        "H Pylori infection",
    ],
    "deprescribe": [
        "Mild to moderate esophagitis",
        "GERD",
    ],
}

In [None]:
# Reset the accumulators before iterating over the recommendations.
# `token_usage` restarts from zero here, replacing any value it held before.
token_usage, final_bool = 0, False
diagnosis_dict_dict = dict()

In [None]:
for recommendation_str, diagnosis_list in recommendation_dict.items():
    