In [1]:
import pandas as pd
import numpy as np
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
import os
import json
from pathlib import Path

# define groq key
groq_key = (
     os.environ['groqkey']
)

In [2]:
from extraction import llmAgent
from query import DataLoader
from main import main

  embeddings = HuggingFaceEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
  from tqdm.autonotebook import tqdm, trange


In [3]:
# base_path = Path(
#     "/Users/yarg/Library/CloudStorage/OneDrive-Personal/Documents/GitHub/DataSci210_MedicationDeprescriber/Data"
# )
data_path = Path(
    r"F:\LangChain\data"
)

In [4]:
encounter_key="D6253A5CE371EA"

In [5]:
#main(groq_key=groq_key, data_path=data_path,key=encounter_key)

In [6]:
llm_agent = llmAgent(groq_key=groq_key, data_path=data_path)

In [7]:
diagnosis_json = llm_agent.extract_diagnosis(encounter_key=encounter_key)
diagnosis_json

{'Mild to moderate esophagitis': False,
 'GERD': False,
 'Peptic Ulcer Disease': False,
 'Upper GI symptoms': True,
 'ICU Stress Ulcer Prophylaxis': False,
 'Barretts Esophagus': False,
 'Chronic NSAID use with bleeding risk': False,
 'Severe esophagitis': False,
 'Documented history of bleeding GI ulcer': True,
 'Reasoning': 'The patient has a diagnosis of gastrointestinal bleeding (GIB), which indicates that they have experienced bleeding in the gastrointestinal tract. This is consistent with upper GI symptoms and a documented history of bleeding GI ulcer. However, there is no specific information about the other conditions listed, so they are marked as false.'}

In [8]:
encounter_json = llm_agent.extract_encounter_info(encounter_key=encounter_key)
encounter_json

{'Mild to moderate esophagitis': False,
 'GERD': False,
 'Peptic Ulcer Disease': False,
 'Upper GI symptoms': False,
 'ICU Stress Ulcer Prophylaxis': False,
 'Barretts Esophagus': False,
 'Chronic NSAID use with bleeding risk': False,
 'Severe esophagitis': False,
 'Documented history of bleeding GI ulcer': True,
 'Reasoning': "The patient's primary diagnosis is listed as 'Gastrointestinal hemorrhage, unspecified', which suggests a history of bleeding in the gastrointestinal tract. However, there is no specific information about the other conditions listed. Therefore, we can only confirm the presence of a documented history of bleeding GI ulcer based on the provided information."}

In [9]:
notes_json = llm_agent.extract_notes(encounter_key=encounter_key)
notes_json

{'Mild to moderate esophagitis': False,
 'GERD': False,
 'Peptic Ulcer Disease': False,
 'Upper GI symptoms': True,
 'ICU Stress Ulcer Prophylaxis': False,
 'Barretts Esophagus': False,
 'Chronic NSAID use with bleeding risk': False,
 'Severe esophagitis': False,
 'Documented history of bleeding GI ulcer': True,
 'Reasoning': "The patient has a history of gastrointestinal bleeding and has been diagnosed with anemia. The patient's recent endoscopy and colonoscopy results show no active bleeding, but the patient's history of bleeding and anemia suggests a high risk for future bleeding. The patient's medications, including anticoagulants and antiplatelets, increase the risk of bleeding. The patient's condition requires close monitoring and management to prevent future bleeding."}

In [10]:
def replace_underscores_in_keys(json_obj):
    if isinstance(json_obj, dict):
        new_obj = {}
        for key, value in json_obj.items():
            new_key = key.replace('_', ' ')
            new_obj[new_key] = replace_underscores_in_keys(value)
        return new_obj
    elif isinstance(json_obj, list):
        return [replace_underscores_in_keys(item) for item in json_obj]
    else:
        return json_obj

In [None]:
notes_json = replace_underscores_in_keys(notes_json)
notes_json

In [24]:
import re

def extract_json_content(response):
    # Extract content between the curly braces
    match = re.search(r'\{(.*)\}', response, re.DOTALL)
    if match:
        return match.group(1).strip()
    else:
        raise ValueError("No JSON content found between curly braces.")

def convert_to_json(response):
    # Remove comments from the string
    cleaned_response = re.sub(r'//.*', '', response)
    # Add curly braces to form a valid JSON object
    json_string = '{' + cleaned_response.strip() + '}'
    # Parse the JSON string into a dictionary
    response_dict = json.loads(json_string)
    return response_dict


In [None]:
notes_json_parsed = extract_json_content(notes_json)
final_json = convert_to_json(notes_json_parsed)

In [23]:
system = "You are a knowledgeable medical provider who specializes in medication management."
human = "{text}"
prompt = ChatPromptTemplate.from_messages(
    [("system", system), ("human", human)]
)

chain = prompt | llm_agent.llm
response = chain.invoke(
    {
        "text": f"""Based on the following json files, please provide a single explanation of the reasoning given by the 'Reasoning' key. Summarize given equal 
        weight to each. Do not add any additional information, only summarize what is given.
        {diagnosis_json}
        {encounter_json}
        {notes_json}"""
    }
)

In [None]:
response.pretty_print()

In [None]:
response.content

In [13]:
from typing import List
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field, validator
class NoteResponse(BaseModel):
            Mild_to_moderate_esoophagitis: bool = Field(description="Mild to moderate esoophagitis")
            GERD: bool = Field(description="GERD")
            Peptic_Ulcer_Disease: bool = Field(description="Peptic Ulcer Disease")
            Upper_GI_symptoms: bool = Field(description="Upper GI symptoms")
            ICU_Stress_Ulcer_Prophylaxis: bool = Field(description="ICU Stress Ulcer Prophylaxis")
            Barretts_Esophagus: bool = Field(description="Barrett's Esophagus")
            Chronic_NSAID_use_with_bleeding_risk: bool = Field(description="Chronic NSAID use with bleeding risk")
            Severe_esophagitis: bool = Field(description="Severe esophagitis")
            Documented_history_of_bleeding_GI_ulcer: bool = Field(description="Documented history of bleeding GI ulcer")
            Reasoning: str = Field(description="Explain the reasoning for your answer")

'{\n  "title": "NoteResponse",\n  "type": "object",\n  "properties": {\n    "Mild_to_moderate_esoophagitis": {\n      "title": "Mild To Moderate Esoophagitis",\n      "description": "Mild to moderate esoophagitis",\n      "type": "boolean"\n    },\n    "GERD": {\n      "title": "Gerd",\n      "description": "GERD",\n      "type": "boolean"\n    },\n    "Peptic_Ulcer_Disease": {\n      "title": "Peptic Ulcer Disease",\n      "description": "Peptic Ulcer Disease",\n      "type": "boolean"\n    },\n    "Upper_GI_symptoms": {\n      "title": "Upper Gi Symptoms",\n      "description": "Upper GI symptoms",\n      "type": "boolean"\n    },\n    "ICU_Stress_Ulcer_Prophylaxis": {\n      "title": "Icu Stress Ulcer Prophylaxis",\n      "description": "ICU Stress Ulcer Prophylaxis",\n      "type": "boolean"\n    },\n    "Barretts_Esophagus": {\n      "title": "Barretts Esophagus",\n      "description": "Barrett\'s Esophagus",\n      "type": "boolean"\n    },\n    "Chronic_NSAID_use_with_bleeding_r

In [None]:
final_dict = {}
for key in diagnosis_json.keys():
    #print(key)
    if not key == "Reasoning":
        diagnosis_bool = diagnosis_json[key]
        encounter_bool = encounter_json[key]
        notes_bool = notes_json[key]
        
        final_dict[key] = diagnosis_bool or encounter_bool or notes_bool
    
final_dict

In [9]:
# DL = DataLoader(data_path=base_path)

In [10]:
# DL.get_label(encounter_key=encounter_key)

In [11]:
# DL.get_diagnosis_data(encounter_key=encounter_key)

In [12]:
# DL.get_encounter_data(encounter_key=encounter_key)