In [1]:
import json
from pathlib import Path
import os 
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain_text_splitters import RecursiveCharacterTextSplitter

from query import DataLoader

embeddings = HuggingFaceEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
text_splitter = RecursiveCharacterTextSplitter()

  embeddings = HuggingFaceEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
  from tqdm.autonotebook import tqdm, trange


In [5]:

class llmAgent:
    """ """

    def __init__(self, groq_key: str, data_path: Path) -> None:
        """ """
        self.data_path = data_path
        self.data_loader = DataLoader(data_path=data_path)

        llama_31 = "llama-3.1-70b-versatile"
        self.llm = ChatGroq(temperature=0, model=llama_31, api_key=groq_key)

    @staticmethod
    def format_docs(docs):
        return "\n\n".join(doc.page_content for doc in docs)
    
    def extract_notes(self, encounter_key: str):
        """
        Extraction Agent/Step 3

        """
        noteText = self.data_loader.get_notes_data(encounter_key=encounter_key)

        from langchain_community.document_loaders import DataFrameLoader

        loader = DataFrameLoader(
            data_frame=noteText,
            page_content_column="NoteText",
            engine="pandas",
        )

        documents = loader.load_and_split()

        from langchain_community.vectorstores import FAISS

        vector_store = FAISS.from_documents(documents, embeddings)

        retriever = vector_store.as_retriever(search_type="similarity", k=5)

        from langchain_core.output_parsers import StrOutputParser
        from langchain_core.runnables import RunnablePassthrough
        from langchain.output_parsers import ResponseSchema, StructuredOutputParser
        from langchain_core.prompts import PromptTemplate

        response_schemas = [
            ResponseSchema(name="Mild to moderate esophagitis", description="return true or false if this diagnosis is present in the notes.", type="bool"),
            ResponseSchema(name="GERD", description="return true or false if this diagnosis is present in the notes.", type="bool"),
            ResponseSchema(name="Peptic Ulcer Disease", description="return true or false if this diagnosis is present in the notes.", type="bool"),
            ResponseSchema(name="Upper GI symptoms", description="return true or false if this diagnosis is present in the notes.", type="bool"),
            ResponseSchema(name="ICU Stress Ulcer Prophylaxis", description="return true or false if this diagnosis is present in the notes.", type="bool"),
            ResponseSchema(name="Barrett's Esophagus", description="return true or false if this diagnosis is present in the notes.", type="bool"),
            ResponseSchema(name="Chronic NSAID use with bleeding risk", description="return true or false if this diagnosis is present in the notes.", type="bool"),
            ResponseSchema(name="Severe esophagitis", description="return true or false if this diagnosis is present in the notes.", type="bool"),
            ResponseSchema(name="Documented history of bleeding GI ulcer", description="return true or false if this diagnosis is present in the notes.", type="bool"),
            ResponseSchema(name="Reasoning", description="return a few sentences of the reasoning for the answer boolean value diagnosis given the notes.", type="str"),
            ]
        output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
        format_instructions = output_parser.get_format_instructions()
        #system = "You are a knowledgeable medical provider who specializes in medication management. Given a list of diagnosis and some snippets from patients notes {context}, answer if the patient notes contain any of the diagnosis."
        prompt = PromptTemplate(
            template="answer the users input as best as possible.\n{format_instructions}\n{input}",
            #input_variables=[("system", system), ("human", "{input}")],
            input_variables=["input"],
            partial_variables={"format_instructions": format_instructions},
                )
     
        # # #   Approach 2   # # #
        rag_chain = (
            RunnablePassthrough.assign(
                context=(lambda x: llmAgent.format_docs(x["context"]))
            )
            | prompt
            | self.llm
            | StrOutputParser()
            #| output_parser
        )

        retrieve_docs = {
            "context": retriever | llmAgent.format_docs,
            "input": RunnablePassthrough(),
        }

        retrieve_docs = (lambda x: x["input"]) | retriever

        chain = RunnablePassthrough.assign(context=retrieve_docs).assign(
            answer=rag_chain
        )

        result = chain.invoke(
            {
                "input": """Based on the information from the note context, does the patient have any of the following:
              1. Mild to moderate esophagitis
              2. GERD 
              3. Peptic Ulcer Disease
              4. Upper GI symptoms
              5. ICU Stress Ulcer Prophylaxis
              6. Barrett's Esophagus
              7. Chronic NSAID use with bleeding risk
              8. Severe esophagitis
              9. Documented history of bleeding GI ulcer
              10. Explain the reasoning for your answer
            Return the answer for each of these as a formatted JSON object with the key being the condition and the value being a boolean value for the first 9.  For the final question, return a string with the reasoning for your answer."""
            }
        )
     
        result_json = result
   

        return result_json


In [6]:
groq_key = os.environ['groqkey']
data_path = Path(
    r"F:\LangChain\data"
)
llm_agent = llmAgent(groq_key=groq_key, data_path=data_path)

In [7]:
encounter_key = "D17AFAD1D7BB68"

notes_json = llm_agent.extract_notes(encounter_key=encounter_key)


In [9]:
notes_json["answer"]

'```json\n{\n\t"Mild to moderate esophagitis": false  // The note context does not mention esophagitis.\n\t"GERD": false  // The note context does not mention GERD.\n\t"Peptic Ulcer Disease": false  // The note context does not mention Peptic Ulcer Disease.\n\t"Upper GI symptoms": false  // The note context does not mention Upper GI symptoms.\n\t"ICU Stress Ulcer Prophylaxis": false  // The note context does not mention ICU Stress Ulcer Prophylaxis.\n\t"Barrett\'s Esophagus": false  // The note context does not mention Barrett\'s Esophagus.\n\t"Chronic NSAID use with bleeding risk": false  // The note context does not mention Chronic NSAID use with bleeding risk.\n\t"Severe esophagitis": false  // The note context does not mention esophagitis.\n\t"Documented history of bleeding GI ulcer": false  // The note context does not mention a history of bleeding GI ulcer.\n\t"Reasoning": "There is not enough information in the note context to determine the presence of any of the listed conditio