In [None]:
from langchain.output_parsers import PydanticOutputParser
from langchain_core.exceptions import OutputParserException
from pydantic import BaseModel, Field
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import chain
from langchain.prompts import PromptTemplate
from typing import List, Optional, Dict
import time
import openai
import logging

# Import Ollama
from langchain_community.llms import Ollama




# Schema for the verbatim snippets taken from the source documents.
# Documented with comments rather than a class docstring so the generated
# JSON schema (and therefore any prompt built from it) stays unchanged.
class Segment(BaseModel):
    # Optional list of snippet strings; defaults to None when nothing is supplied.
    segment: Optional[List[str]] = Field(
        default=None,
        description="List of direct segments from used documents that answers the question",
    )


class HighlightDocuments(BaseModel):
    """Return the specific part of a document used for answering the question."""

    # Required field: one dict per contributing document, mapping the document
    # identifier (pmid) to the list of snippet strings taken from it.
    # The description text is forwarded to the LLM through the parser's format
    # instructions, so it is reproduced verbatim.
    segment_list: List[Dict[str, List[str]]] = Field(..., description="List of pmid")



def HightLight_context(
    input_data,
    llm_model,
    sleep_time=5,
    max_retries=3,
    ):
    """Ask an LLM which verbatim document segments support a generated answer.

    Builds a ``prompt | llm_model | parser`` pipeline, where the parser is a
    ``PydanticOutputParser`` for ``HighlightDocuments``, and invokes it once
    per attempt.

    Args:
        input_data: Mapping with the prompt variables ``"documents"``,
            ``"question"`` and ``"generation"``.
        llm_model: LangChain-compatible model placed in the middle of the
            pipeline.
        sleep_time: Seconds to wait before retrying after an OpenAI
            ``BadRequestError`` or ``RateLimitError``.
        max_retries: Maximum number of retries after the first attempt.

    Returns:
        The parsed ``HighlightDocuments`` instance, or ``None`` when the model
        output could not be parsed into that schema.

    Raises:
        openai.BadRequestError, openai.RateLimitError: re-raised once
            ``max_retries`` retries have been used up.
    """

    # The "Output Format" paragraph must agree with the HighlightDocuments
    # schema (a list of {pmid: [segments]} dictionaries); the parser rejects
    # any other shape.
    system = """You are an advanced assistant for document search and retrieval. You are provided with the following:
    Your task is to identify and extract the exact inline segments from the provided documents that directly correspond to the content used to 
    generate the given answer. The extracted segments must be verbatim snippets from the provided documents, ensuring a word-for-word match with the text 
    in the provided documents.

    For EACH document, provide the document's pmid, and the VERBATIM segments that support the answer.

    Ensure that:
    - (Important) Each segment is an exact match to a part of the document and is fully contained within the document text.
    - The relevance of each segment to the generated answer is clear and directly supports the answer provided.
    - (Important) If a specific document wasn't used to formulate the answer, then do NOT include it in the output.

    Output Format:  A list of dictionaries.  Each dictionary represents a document that contributed to the answer and MUST contain exactly ONE key: the document's pmid. The value of that key is a LIST of strings, where each string is a verbatim segment from that document.

    Used documents: <docs>{documents}</docs> \n\n User question: <question>{question}</question> \n\n Generated answer: <answer>{generation}</answer>
    <format_instruction>
    {format_instructions}
    </format_instruction>
    """

    parser = PydanticOutputParser(pydantic_object=HighlightDocuments)

    prompt = PromptTemplate(
        template=system,
        input_variables=["documents", "question", "generation"],
        partial_variables={"format_instructions": parser.get_format_instructions()},
    )

    # Named `pipeline` so the `chain` decorator imported at module level is
    # not shadowed inside this function.
    pipeline = prompt | llm_model | parser

    # Bounded retry loop. The previous recursive retry omitted `llm_model`
    # (raising TypeError on the first retry) and had no upper bound.
    retries_used = 0
    while True:
        try:
            return pipeline.invoke(input_data)
        except (openai.BadRequestError, openai.RateLimitError) as err:
            if retries_used >= max_retries:
                raise
            retries_used += 1
            logging.warning(
                "%s from the model backend (retry %d/%d), sleeping %s s before retrying: %s",
                type(err).__name__,
                retries_used,
                max_retries,
                sleep_time,
                err,
            )
            time.sleep(sleep_time)
        except OutputParserException as err:
            # Best-effort contract kept from the original: an unparsable model
            # reply yields None instead of an exception, but the reason is logged.
            logging.warning("Error in parsing the instance! %s", err)
            return None

In [24]:
# ---------------------- Prompt Formatting ----------------------
def format_docs(docs):
    """Render a ``{pmid: record}`` mapping as one prompt-ready text block.

    Every record contributes a ``pmid:`` line followed by a
    ``Document Text:`` line holding ``record['abstract']`` and a blank line.
    An empty mapping yields an empty string.
    """
    return "".join(
        f"pmid: {doc_id}\nDocument Text: {record['abstract']}\n\n"
        for doc_id, record in docs.items()
    )

In [25]:
# ---------------------- Dummy Data ----------------------
# Sample retrieval result used to exercise the highlighting chain.
# Keys are document identifiers prefixed with "pmid"; each value holds the
# document's "abstract" (the text handed to the model by format_docs) and its "title".
output_abtract = {
    "pmid28469468": {
      "abstract": "Regulatory and functional aspects of the kynurenine (K) pathway (KP) of tryptophan (Trp) degradation are reviewed. The KP accounts for ~95% of dietary Trp degradation, of which 90% is attributed to the hepatic KP. During immune activation, the minor extrahepatic KP plays a more active role. The KP is rate-limited by its first enzyme, Trp 2,3-dioxygenase (TDO), in liver and indoleamine 2,3-dioxygenase (IDO) elsewhere. TDO is regulated by glucocorticoid induction, substrate activation and stabilization by Trp, cofactor activation by heme, and end-product inhibition by reduced nicotinamide adenine dinucleotide (phosphate). IDO is regulated by IFN-gamma and other cytokines and by nitric oxide. The KP disposes of excess Trp, controls hepatic heme synthesis and Trp availability for cerebral serotonin synthesis, and produces immunoregulatory and neuroactive metabolites, the B3 \"vitamin\" nicotinic acid, and oxidized nicotinamide adenine dinucleotide. Various KP enzymes are undermined in disease and are targeted for therapy of conditions ranging from immunological, neurological, and neurodegenerative conditions to cancer.",
      "title": "Kynurenine Pathway of Tryptophan Metabolism: Regulatory and Functional Aspects."
    },
    "pmid36409557": {
      "abstract": "BACKGROUNDThe kynurenine pathway (KP) has been identified as a potential mediator linking acute illness to cognitive dysfunction by generating neuroactive metabolites in response to inflammation. Delirium (acute confusion) is a common complication of acute illness and is associated with increased risk of dementia and mortality. However, the molecular mechanisms underlying delirium, particularly in relation to the KP, remain elusive.METHODSWe undertook a multicenter observational study with 586 hospitalized patients (248 with delirium) and investigated associations between delirium and KP metabolites measured in cerebrospinal fluid (CSF) and serum by targeted metabolomics. We also explored associations between KP metabolites and markers of neuronal damage and 1-year mortality.RESULTSIn delirium, we found concentrations of the neurotoxic metabolite quinolinic acid in CSF (CSF-QA) (OR 2.26 [1.78, 2.87], P < 0.001) to be increased and also found increases in several other KP metabolites in serum and CSF. In addition, CSF-QA was associated with the neuronal damage marker neurofilament light chain (NfL) (beta 0.43, P < 0.001) and was a strong predictor of 1-year mortality (HR 4.35 [2.93, 6.45] for CSF-QA >= 100 nmol/L, P < 0.001). The associations between CSF-QA and delirium, neuronal damage, and mortality remained highly significant following adjustment for confounders and multiple comparisons.CONCLUSIONOur data identified how systemic inflammation, neurotoxicity, and delirium are strongly linked via the KP and should inform future delirium prevention and treatment clinical trials that target enzymes of the KP.FUNDINGNorwegian Health Association and South-Eastern Norway Regional Health Authorities.",
      "title": "Cerebrospinal fluid quinolinic acid is strongly associated with delirium and mortality in hip-fracture patients."
    },
    "pmid36647830": {
      "abstract": "Tryptophan (Trp) metabolism plays a central role in sleep, mood, and immune system regulation. The kynurenine pathway (KP), which is regulated by the enzymes tryptophan 2,3-dioxygenase (TDO) and indoleamine 2,3 dioxygenase (IDO), which catalyze the conversion of Trp to kynurenine (Kyn), facilitates immune regulation and influences neurocognition. Notably, Kyn metabolites bind the N-methyl-d-aspartate receptor (NMDAR), essential for memory encoding, and in turn, cognition. Aberrant NMDAR activity through agonist binding influences excitability and cell death. In this issue of the JCI, Watne and authors demonstrate that KP pathway end products were elevated in the serum and the cerebrospinal fluid (CSF) of subjects with delirium. This observation provides insight regarding the basis of a variety of commonly observed clinical conditions including sundowning, abnormal sleep-wake cycles in hospitalized patients, neurodegenerative cognitive impairment, radiation-induced cognitive impairment, neurocognitive symptomatology related to COVID-19, and clinical outcomes observed in patients with CNS tumors, such as gliomas.",
      "title": "The kynurenine pathway implicated in patient delirium: possible indications for indoleamine 2,3 dioxygenase inhibitors."
    },
    "pmid36982655": {
      "abstract": "Delirium, a common form of acute brain dysfunction, is associated with increased morbidity and mortality, especially in older patients. The underlying pathophysiology of delirium is not clearly understood, but acute systemic inflammation is known to drive delirium in cases of acute illnesses, such as sepsis, trauma, and surgery. Based on psychomotor presentations, delirium has three main subtypes, such as hypoactive, hyperactive, and mixed subtype. There are similarities in the initial presentation of delirium with depression and dementia, especially in the hypoactive subtype. Hence, patients with hypoactive delirium are frequently misdiagnosed. The altered kynurenine pathway (KP) is a promising molecular pathway implicated in the pathogenesis of delirium. The KP is highly regulated in the immune system and influences neurological functions. The activation of indoleamine 2,3-dioxygenase, and specific KP neuroactive metabolites, such as quinolinic acid and kynurenic acid, could play a role in the event of delirium. Here, we collectively describe the roles of the KP and speculate on its relevance in delirium.",
      "title": "Altered Tryptophan-Kynurenine Pathway in Delirium: A Review of the Current Literature."
    },
    "pmid7145182": {
      "abstract": "The authors have investigated the changes of kalemia and kaluria levels in 40 patients suffering from delirium tremens, and have observed during the acute stage of the illness, a swift fall of kalemia level without an increase of the kaluric excretion. The kalemic values quickly rise up again at the end of the delirium.. The authors think that an adrenergic mechanism is responsible for this quick and transient hypokalemia. The significance of this adrenergic mechanism in the alcohol withdrawal syndrome is at present widely proved.",
      "title": "[Hypopotassemia during delirium tremens. Pathogenesis and clinical significance]."
    }
}

# User query forwarded to the model verbatim.
# NOTE(review): "dilirium" looks like a typo for "delirium" — confirm before
# changing the query text, since it alters what the model receives.
question = "How does kynurenic acid contribute to dilirium?"

# Must be a plain string. A trailing comma after the literal would turn this
# into a 1-tuple, and the prompt would then render the tuple's repr
# ("('Based on ...',)") inside the <answer> tag.
generation = "Based on the provided abstracts, kynurenic acid is a neuroactive metabolite in the kynurenine pathway (KP) that could play a role in delirium (pmid36982655)"

# Prompt variables expected by HightLight_context.
input_data = {
    "documents": format_docs(output_abtract),
    "question": question,
    "generation": generation,
}

input_data

{'documents': 'pmid: pmid28469468\nDocument Text: Regulatory and functional aspects of the kynurenine (K) pathway (KP) of tryptophan (Trp) degradation are reviewed. The KP accounts for ~95% of dietary Trp degradation, of which 90% is attributed to the hepatic KP. During immune activation, the minor extrahepatic KP plays a more active role. The KP is rate-limited by its first enzyme, Trp 2,3-dioxygenase (TDO), in liver and indoleamine 2,3-dioxygenase (IDO) elsewhere. TDO is regulated by glucocorticoid induction, substrate activation and stabilization by Trp, cofactor activation by heme, and end-product inhibition by reduced nicotinamide adenine dinucleotide (phosphate). IDO is regulated by IFN-gamma and other cytokines and by nitric oxide. The KP disposes of excess Trp, controls hepatic heme synthesis and Trp availability for cerebral serotonin synthesis, and produces immunoregulatory and neuroactive metabolites, the B3 "vitamin" nicotinic acid, and oxidized nicotinamide adenine dinucle

In [26]:
# Model served through Ollama that drives the highlighting chain.
# Alternative model tag kept for experimentation: Ollama(model="qwen2.5:0.5b")
llm = Ollama(model="deepseek-r1:32b")

In [27]:
# Send a trivial prompt to the model; presumably a check that the Ollama
# backend responds before the full chain is run.
llm.invoke('hi')

'<think>\n\n</think>\n\nHello! How can I assist you today? ðŸ˜Š'

In [28]:
# Run the highlighting chain on the sample input with the Ollama model.
# The result may be None when the model output cannot be parsed.
HighlightDocuments_dict = HightLight_context(
    input_data, llm)

In [29]:
# Display the parsed result returned by HightLight_context.
HighlightDocuments_dict

HighlightDocuments(segment_list=[{'pmid36982655': ['The activation of indoleamine 2,3-dioxygenase, and specific KP neuroactive metabolites, such as quinolinic acid and kynurenic acid, could play a role in the event of delirium.']}])

In [30]:
# Display the first entry of segment_list (a dict of pmid -> list of segments).
HighlightDocuments_dict.segment_list[0]

{'pmid36982655': ['The activation of indoleamine 2,3-dioxygenase, and specific KP neuroactive metabolites, such as quinolinic acid and kynurenic acid, could play a role in the event of delirium.']}

In [None]:
def highlight_segments(output_abtract, highlight_list):
    """Wrap every highlighted segment of each abstract in ``<mark>`` tags.

    Args:
        output_abtract: Mapping of pmid to a record containing an
            ``'abstract'`` string. The abstracts are updated in place.
        highlight_list: Iterable of dicts mapping a pmid to the list of
            segment strings to highlight in that document's abstract.

    Returns:
        The same ``output_abtract`` mapping, with the matching segments of each
        abstract wrapped as ``<mark>segment</mark>``. Pmids that are not in
        ``output_abtract`` and segments that do not occur in the abstract
        leave the text unchanged.
    """
    for highlight_dict in highlight_list:
        for pmid, segments in highlight_dict.items():
            print(pmid, segments)
            if pmid not in output_abtract:
                continue
            highlighted_context = output_abtract[pmid]['abstract']
            for segment in segments:
                # An empty segment would make str.replace insert a tag pair
                # between every character of the abstract.
                if not segment:
                    continue
                # Accumulate on the running text so that earlier highlights
                # are kept when a document has several segments.
                highlighted_context = highlighted_context.replace(
                    segment, f"<mark>{segment}</mark>"
                )
            output_abtract[pmid]['abstract'] = highlighted_context
    return output_abtract

# Apply the model-selected segments to the sample abstracts.
# highlight_segments writes into the dict it receives, so re-running this cell
# wraps segments that are already marked a second time.
output_abtract = highlight_segments(output_abtract, HighlightDocuments_dict.segment_list)
print(output_abtract)

pmid36982655 ['The activation of indoleamine 2,3-dioxygenase, and specific KP neuroactive metabolites, such as quinolinic acid and kynurenic acid, could play a role in the event of delirium.']
{'pmid28469468': {'abstract': 'Regulatory and functional aspects of the kynurenine (K) pathway (KP) of tryptophan (Trp) degradation are reviewed. The KP accounts for ~95% of dietary Trp degradation, of which 90% is attributed to the hepatic KP. During immune activation, the minor extrahepatic KP plays a more active role. The KP is rate-limited by its first enzyme, Trp 2,3-dioxygenase (TDO), in liver and indoleamine 2,3-dioxygenase (IDO) elsewhere. TDO is regulated by glucocorticoid induction, substrate activation and stabilization by Trp, cofactor activation by heme, and end-product inhibition by reduced nicotinamide adenine dinucleotide (phosphate). IDO is regulated by IFN-gamma and other cytokines and by nitric oxide. The KP disposes of excess Trp, controls hepatic heme synthesis and Trp availa

In [32]:
# Display the abstracts after highlighting.
output_abtract

{'pmid28469468': {'abstract': 'Regulatory and functional aspects of the kynurenine (K) pathway (KP) of tryptophan (Trp) degradation are reviewed. The KP accounts for ~95% of dietary Trp degradation, of which 90% is attributed to the hepatic KP. During immune activation, the minor extrahepatic KP plays a more active role. The KP is rate-limited by its first enzyme, Trp 2,3-dioxygenase (TDO), in liver and indoleamine 2,3-dioxygenase (IDO) elsewhere. TDO is regulated by glucocorticoid induction, substrate activation and stabilization by Trp, cofactor activation by heme, and end-product inhibition by reduced nicotinamide adenine dinucleotide (phosphate). IDO is regulated by IFN-gamma and other cytokines and by nitric oxide. The KP disposes of excess Trp, controls hepatic heme synthesis and Trp availability for cerebral serotonin synthesis, and produces immunoregulatory and neuroactive metabolites, the B3 "vitamin" nicotinic acid, and oxidized nicotinamide adenine dinucleotide. Various KP e