## Import libraries

In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
from sklearn.feature_selection import mutual_info_classif             
import operator
import matplotlib.pyplot as plt
import numpy as np  
import pickle
import os
from huggingface_hub import login
from llama_index import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    ServiceContext,
)
from llama_index.node_parser import SentenceSplitter
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.embeddings.langchain import LangchainEmbedding
from llama_index.llms import HuggingFaceLLM
from transformers import AutoModelForCausalLM, AutoTokenizer
import faiss
from huggingface_hub import login

login(token="Password")
from llama_index.schema import Document
import re
from langchain.embeddings import HuggingFaceEmbeddings
import tkinter as tk
from tkinter import filedialog
import re

In [3]:
def clean_text(doc: Document) -> Document:
    cleaned = re.sub(r"\b\d+(\.\d+)+\b", "", doc.text)
    return Document(text=cleaned, metadata=doc.metadata)

os.makedirs("pdfs", exist_ok=True)
documents_raw = SimpleDirectoryReader(input_dir="pdfs").load_data()

for doc in documents_raw:
    if not hasattr(doc, "metadata") or doc.metadata is None:
        doc.metadata = {}
    
    if hasattr(doc, "file_path"):
        doc.metadata["name"] = os.path.basename(doc.file_path)
    elif "file_path" in getattr(doc, "metadata", {}):
        doc.metadata["name"] = os.path.basename(doc.metadata["file_path"])
    else:
        doc.metadata["name"] = "Unknown document"

documents = [clean_text(doc) for doc in documents_raw]
#print(f"Loaded and cleaned {len(documents)} documents.")


parser = SentenceSplitter(chunk_size=256, chunk_overlap=20)
nodes = []
for doc in documents:
    doc_nodes = parser.get_nodes_from_documents([doc])
    for node in doc_nodes:
        node.metadata["doc_name"] = doc.metadata.get("name", "Unknown document")
    nodes.extend(doc_nodes)



embed_model = LangchainEmbedding(
   
    HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


)
faiss_index = faiss.IndexFlatL2(384)
vector_store = FaissVectorStore(faiss_index=faiss_index)


In [None]:
os.environ["CUDA_VISIBLE_DEVICES"] = "1,3"
llm = HuggingFaceLLM(
    model_name="hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4",
    tokenizer_name="hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4",
    context_window=5800,
    #max_new_tokens=800,
    max_new_tokens=2000,
    generate_kwargs={"temperature": 0.0,
    "do_sample": False,
    },
    device_map="auto",
    tokenizer_kwargs={"use_fast": True},
    model_kwargs={"torch_dtype": "auto"},
)
service_context = ServiceContext.from_defaults(
    embed_model=embed_model,
    llm=llm
)
index = VectorStoreIndex(nodes, vector_store=vector_store, service_context=service_context)



In [5]:


root = tk.Tk()
root.withdraw()

file_path = filedialog.askopenfilename(
    title="Select your CSV file",
    filetypes=[("CSV files", "*.csv")]
)



In [6]:
root = tk.Tk()
root.withdraw()


file_path_1 = filedialog.askopenfilename(
    title="Select your CSV file",
    filetypes=[("CSV files", "*.csv")])

In [None]:

df_hierarchy = pd.read_csv(file_path_1).fillna("")
child_parent_dict = dict(zip(df_hierarchy['ROI'], df_hierarchy['parent']))
ROI_level_dict = dict(zip(df_hierarchy['ROI'], df_hierarchy['level']))

df = pd.read_csv(file_path)  
volumetry_rois = [
    "Left_Amygdala", 
    "Left_Temporal_Lobe",
    "Right_Temporal_Lobe",
    "Left_Hippocampus", 
    "Right_Amygdala",
    "Right_Hippocampus",
    "Left_Inf-Lat-Vent",
    "Left_Middle_Temporal", 
    "Left_Entorhinal", 
    "Right_Middle_Temporal",
    "Right_Diencephalon",
    "Right_Inf-Lat-Vent",
    "Left_Diencephalon",
    "Right_Inferior_Temporal",
    "Right_Ventral_Diencephalon",
    "Left_Inferior_Temporal",

]
relevance_rois = [
    "Left_Temporal_Lobe",
    "Left_Hippocampus",  
    "Left_Amygdala" ,
    "Left_Parahippocampal" ,
]
cortical_thickness_rois = [
    "Left_Temporal_Lobe", 
    "Left_Entorhinal",
    "Right_Temporal_Lobe",
    "Left_Middle_Temporal",
    "Left_Inferior_Temporal",
    "Right_Entorhinal",
    "Left_Superior_Temporal" ,
]

  df = pd.read_csv(file_path)


In [8]:
def truncate_1_decimal(x):
    
    try:
        if x is None or x == "" or str(x).lower() == "none":
            return "not available"
        sgn = '-' if float(x) < 0 else ''
        s = str(abs(float(x)))
        if '.' in s:
            integer, decimal = s.split('.')
            return sgn + integer + '.' + decimal[:2]
        return sgn + s
    except Exception:
        return "not available"
    
import re

def lines_equivalent(line1, line2):
    
    def parse(line):
        d = {'severity': None, 'atrophy': None, 'region': None, 'scores': {}}
        m = re.match(r"(mild|moderate|strong) pathology in (atrophied|enlarged) ([^:]+):\s*\((.*)\)", line, re.IGNORECASE)
        if not m:
            return d
        d['severity'] = m.group(1).lower()
        d['atrophy'] = m.group(2).lower()
        d['region'] = m.group(3).strip().lower()
        score_block = m.group(4)
        
        for part in score_block.split(','):
            part = part.strip()
            sm = re.match(r"([a-zA-Z ]+w-score[s]?):\s*([-\d.]+|not available|none|n/a)?", part, re.IGNORECASE)
            if sm:
                key = sm.group(1).strip().lower().replace("scores", "score")
                val = sm.group(2)
                if not val or val.strip().lower() in ("not available", "none", "n/a", ""):
                    val = None
                d['scores'][key] = val
        return d

    a = parse(line1)
    b = parse(line2)
    if (a['region'], a['atrophy'], a['severity']) != (b['region'], b['atrophy'], b['severity']):
        return False
    keys = set(a['scores']) | set(b['scores'])
    for k in keys:
        va = a['scores'].get(k)
        vb = b['scores'].get(k)
        if va is None and vb is None:
            continue
        if va is not None and vb is not None:
            if str(va) != str(vb):
                try:
                    if float(va) != float(vb):
                        return False
                except Exception:
                    return False
        else:
           
            if (va is None and (vb is None or vb == "not available")) or (vb is None and (va is None or va == "not available")):
                continue
            return False
    return True


In [9]:
def pathology_textual_report(
    df, row_idx, vol_rois, rel_rois, cort_rois,
    threshold=2.00, flip=-1, child_parent_dict=None, ROI_level_dict=None, empty_cortThk_columns=None
):
    row = df.iloc[row_idx]
    act_rois, vol_rois_selected, cort_rois_selected = [], [], []

    if empty_cortThk_columns is None:
        empty_cortThk_columns = set()

    # Abnormal relevance
    for roi in rel_rois:
        val = row.get(roi + "_rel")
        if val is not None and val != "" and abs(float(val)) >= threshold:
            act_rois.append(roi)
    # Abnormal volume
    for roi in vol_rois:
        val = row.get(roi + "_vol")
        if val is not None and val != "" and abs(float(val)) >= threshold:
            vol_rois_selected.append(roi)
    # Abnormal cortical thickness
    for roi in cort_rois:
        val = row.get(roi + "_cortThk")
        if val is not None and val != "" and abs(float(val)) >= threshold:
            cort_rois_selected.append(roi)

    # Agreement
    agree3 = set(act_rois) & set(vol_rois_selected) & set(cort_rois_selected)
    agree2 = (set(act_rois) & set(vol_rois_selected)) | \
             (set(vol_rois_selected) & set(cort_rois_selected)) | \
             (set(act_rois) & set(cort_rois_selected))

    roi_interest = list(agree3.union(agree2))

    # Hierarchy filter
    def recur(roi_list, level):
        if level < 1:
            return roi_list
        roi_list_copy = roi_list.copy()
        for roi in roi_list:
            parent_roi = child_parent_dict.get(roi, "")
            parent_level = ROI_level_dict.get(parent_roi, -1)
            if ROI_level_dict.get(roi, 0) == level and parent_roi in roi_list and parent_level < level:
                roi_list_copy.remove(roi)
        return recur(roi_list_copy, level - 1)

    if child_parent_dict and ROI_level_dict and roi_interest:
        levels = [ROI_level_dict.get(r, 0) for r in roi_interest]
        max_level = max(levels)
        roi_interest = recur(roi_interest, max_level)

    pathology_lines = []
    for ROI in roi_interest:
        vol_val = row.get(ROI + "_vol") if ROI in vol_rois else None
        rel_val = row.get(ROI + "_rel") if ROI in rel_rois else None
        cort_val = row.get(ROI + "_cortThk") if ROI in cort_rois else None

        # Count number of abnormal values (>= threshold)
        vals = []
        abnormal_count = 0
        if vol_val is not None and vol_val != "":
            vals.append(abs(float(vol_val)))
            if abs(float(vol_val)) >= threshold:
                abnormal_count += 1
        if rel_val is not None and rel_val != "":
            vals.append(abs(float(rel_val)))
            if abs(float(rel_val)) >= threshold:
                abnormal_count += 1
        if cort_val is not None and ROI not in empty_cortThk_columns and cort_val != "":
            vals.append(abs(float(cort_val)))
            if abs(float(cort_val)) >= threshold:
                abnormal_count += 1

        # Reporting if at least two abnormal scores
        if abnormal_count < 2:
            continue

        if not vals:
            continue

        avg = sum(vals) / len(vals)
        severity = "Strong" if avg >= 4.00 else "Moderate" if avg >= 3.00 else "Mild" if avg >= 2.00 else None
        if severity:
            flag = 'atrophied' if (vol_val is not None and vol_val != "" and float(vol_val) < 0) else 'enlarged'
            vol_str = truncate_1_decimal(vol_val) if vol_val is not None and vol_val != "" else "not available"
            rel_str = truncate_1_decimal(rel_val) if rel_val is not None and rel_val != "" else "not available"
            cort_str = truncate_1_decimal(cort_val) if cort_val is not None and ROI not in empty_cortThk_columns and cort_val != "" else "not available"
            pathology_lines.append(
                f"{severity} pathology in {flag} {ROI.replace('_', ' ')}: (volume w-score: {vol_str}, relevance w-score: {rel_str}, cortical thickness w-score: {cort_str})"
            )

    return "\n".join(pathology_lines) if pathology_lines else "No pathology for this patient."

In [10]:
def build_pathology_prompt(df, row_idx, vol_rois, rel_rois, cort_rois):
    row = df.iloc[row_idx]
    age = row.get('age_vol', row.get('age_rel', row.get('age_cortThk', 'Unknown')))
    sex_val = row.get('sex1f_vol', row.get('sex1f_rel', row.get('sex1f_cortThk', 'Unknown')))
    sex = "female" if str(sex_val) == "1" else "male"

    # Volume W-scores
    vol_lines = []
    for roi in vol_rois:
        col = roi + "_vol"
        if col in df.columns:
            try:
                val = float(row[col])
                val_str = truncate_1_decimal(val)
                vol_lines.append(f"- {roi.replace('_', ' ')}: {val_str}")
            except:
                pass

    # Relevance W-scores
    rel_lines = []
    for roi in rel_rois:
        col = roi + "_rel"
        if col in df.columns:
            try:
                val = float(row[col])
                val_str = truncate_1_decimal(val)
                rel_lines.append(f"- {roi.replace('_', ' ')}: {val_str}")
            except:
                pass

    # Cortical Thickness W-scores
    cort_lines = []
    for roi in cort_rois:
        col = roi + "_cortThk"
        if col in df.columns:
            try:
                val = float(row[col])
                val_str = truncate_1_decimal(val)
                cort_lines.append(f"- {roi.replace('_', ' ')}: {val_str}")
            except:
                pass

    prompt = f"""
[INSTRUCTION]
You are a medical AI assistant.

Your task is to analyze the following W-scores a {age} year old {sex} for various brain regions from a patient's MRI. For each region, follow these rules exactly:

Instructions:
1. For all region, check the Volume W-score, Relevance W-score, and Cortical Thickness W-score (if available).
2. If two or more W-scores for the same region have an absolute value greater than 2, that region is considered pathological.
3. For each such region, calculate the average of the absolute values of these W-scores.
- Example (Mild): -2.52, -2.70 → (2.52+2.70)/2 = 2.61 ("Mild")
- Example (Moderate): -3.5, -3.2 → (3.5+3.2)/2 = 3.35 ("Moderate")
- Example (Strong): -4.8, -4.5 → (4.8+4.5)/2 = 4.65 ("Strong")
- Example (Mild): -2.52, -2.70 , -2.1→ (2.52+2.70+2.1)/3 = 2.44 ("Mild")
- Example (Moderate): -3.5, -3.2 , -3.4→ (3.5+3.2+3.4)/3 = 3.36 ("Moderate")
- Example (Strong): -4.8, -4.5 , -4.3 → (4.8+4.5+4.3)/3 = 4.53 ("Strong")
4. Assign severity:
- "Strong pathology" if average > 4
- "Moderate pathology" if average > 3
- "Mild pathology" if average > 2
- "NO Pathology" if average < 2
5. If the volume W-score is negative, describe the region as "atrophied"; if positive, as "enlarged".
6. For each qualifying region, output exactly ONE line in this format (substitute your findings):
<Example>Moderate pathology in atrophied <region>: (volume w-score: -2.8, relevance w-score:-2.97, Cortical Thickness W-scores:-2.02)<?Example>
<Example>mild pathology in atrophied <region>: (volume w-score: -2.4, relevance w-score:-2.97)<?Example>
7. Do NOT include regions with fewer than two abnormal W-scores.

For each qualifying region, output exactly ONE summary line in this format:
<Example>Moderate pathology in atrophied <region>: (volume w-score: -2.8, relevance w-score:-2.97, Cortical Thickness W-scores:-2.02)<?Example>

Example:
Region: Left Temporal Lobe
- Volume W-score: -2.97 (abnormal)
- Relevance W-score: -3.67 (abnormal)
- Cortical Thickness W-score: -2.02 (abnormal)
There are two or more W-scores otherwise STOP
All three W-scores are abnormal (|W| > 2).
Average: (2.97 + 3.67 + 2.02) / 3 = 2.89
Severity: Mild pathology
Volume W-score is negative, so region is atrophied.
Output: mild pathology in atrophied Left Temporal Lobe: (volume w-score: -2.97, relevance w-score: -3.67, Cortical Thickness W-scores: -2.02)

Example:
Region: Left Amygdala
- Volume W-score: -1.84 (normal)
- Relevance W-score: not available
- Cortical Thickness W-score: not available
Step 1: Only one W-score is present.
Step 2: Number of abnormal W-scores (|W| > 2): 0.
Step 3: Fewer than two abnormal W-scores, so region is not pathological.
No pathology for this region.


[END OF INSTRUCTION]

Here are the region W-scores for this patient:
Volume W-scores:
{chr(10).join(vol_lines)}

Relevance W-scores:
{chr(10).join(rel_lines)}

Cortical Thickness W-scores:
{chr(10).join(cort_lines)}


[REMEMBER: Show step by step calculation then output summary lines in the specified format.]
[FINAL FILTER]
Using the "ROI_Parent_Child_Hirerachy" document:
- Carefully review the output summary.
- If any region is a child of another region also present in the list, remove the child region(s).
- Output the corrected summary list.
[END OF FINAL FILTER]
"""
    return prompt



In [11]:
def find_empty_cortThk_cols(df, cort_rois):
    empty_cols = set()
    for roi in cort_rois:
        col = roi + "_cortThk"
        if col not in df.columns or df[col].isnull().all():
            empty_cols.add(roi)
    return empty_cols

In [12]:
import re

def normalize_line(line):
    # Remove numbering, e.g., '1. ', '2. ', etc.
    line = re.sub(r"^\s*\d+\.\s*", "", line)
    # Remove 'answer:', '```', etc.
    line = re.sub(r"^(answer:|```|additional notes:|\*)\s*", "", line, flags=re.IGNORECASE)
    # Remove extra underscores, dashes, or empty dividers
    line = re.sub(r"^[-_]+$", "", line)
    return line.strip().lower()

In [13]:
import re
from collections import Counter

def extract_summary_lines(response_text):

    import re

    def is_no_pathology_present(text):
        s = "".join(text.strip().lower().replace(".", "").split())
        return "nopathologyforthispatient" in s

    resp_lower = response_text.lower()

  
    filtered_starts = [
        'after applying the "roi_parent_child_hierarchy" filter:',
        "after applying the 'roi_parent_child_hierarchy' filter:",
        "[final filter]",
        'output:'
    ]
    filtered_block = None
    for start in filtered_starts:
        idx = resp_lower.find(start)
        if idx != -1:
            filtered_block = response_text[idx+len(start):]
            break

   

    def scan_lines(block):
        lines = []
        for line in block.strip().split("\n"):
            cleaned = line.strip().lstrip("-*0123456789. ")
            if (
                cleaned.lower().startswith(("mild pathology", "moderate pathology", "strong pathology"))
                and "w-score" in cleaned
            ):
                lines.append(cleaned)
        return lines
    


   
    if filtered_block:
        region_lines = scan_lines(filtered_block)
        if region_lines:
            return region_lines
        
        if is_no_pathology_present(filtered_block):
            return ["No pathology for this patient."]

   
    region_lines = scan_lines(response_text)
    if region_lines:
        return region_lines

    
    
    if is_no_pathology_present(resp_lower):
        return ["No pathology for this patient."]

    
    return []



def get_region_from_summary_line(line):
    
    m = re.search(r'pathology in (atrophied|enlarged) ([^:]+):', line, re.IGNORECASE)
    if m:
        return m.group(2).strip().lower()
    return None

def remove_duplicate_regions(summary_lines):
   
    seen = set()
    result = []
    for line in summary_lines:
        region = get_region_from_summary_line(line)
        if region and region not in seen:
            seen.add(region)
            result.append(line)
    return result


def normalize_line(line):
    return line.strip().lower().replace('  ', ' ').replace("w-scores", "w-score")


def feedback_loop_textual(
    df, row_idx, vol_rois, rel_rois, cort_rois, query_engine,
    child_parent_dict, ROI_level_dict, max_attempts=3, max_feedback_regions=2
):
    empty_cortThk_columns = find_empty_cortThk_cols(df, cort_rois)
    gold_text = pathology_textual_report(
        df, row_idx, vol_rois, rel_rois, cort_rois,
        threshold=2.0,
        flip=-1,
        child_parent_dict=child_parent_dict,
        ROI_level_dict=ROI_level_dict,
        empty_cortThk_columns=empty_cortThk_columns
    ).strip().lower()
    gold_lines = set([normalize_line(line) for line in gold_text.split("\n") if normalize_line(line)])

    prompt = build_pathology_prompt(df, row_idx, vol_rois, rel_rois, cort_rois)
    attempt = 0
    attempt_outputs = []
    while attempt < max_attempts:
        
        response = str(query_engine.query(prompt)).strip()
        attempt_outputs.append(response)

        llm_summary = extract_summary_lines(response)

        if llm_summary == ["No pathology for this patient."]:
            final_lines = llm_summary
        else:
            final_lines = remove_duplicate_regions(llm_summary)

        llm_lines = set([normalize_line(line) for line in llm_summary])

       


 
        missing_lines = []
        unmatched_llm_lines = list(llm_lines)  # Copy
        for gold in gold_lines:
            found = False
            for llm in unmatched_llm_lines:
                if lines_equivalent(gold, llm):
                    found = True
                    unmatched_llm_lines.remove(llm)
                    break
            if not found:
                missing_lines.append(gold)
        extra_lines = unmatched_llm_lines






        if not missing_lines and not extra_lines: 
            return attempt_outputs

        feedback_note = ""
        lines_for_feedback = "\n".join(llm_summary[:max_feedback_regions]) if llm_summary else "No pathology for this patient."

        # CASE 2: Too many errors -- force full redo
        if len(set(missing_lines) | set(extra_lines)) > max_feedback_regions:
            feedback_note = (
                "\nYour previous output had too many errors. Please repeat the calculation and filtering from scratch according to the instructions. "
                "Be careful to include only correct pathological regions and apply filtering rules for child regions. "
                "If there are no pathological regions, output: 'No pathology for this patient.'"
            )
            prompt = (
                f"{feedback_note}\n\n"
                f"Previous summary lines were:\n{lines_for_feedback}\n\n"
                f"{build_pathology_prompt(df, row_idx, vol_rois, rel_rois, cort_rois)}"
            )
        
        elif extra_lines and not missing_lines:
            feedback_note = (
                "\nThe following is a list of summary lines generated in the previous step:\n\n"
                f"{lines_for_feedback}\n\n"
                "Using the \"ROI_Parent_Child_Hirerachy\" document:\n"
                "- Carefully review the list above.\n"
                "- If any region is a child of another region also present in the list, remove the child region(s).\n"
                "- Output the corrected summary list."
            )
            prompt = feedback_note
        
        elif missing_lines:
            feedback_note = (
                "\nYou missed a required region in your output. Please do not repeat same mistakes and SHOW STEP BY STEP CALCULATION before output summary lines, "
                "and be careful to include all correct regions based on the calculation and filtering rules. "
                "If there are no pathological regions, output: 'No pathology for this patient.'"
            )
            prompt = (
                f"{feedback_note}\n\n"
                f"Previous summary lines were:\n{lines_for_feedback}\n\n"
                f"{build_pathology_prompt(df, row_idx, vol_rois, rel_rois, cort_rois)}"
            )
        else:
            
            feedback_note = (
                "Unable to automatically classify the error. Please repeat the calculation and filtering strictly as per instructions."
            )
            prompt = (
                f"{feedback_note}\n\n"
                f"{build_pathology_prompt(df, row_idx, vol_rois, rel_rois, cort_rois)}"
            )

        attempt += 1

    
    return attempt_outputs



In [14]:
rows_to_test = [3515]
outputsresults = []

for row_number in rows_to_test:
    query_engine= index.as_query_engine(response_mode="compact")
    attempt_outputs = feedback_loop_textual(
        df, row_number, volumetry_rois, relevance_rois, cortical_thickness_rois,
        query_engine, child_parent_dict, ROI_level_dict
    )

   
    first_attempt = attempt_outputs[0] if len(attempt_outputs) > 0 else ""
    last_attempt = attempt_outputs[-1] if len(attempt_outputs) > 0 else ""

    
   







Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [15]:
print(last_attempt)

1. For each region, check the Volume W-score, Relevance W-score, and Cortical Thickness W-score (if available).
2. If two or more W-scores for the same region have an absolute value greater than 2, that region is considered pathological.
3. For each such region, calculate the average of the absolute values of these W-scores.
4. Assign severity:
- "Strong pathology" if average > 4
- "Moderate pathology" if average > 3
- "Mild pathology" if average > 2
- "NO Pathology" if average < 2
5. If the volume W-score is negative, describe the region as "atrophied"; if positive, as "enlarged".
6. For each qualifying region, output exactly ONE line in this format (substitute your findings):
<Example>Moderate pathology in atrophied <region>: (volume w-score: -2.8, relevance w-score:-2.97, Cortical Thickness W-scores:-2.02)<?Example>
7. Do NOT include regions with fewer than two abnormal W-scores.

Region: Left Entorhinal
- Volume W-score: -3.58 (abnormal)
- Relevance W-score: not available (normal)


In [16]:
#Extraction of summary lines
last_attempt_summary = extract_summary_lines(last_attempt)
print(last_attempt_summary)
   

['mild pathology in atrophied Left Entorhinal: (volume w-score: -3.58, Cortical Thickness W-scores: -2.09)', 'mild pathology in atrophied Left Entorhinal: (volume w-score: -3.58, Cortical Thickness W-scores: -2.09)']


In [17]:
prompt=f"""
You are an expert radiology assistant.

You have access ONLY to the following documents:
1. Alzheimer s Dementia - 2011 - Jack - Introduction to the recommendations from the National Institute on Aging‐Alzheimer s.pdf
2. Alzheimer s Dementia - 2011 - McKhann - The diagnosis of dementia due to Alzheimer s disease Recommendations from the.pdf
3. Alzheimer s Dementia - 2011- Albert - The diagnosis of mild cognitive impairment due to Alzheimer s disease.pdf
4. Alzheimer s Dementia - 2018 - Jack - NIA‐AA Research Framework Toward a biological definition of Alzheimer s disease.pdf
5. DGN Guidelines Diagnosis.pdf
6. Clinical_Significance.json


If a claim is not explicitly supported in one of these documents, say:  
“[Not supported in available sources]”.  
Do NOT use outside knowledge or make assumptions.

---

PATIENT FINDINGS:

{last_attempt_summary}

---

TASK:
1. Write **FINDINGS**: summarize abnormalities with W-scores. For each region,specify the pattern, severity, and w-scores, and briefly discuss the potential clinical and pathophysiological relevance, citing one of the six sources by filename.
2. Write **IMPRESSION**: 
    - Provide a **multi-paragraph, guideline-based summary**:
    - Describe how the observed atrophy patterns fit into the categories of neuroimaging biomarkers and how the framework classifies such changes .
    - Relate the severity to how the framework discusses early, preclinical, or symptomatic stages.
    - Discuss the pattern and severity of atrophy and its implications for neurodegenerative diseases but do not diagnose any disease.
    - Add a short paragraph explaining the clinical significance about each regions in the findings in detail based on this document Clinical_Significance.json.  If no document supports it, clearly state: “Not supported in available sources.”
3. Every claim must be followed by a citation in the format: (Filename, page/section if available).  

Do NOT cite anything outside the five listed documents. Do NOT invent facts.


OUTPUT FORMAT:
FINDINGS:
- …

IMPRESSION:
- …
[IMPORTANT]Show step by step before giving the output
[REMEMBER]Only use the documents to claim and do not claim anything outside the documents
"""

query_engine = index.as_query_engine()
response = query_engine.query(prompt)
print(response)


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


1. Analyze the patient findings and identify the abnormalities with W-scores.

The patient findings indicate mild pathology in the atrophied Left Entorhinal region, with a volume w-score of -3.58 and Cortical Thickness W-scores of -2.09.

2. Write the FINDINGS section.

FINDINGS:
- Mild pathology in the atrophied Left Entorhinal region, with a volume w-score of -3.58 and Cortical Thickness W-scores of -2.09, indicating potential clinical and pathophysiological relevance (Alzheimer s Dementia - 2011 - Albert - The diagnosis of mild cognitive impairment due to Alzheimer s disease.pdf).

3. Write the IMPRESSION section.

IMPRESSION:

The observed atrophy patterns in the Left Entorhinal region fit into the categories of neuroimaging biomarkers, which are discussed in the framework as potential indicators of Alzheimer's disease (Alzheimer s Dementia - 2011 - Jack - Introduction to the recommendations from the National Institute on Aging‐Alzheimer s.pdf). The severity of the atrophy, with a 