## Import Libraries

In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
from sklearn.feature_selection import mutual_info_classif             
import operator
import matplotlib.pyplot as plt
import numpy as np  
import pickle
import os
from huggingface_hub import login
from llama_index import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    ServiceContext,
)
from llama_index.node_parser import SentenceSplitter
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.embeddings.langchain import LangchainEmbedding
from llama_index.llms import HuggingFaceLLM
from transformers import AutoModelForCausalLM, AutoTokenizer
import faiss
from huggingface_hub import login

# Never hard-code the Hugging Face token in the notebook: read it from the HF_TOKEN
# environment variable. When the variable is unset, `token=None` is passed and
# `login` falls back to its own prompt.
login(token=os.environ.get("HF_TOKEN"))
from llama_index.schema import Document
import re
from langchain.embeddings import HuggingFaceEmbeddings
import tkinter as tk
from tkinter import filedialog
import re

## Build RAG pipeline

In [3]:
# Step 1: Load and clean documents
def clean_text(doc: Document) -> Document:
    """Return a new Document whose text has dotted number sequences removed.

    Any run of digits joined by dots (for example "1.2" or "3.4.1") is deleted
    from ``doc.text``; the metadata object is passed through unchanged.
    """
    dotted_number = re.compile(r"\b\d+(\.\d+)+\b")
    stripped_text = dotted_number.sub("", doc.text)
    return Document(text=stripped_text, metadata=doc.metadata)

# Make sure the input folder exists, then read every file found in it.
os.makedirs("pdfs", exist_ok=True)
documents_raw = SimpleDirectoryReader(input_dir="pdfs").load_data()

# Give every document a "name" metadata entry taken from its file path.
for doc in documents_raw:
    if getattr(doc, "metadata", None) is None:
        doc.metadata = {}

    if hasattr(doc, "file_path"):
        display_name = os.path.basename(doc.file_path)
    elif "file_path" in getattr(doc, "metadata", {}):
        display_name = os.path.basename(doc.metadata["file_path"])
    else:
        display_name = "Unknown document"
    doc.metadata["name"] = display_name

# Cleaned copies of the raw documents (see clean_text).
documents = list(map(clean_text, documents_raw))


# Chunk the documents and tag every chunk with the name of its source document.
parser = SentenceSplitter(chunk_size=256, chunk_overlap=20)
nodes = []
for doc in documents:
    source_name = doc.metadata.get("name", "Unknown document")
    doc_nodes = parser.get_nodes_from_documents([doc])
    for node in doc_nodes:
        node.metadata["doc_name"] = source_name
    nodes += doc_nodes


# Embedding model (LangChain HuggingFace wrapper exposed to llama_index).
hf_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
embed_model = LangchainEmbedding(hf_embeddings)

# FAISS flat L2 index backing the vector store.
# NOTE(review): 384 is presumably the output dimension of the embedding model above;
# it must be changed together with the model name - confirm.
faiss_index = faiss.IndexFlatL2(384)
vector_store = FaissVectorStore(faiss_index=faiss_index)


## Model setup

In [None]:
# Limit CUDA to the devices with ids 1 and 3.
# NOTE(review): the ids are machine-specific, and the variable presumably only takes
# effect if it is set before CUDA is first initialised in this kernel - confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "1,3"
# Load the LLM and its tokenizer from the same model repository.
llm = HuggingFaceLLM(
    model_name="hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4",
    tokenizer_name="hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4",
    # Prompt plus retrieved context must fit in this many tokens.
    context_window=5800,
    #max_new_tokens=800,
    max_new_tokens=2000,
    # Sampling is disabled; NOTE(review): with do_sample=False the temperature is
    # presumably ignored by the generator - confirm.
    generate_kwargs={"temperature": 0.0,
    "do_sample": False,
    },
    device_map="auto",
    tokenizer_kwargs={"use_fast": True},
    model_kwargs={"torch_dtype": "auto"},
)
# Bundle the embedding model and the LLM so the index and query engines share them.
service_context = ServiceContext.from_defaults(
    embed_model=embed_model,
    llm=llm
)
# Build the index over the chunked nodes.
# NOTE(review): depending on the llama_index version, a `vector_store=` keyword passed
# directly to VectorStoreIndex may be ignored (the store is normally supplied through a
# StorageContext); verify that the FAISS store is actually the one being used.
index = VectorStoreIndex(nodes, vector_store=vector_store, service_context=service_context)



## W-score and hierarchy file upload

In [5]:


# Ask for the W-score CSV; the chosen path is read later with pd.read_csv(file_path).
root = tk.Tk()
root.withdraw()  # hide the empty root window so only the dialog is shown

file_path = filedialog.askopenfilename(
    title="Select the W-score CSV file",
    filetypes=[("CSV files", "*.csv")]
)
root.destroy()  # release the hidden root so re-running the cell does not leak windows

# A cancelled dialog returns an empty value; fail here with a clear message instead of
# letting the later read fail on an empty path.
if not file_path:
    raise FileNotFoundError("No W-score CSV file was selected.")



In [6]:
# Ask for the ROI hierarchy CSV (columns 'ROI', 'parent', 'level'); the chosen path is
# read later with pd.read_csv(file_path_1).
root = tk.Tk()
root.withdraw()  # hide the empty root window so only the dialog is shown

file_path_1 = filedialog.askopenfilename(
    title="Select the ROI hierarchy CSV file",
    filetypes=[("CSV files", "*.csv")])
root.destroy()  # release the hidden root so re-running the cell does not leak windows

# A cancelled dialog returns an empty value; fail here with a clear message instead of
# letting the later read fail on an empty path.
if not file_path_1:
    raise FileNotFoundError("No ROI hierarchy CSV file was selected.")

## Brain region selection

In [7]:

# ROI hierarchy table: one row per ROI with its parent ROI and its level.
# Missing cells are replaced by "" so that top-level ROIs get an empty parent.
df_hierarchy = pd.read_csv(file_path_1).fillna("")
child_parent_dict = dict(zip(df_hierarchy['ROI'], df_hierarchy['parent']))
ROI_level_dict = dict(zip(df_hierarchy['ROI'], df_hierarchy['level']))

# W-score table; columns are looked up later as "<ROI>_vol", "<ROI>_rel", "<ROI>_cortThk".
# (The original cell re-read this file a second time on an indented line, which made the
# whole cell fail with an IndentationError; the file is now read once.)
df = pd.read_csv(file_path)

# ROIs evaluated on the volumetry W-score ("<ROI>_vol").
volumetry_rois = [
    "Left_Amygdala",
    "Left_Temporal_Lobe",
    "Right_Temporal_Lobe",
    "Left_Hippocampus",
    "Right_Amygdala",
    "Right_Hippocampus",
    "Left_Inf-Lat-Vent",
    "Left_Middle_Temporal",
    "Left_Entorhinal",
    "Right_Middle_Temporal",
    "Right_Diencephalon",
    "Right_Inf-Lat-Vent",
    "Left_Diencephalon",
    "Right_Inferior_Temporal",
    "Right_Ventral_Diencephalon",
    "Left_Inferior_Temporal",
]

# ROIs evaluated on the relevance W-score ("<ROI>_rel").
relevance_rois = [
    "Left_Temporal_Lobe",
    "Left_Hippocampus",
    "Left_Amygdala",
    "Left_Parahippocampal",
]

# ROIs evaluated on the cortical-thickness W-score ("<ROI>_cortThk").
cortical_thickness_rois = [
    "Left_Temporal_Lobe",
    "Left_Entorhinal",
    "Right_Temporal_Lobe",
    "Left_Middle_Temporal",
    "Left_Inferior_Temporal",
    "Right_Entorhinal",
    "Left_Superior_Temporal",
]


# Part 1 : Generation of findings from W-scores

In [None]:
def pathology_textual_report(
    df, row_idx, vol_rois, rel_rois, cort_rois,
    threshold=2.0, flip=-1, child_parent_dict=None, ROI_level_dict=None, empty_cortThk_columns=None
):
    """Build the rule-based reference summary of pathological regions for one row.

    A region is selected when at least two of its W-scores are abnormal:
    relevance (``<ROI>_rel``) and cortical thickness (``<ROI>_cortThk``) count when
    they are below ``flip * threshold``; volume (``<ROI>_vol``) counts when it is
    below ``flip * threshold`` or above ``threshold``. If both hierarchy dicts are
    given, a selected region is dropped when its parent is also selected and the
    parent has a lower level. Severity is based on the mean absolute value of the
    available volume and relevance scores (>4 Strong, >3 Moderate, >2 Mild);
    regions at or below 2 are not reported.

    Args:
        df: DataFrame holding the W-score columns.
        row_idx: Positional index of the row to report on.
        vol_rois, rel_rois, cort_rois: ROI names checked for each score type.
        threshold: Absolute W-score cut-off used for the selection step.
        flip: Sign multiplier giving the lower cut-off (``flip * threshold``).
        child_parent_dict: Optional mapping ROI -> parent ROI.
        ROI_level_dict: Optional mapping ROI -> hierarchy level.
        empty_cortThk_columns: Accepted for backward compatibility; not used, because
            cortical thickness does not take part in the severity average.

    Returns:
        One line per reported region, joined by newlines, in the order the regions
        first appear in ``vol_rois``, ``rel_rois``, ``cort_rois``; or
        "No regions show significant pathology." when nothing is reported.
        A score that is missing (no column, empty or NaN) is printed as "N/A";
        the atrophied/enlarged word is omitted when the volume score is missing.
    """
    row = df.iloc[row_idx]
    lower_cut = flip * threshold

    def w_score(column):
        """Return the row's value for `column` as a float, or None if absent, empty or NaN."""
        raw = row.get(column)
        if raw is None or (isinstance(raw, str) and raw.strip() == ""):
            return None
        value = float(raw)
        return None if value != value else value  # NaN is the only value unequal to itself

    def flagged(rois, suffix, two_sided=False):
        """Return the ROIs whose score is abnormally low (or also high when two_sided)."""
        hits = set()
        for roi in rois:
            value = w_score(roi + suffix)
            if value is None:
                continue
            if value < lower_cut or (two_sided and value > threshold):
                hits.add(roi)
        return hits

    rel_hits = flagged(rel_rois, "_rel")
    vol_hits = flagged(vol_rois, "_vol", two_sided=True)
    cort_hits = flagged(cort_rois, "_cortThk")

    # Keep regions flagged by at least two score types, in a deterministic order
    # (first appearance in the input lists) rather than set-iteration order.
    candidates = dict.fromkeys([*vol_rois, *rel_rois, *cort_rois])
    roi_interest = [
        roi for roi in candidates
        if (roi in rel_hits) + (roi in vol_hits) + (roi in cort_hits) >= 2
    ]

    # Walk the hierarchy from the deepest level upwards and drop every region whose
    # parent is still selected at a shallower level.
    if child_parent_dict and ROI_level_dict and roi_interest:
        level = max(ROI_level_dict.get(roi, 0) for roi in roi_interest)
        while level >= 1:
            kept = []
            for roi in roi_interest:
                parent = child_parent_dict.get(roi, "")
                redundant_child = (
                    ROI_level_dict.get(roi, 0) == level
                    and parent in roi_interest
                    and ROI_level_dict.get(parent, -1) < level
                )
                if not redundant_child:
                    kept.append(roi)
            roi_interest = kept
            level -= 1

    pathology_lines = []
    for roi in roi_interest:
        vol_val = w_score(roi + "_vol")
        rel_val = w_score(roi + "_rel")

        # Only volume and relevance contribute to the severity average.
        magnitudes = [abs(value) for value in (vol_val, rel_val) if value is not None]
        if not magnitudes:
            continue
        avg = sum(magnitudes) / len(magnitudes)

        if avg > 4:
            severity = "Strong"
        elif avg > 3:
            severity = "Moderate"
        elif avg > 2:
            severity = "Mild"
        else:
            continue

        region = roi.replace('_', ' ')
        if vol_val is not None:
            # Direction is defined by the sign of the volume score.
            flag = 'atrophied' if vol_val < 0 else 'enlarged'
            region = f"{flag} {region}"
        vol_text = f"{vol_val:.2f}" if vol_val is not None else "N/A"
        rel_text = f"{rel_val:.2f}" if rel_val is not None else "N/A"
        pathology_lines.append(
            f"{severity} pathology in {region} "
            f"(volume w-score: {vol_text}, relevance w-score: {rel_text})"
        )

    return "\n".join(pathology_lines) if pathology_lines else "No regions show significant pathology."



## Prompt Build

In [9]:
def build_pathology_prompt(df, row_idx, vol_rois, rel_rois, cort_rois):
    """Build the LLM prompt listing one patient's W-scores and the reporting rules.

    Args:
        df: DataFrame holding demographic and W-score columns.
        row_idx: Positional index of the patient row.
        vol_rois, rel_rois, cort_rois: ROI names to list under the volumetry,
            relevance and cortical-thickness headings (columns ``<ROI>_vol``,
            ``<ROI>_rel`` and ``<ROI>_cortThk``).

    Returns:
        The prompt text. Age and sex come from the first of the ``*_vol``, ``*_rel``,
        ``*_cortThk`` demographic columns that holds a value. Sex is decoded
        numerically (0 -> male, any other number -> female), so float-typed rows
        such as 0.0 are handled; a missing or non-numeric value is reported as not
        recorded. Scores are printed with two decimals, the same formatting as
        ``pathology_textual_report``, and missing or NaN scores are shown as "N/A".
    """
    row = df.iloc[row_idx]

    def as_number(value):
        """Return `value` as a float, or None when it is missing, NaN or non-numeric."""
        try:
            number = float(value)
        except (TypeError, ValueError):
            return None
        return None if number != number else number

    def first_present(*columns):
        """Return the first non-missing value among `columns`, else 'Unknown'."""
        for column in columns:
            value = row.get(column)
            if value is None:
                continue
            if isinstance(value, float) and value != value:
                continue
            return value
        return 'Unknown'

    age_raw = first_present('age_vol', 'age_rel', 'age_cortThk')
    age_number = as_number(age_raw)
    # Selecting a row of a numeric frame yields floats; print 72.0 as "72".
    age = f"{age_number:g}" if age_number is not None else str(age_raw)

    sex_code = as_number(first_present('sex1f_vol', 'sex1f_rel', 'sex1f_cortThk'))
    if sex_code is None:
        patient = "patient (sex not recorded)"
    elif sex_code == 0:
        patient = "male patient"
    else:
        patient = "female patient"

    prompt_lines = [
        f"A {age}-year-old {patient} underwent structural MRI.",
        "Here are the W-scores for selected brain regions:"
    ]

    sections = (
        ("\nVolumetry W-scores:", vol_rois, "_vol"),
        ("\nRelevance W-scores:", rel_rois, "_rel"),
        ("\nCortical Thickness W-scores:", cort_rois, "_cortThk"),
    )
    for header, rois, suffix in sections:
        prompt_lines.append(header)
        for roi in rois:
            score = as_number(row.get(roi + suffix))
            shown = f"{score:.2f}" if score is not None else "N/A"
            prompt_lines.append(f"- {roi.replace('_', ' ')}: {shown}")

    # Instructions. The output template mirrors the line format produced by
    # pathology_textual_report so that the two outputs can be compared line by line.
    prompt_lines.append(
        "\nInstructions:"
        "\n1. For all region, check the Volume W-score, Relevance W-score, and Cortical Thickness W-score (if available)."
        "\n2. If ONLY TWO or more W-scores of the same region have an absolute value greater than 2, ONLY then that region is considered pathological."
        "\nFor such pathological region, calculate the average of the absolute values of these W-scores."
        "\nThe severity is 'Mild' if the average absolute w-score is >2, 'Moderate' if >3, and 'Strong' if >4."
        "\nUse 'atrophied' if the volume is negative, 'enlarged' if the volume is positive."
        "\nImportant: When deciding which regions to summarize, consult the file titled 'ROI_Parent_Child_Hirerachy.pdf' in the document database. DO NOT include child regions if their parent region is already listed as pathological according to the relationships defined in that file."
        "\n[IMPORTANT] OUTPUT TEMPLATE: <severity> pathology in <atrophied|enlarged> <region> (volume w-score: <value>, relevance w-score: <value>)"
        "\nOutput one line per region. If there are NO pathologic regions, write: 'No regions show significant pathology.'"
        "\nDo NOT provide any diagnosis or extra commentary—just the list."
        "\n[REMEMBER] A region is reported as pathological only when two or more of its W-scores have an absolute value greater than 2."
    )

    return "\n".join(prompt_lines)



In [10]:
def find_empty_cortThk_cols(df, cort_rois):
    """Return the set of ROIs that have no usable cortical-thickness data.

    An ROI is included when its "<ROI>_cortThk" column is absent from `df`
    or when every value in that column is null.
    """
    def has_no_data(roi):
        column = roi + "_cortThk"
        if column not in df.columns:
            return True
        return df[column].isnull().all()

    return {roi for roi in cort_rois if has_no_data(roi)}

In [11]:
import re

def normalize_line(line):
    """Reduce one output line to a canonical form for set comparison.

    Surrounding whitespace is removed first, so the anchored patterns below also
    apply to indented lines and to lines with trailing spaces. Then a leading
    list number ("1. "), a leading "answer:", "```", "additional notes:" or "*"
    marker, and pure "-"/"_" divider lines are removed. The result is stripped
    and lower-cased; an empty string means the line carries no content.
    """
    line = line.strip()
    # Remove numbering, e.g., '1. ', '2. ', etc.
    line = re.sub(r"^\s*\d+\.\s*", "", line)
    # Remove 'answer:', '```', etc.
    line = re.sub(r"^(answer:|```|additional notes:|\*)\s*", "", line, flags=re.IGNORECASE)
    # Remove extra underscores, dashes, or empty dividers
    line = re.sub(r"^[-_]+$", "", line.strip())
    return line.strip().lower()


# Feedback Generation

In [None]:
def feedback_loop_textual(
  df, row_idx, vol_rois, rel_rois, cort_rois, query_engine,
  child_parent_dict, ROI_level_dict, max_attempts=3
):
    """Query the LLM for a pathology summary and retry with corrective feedback.

    The rule-based summary from ``pathology_textual_report`` is the reference.
    After every attempt the normalised LLM lines are compared with the normalised
    reference lines, and the response is checked for regions that are listed
    together with their parent region. If both checks pass the loop stops;
    otherwise the concrete problems are appended to the prompt and the query is
    repeated, up to ``max_attempts`` times.

    Args:
        df, row_idx, vol_rois, rel_rois, cort_rois: Forwarded to the report and
            prompt builders.
        query_engine: Object with a ``query(prompt)`` method whose result is
            converted with ``str()``.
        child_parent_dict: Mapping ROI -> parent ROI; entries with an empty or
            non-string parent are ignored.
        ROI_level_dict: Mapping ROI -> hierarchy level.
        max_attempts: Maximum number of LLM queries.

    Returns:
        The lower-cased, stripped response of every attempt, in order.
    """
    empty_cortThk_columns = find_empty_cortThk_cols(df, cort_rois)
    gold_text = pathology_textual_report(
        df, row_idx, vol_rois, rel_rois, cort_rois,
        threshold=2.0,
        flip=-1,
        child_parent_dict=child_parent_dict,
        ROI_level_dict=ROI_level_dict,
        empty_cortThk_columns=empty_cortThk_columns
    ).strip().lower()
    # The reference does not change between attempts, so normalise it once.
    gold_lines = {normalize_line(line) for line in gold_text.split("\n")} - {""}

    # Pre-compute the searchable text of every (child, parent) pair. ROIs without a
    # parent are skipped: an empty parent name is a substring of every response and
    # would otherwise flag each top-level region as a child error.
    hierarchy_pairs = []
    for roi, parent_roi in child_parent_dict.items():
        if not (isinstance(roi, str) and isinstance(parent_roi, str)):
            continue
        if not roi or not parent_roi:
            continue
        hierarchy_pairs.append(
            (roi, parent_roi, roi.replace('_', ' ').lower(), parent_roi.replace('_', ' ').lower())
        )

    prompt = build_pathology_prompt(df, row_idx, vol_rois, rel_rois, cort_rois)
    attempt = 0
    attempt_outputs = []
    while attempt < max_attempts:
        response = str(query_engine.query(prompt)).strip().lower()
        attempt_outputs.append(response)

        llm_lines = {normalize_line(line) for line in response.split("\n")} - {""}

        # Child/parent errors: a region is reported although its parent also appears.
        child_errors = []
        for line in sorted(llm_lines):
            for roi, parent_roi, roi_text, parent_text in hierarchy_pairs:
                if roi_text in line and parent_text in response:
                    if (roi, parent_roi) not in child_errors:
                        child_errors.append((roi, parent_roi))

        if gold_lines == llm_lines and not child_errors:
            return attempt_outputs

        missing_lines = sorted(gold_lines - llm_lines)
        extra_lines = sorted(llm_lines - gold_lines)

        # Prepare feedback for the next attempt, listing the concrete items.
        feedback_note = ""
        if child_errors:
            feedback_note += "\nYou incorrectly included child regions when their parent regions were already listed:\n"
            for roi, parent_roi in child_errors:
                feedback_note += f"- {roi.replace('_', ' ')} (parent: {parent_roi.replace('_', ' ')})\n"
        if missing_lines:
            feedback_note += "\nYou missed these required region summaries:\n"
            for line in missing_lines:
                feedback_note += f"- {line}\n"
        if extra_lines:
            feedback_note += "\nYou included extra/incorrect regions or statements:\n"
            for line in extra_lines:
                feedback_note += f"- {line}\n"
        if feedback_note:
            feedback_note += (
                "Please regenerate the summary and avoid including child regions if their parents are listed."
            )
            prompt += feedback_note

        attempt += 1

    return attempt_outputs



## Main()

In [16]:
# Row positions (df.iloc) of the patients to process.
rows_to_test = [84]
outputsresults = []
for row_number in rows_to_test:
    # Query engine in "compact" response mode for this row.
    query_engine = index.as_query_engine(response_mode="compact")
    attempt_outputs = feedback_loop_textual(
        df, row_number, volumetry_rois, relevance_rois, cortical_thickness_rois,
        query_engine, child_parent_dict, ROI_level_dict
    )

    # Keep the first and the final LLM answer; both are empty strings when no
    # attempt was made. Only the values of the last processed row remain afterwards.
    if len(attempt_outputs) > 0:
        first_attempt, last_attempt = attempt_outputs[0], attempt_outputs[-1]
    else:
        first_attempt = last_attempt = ""


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [19]:
# Show the final LLM answer kept from the feedback loop (last processed row).
print(last_attempt)

1. moderate pathology in atrophied left temporal lobe: (volume w-score: -1.93, relevance w-score: -2.28) 
2. moderate pathology in atrophied left hippocampus: (volume w-score: -5.33, relevance w-score: -2.56) 
3. moderate pathology in atrophied left amygdala: (volume w-score: -3.4, relevance w-score: -2.97)


# Part 2: Report generation from the findings of Part 1

In [20]:
# Report prompt: embeds the Part 1 findings (`last_attempt`) and restricts the model to
# the six listed source documents. The closing citation rule now says "six" so that it
# agrees with the document list and with the FINDINGS task (it previously said "five").
prompt=f"""
You are an expert radiology assistant.

You have access ONLY to the following documents:
1. Alzheimer s Dementia - 2011 - Jack - Introduction to the recommendations from the National Institute on Aging‐Alzheimer s.pdf
2. Alzheimer s Dementia - 2011 - McKhann - The diagnosis of dementia due to Alzheimer s disease Recommendations from the.pdf
3. Alzheimer s Dementia - 2011- Albert - The diagnosis of mild cognitive impairment due to Alzheimer s disease.pdf
4. Alzheimer s Dementia - 2018 - Jack - NIA‐AA Research Framework Toward a biological definition of Alzheimer s disease.pdf
5. DGN Guidelines Diagnosis.pdf
6. Clinical_Significance.json


If a claim is not explicitly supported in one of these documents, say:  
“[Not supported in available sources]”.  
Do NOT use outside knowledge or make assumptions.

---

PATIENT FINDINGS:

{last_attempt}

---

TASK:
1. Write **FINDINGS**: summarize abnormalities with W-scores. For each region,specify the pattern, severity, and w-scores, and briefly discuss the potential clinical and pathophysiological relevance, citing one of the six sources by filename.
2. Write **IMPRESSION**: 
    - Provide a **multi-paragraph, guideline-based summary**:
    - Describe how the observed atrophy patterns fit into the categories of neuroimaging biomarkers and how the framework classifies such changes .
    - Relate the severity to how the framework discusses early, preclinical, or symptomatic stages.
    - Discuss the pattern and severity of atrophy and its implications for neurodegenerative diseases but do not diagnose any disease.
    - Add a short paragraph explaining the clinical significance about each regions in the findings in detail based on this document Clinical_Significance.json.  If no document supports it, clearly state: “Not supported in available sources.”
3. Every claim must be followed by a citation in the format: (Filename, page/section if available).  

Do NOT cite anything outside the six listed documents. Do NOT invent facts.


OUTPUT FORMAT:
FINDINGS:
- …

IMPRESSION:
- …
[IMPORTANT]Show step by step before giving the output
[REMEMBER]Only use the documents to claim and do not claim anything outside the documents
"""

# Query the index with the report prompt (default response mode) and show the answer.
query_engine = index.as_query_engine()
response = query_engine.query(prompt)
print(response)


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


1. Analyze the patient findings and identify the abnormalities with W-scores.

The patient findings indicate moderate pathology in three regions: the left temporal lobe, left hippocampus, and left amygdala. The W-scores for each region are:

* Left temporal lobe: volume W-score = -1.93, relevance W-score = -2.28
* Left hippocampus: volume W-score = -5.33, relevance W-score = -2.56
* Left amygdala: volume W-score = -3.4, relevance W-score = -2.97

2. Write the FINDINGS section.

FINDINGS:
- Moderate pathology in the left temporal lobe, with a volume W-score of -1.93 and a relevance W-score of -2.28, indicating a moderate degree of atrophy (Alzheimer s Dementia - 2011 - Albert - The diagnosis of mild cognitive impairment due to Alzheimer s disease.pdf).
- Moderate pathology in the left hippocampus, with a volume W-score of -5.33 and a relevance W-score of -2.56, indicating a moderate degree of atrophy (Alzheimer s Dementia - 2011 - Albert - The diagnosis of mild cognitive impairment due 