# IU X-Ray Full Pipeline with Report Export, Score Graphs, and T5 Fine-Tuning
This notebook covers:
- Clinical label evaluation
- Report generation and refinement
- CSV export of results
- BLEU/ROUGE comparison graphs
- Optional fine-tuning of the Flan-T5 model

In [1]:
!pip install torch torchvision transformers torchxrayvision scikit-learn rouge-score nltk matplotlib --quiet

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m51.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m35.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m33.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.2 MB/s[0m eta [36m0

In [2]:
from google.colab import drive

# Mount Google Drive so the dataset folder is reachable from the Colab runtime.
drive.mount('/content/drive')

import os

# Folder inside the mounted Drive that later cells read the CSV files and images from.
DATA_DIR = '/content/drive/MyDrive/IU-Xray'  # adjust if your Drive layout differs
# exist_ok=True keeps this cell safe to re-run and avoids the separate
# "check, then create" step, which can race with another process.
os.makedirs(DATA_DIR, exist_ok=True)

print(f"IU-Xray directory is mounted at: {DATA_DIR}")

Mounted at /content/drive
IU-Xray directory is mounted at: /content/drive/MyDrive/IU-Xray


In [3]:

import os
import torch
import pandas as pd
import numpy as np
from PIL import Image
import torchvision.transforms as transforms
import torchxrayvision as xrv
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sklearn.metrics import classification_report
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.tokenize import word_tokenize
import nltk
import matplotlib.pyplot as plt
nltk.download("punkt")
from tqdm import tqdm


!nvidia-smi
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")


  from tqdm.autonotebook import tqdm
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Wed Apr 23 20:24:02 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   45C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [4]:

# Chest X-ray classifier from torchxrayvision, moved to the selected device
# and switched to evaluation mode.
densenet_model = xrv.models.DenseNet(weights="densenet121-res224-chex")
densenet_model = densenet_model.to(device).eval()
# Names of the classifier outputs; later cells index this list in step with
# the model's prediction vector.
labels = densenet_model.pathologies

# Flan-T5 (small) tokenizer and seq2seq model used to generate report text.
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
report_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
report_model = report_model.to(device).eval()


Downloading weights...
If this fails you can run `wget https://github.com/mlmed/torchxrayvision/releases/download/v1/chex-densenet121-d121-tw-lr001-rot45-tr15-sc15-seed0-best.pt -O /root/.torchxrayvision/models_data/chex-densenet121-d121-tw-lr001-rot45-tr15-sc15-seed0-best.pt`
[██████████████████████████████████████████████████]


tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [5]:

# Preprocessing applied to each PIL image before it is passed to the
# torchxrayvision DenseNet.
#
# torchxrayvision's pretrained models expect single-channel inputs scaled to
# [-1024, 1024] (what `xrv.datasets.normalize(img, 255)` produces for 8-bit
# images). The previous pipeline multiplied by 255 and therefore fed values in
# [0, 255], which is outside the range the weights were trained on.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    # For 8-bit images ToTensor yields a float tensor with values in [0, 1].
    transforms.ToTensor(),
    # Keep only the first channel; the model takes a single-channel image.
    transforms.Lambda(lambda x: x[:1, :, :]),
    # Map [0, 1] -> [-1024, 1024].
    transforms.Lambda(lambda x: (2.0 * x - 1.0) * 1024.0),
])


In [6]:

# Read the report table and the image-projection table from the dataset folder.
reports_df = pd.read_csv(f"{DATA_DIR}/indiana_reports.csv")
projections_df = pd.read_csv(f"{DATA_DIR}/indiana_projections.csv")

# Keep only rows whose projection is "Frontal".
frontal_df = projections_df.loc[projections_df["projection"].eq("Frontal")]

# Join reports with their frontal images on `uid`, then drop rows missing any
# of the columns used by later cells.
merged_df = (
    reports_df
    .merge(frontal_df, on="uid")
    .dropna(subset=["MeSH", "filename", "indication", "impression"])
)
print("Loaded", len(merged_df), "frontal samples.")


Loaded 3727 frontal samples.


In [7]:

def map_mesh_to_chexpert(mesh_terms, label_names=None):
    """Return the classifier labels whose name occurs inside any MeSH term.

    Matching is a case-insensitive substring test: a label matches when its
    lower-cased name is contained in a lower-cased, stripped MeSH term.

    Args:
        mesh_terms: iterable of MeSH term strings.
        label_names: label names to match against. Defaults to the
            notebook-level ``labels`` list.

    Returns:
        A list of matched label names, without duplicates, in the order they
        appear in ``label_names``.

    Empty label names are skipped. ``"" in term`` is always true, so a blank
    label would otherwise be reported as a match for every MeSH term
    (torchxrayvision pads ``pathologies`` with ``""`` for outputs a weight set
    was not trained on).
    """
    if label_names is None:
        label_names = labels

    normalized_terms = [term.lower().strip() for term in mesh_terms]

    matched = []
    seen = set()
    for name in label_names:
        if not name or name in seen:
            continue
        needle = name.lower()
        if any(needle in term for term in normalized_terms):
            seen.add(name)
            matched.append(name)
    return matched


In [8]:

def generate_report(findings, history=None):
    """Generate report text from a list of finding names using the seq2seq model.

    Args:
        findings: iterable of finding names (strings). Blank names are ignored.
        history: optional patient-history text. When truthy it is included in
            the prompt and a "refined" report is requested.

    Returns:
        The decoded model output as a string (special tokens removed).
    """
    # Drop blank names so they do not show up as stray commas in the prompt.
    finding_names = [name for name in findings if name]
    # With nothing to list, the old prompt degenerated to "Findings: ." —
    # state explicitly that no abnormality was flagged instead.
    findings_text = ", ".join(finding_names) if finding_names else "no abnormalities detected"

    if history:
        prompt = f"Patient history: {history}. Findings: {findings_text}. Generate a refined radiology report."
    else:
        prompt = f"Findings: {findings_text}. Generate a radiology report."

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
    # Inference only: no gradients needed.
    with torch.no_grad():
        output = report_model.generate(**inputs, max_length=256)
    return tokenizer.decode(output[0], skip_special_tokens=True)

def evaluate_scores(true_text, pred_text):
    """Score a predicted text against a reference text.

    Returns:
        A ``(rouge1_f, bleu)`` tuple: the ROUGE-1 F-measure (stemming enabled)
        and the sentence-level BLEU score computed on lower-cased word tokens
        with ``SmoothingFunction().method1``.
    """
    scorer = rouge_scorer.RougeScorer(['rouge1'], use_stemmer=True)
    rouge_result = scorer.score(true_text, pred_text)

    reference_tokens = word_tokenize(true_text.lower())
    candidate_tokens = word_tokenize(pred_text.lower())
    bleu_score = sentence_bleu(
        [reference_tokens],
        candidate_tokens,
        smoothing_function=SmoothingFunction().method1,
    )

    rouge1_f = rouge_result['rouge1'].fmeasure
    return rouge1_f, bleu_score


In [10]:
!pip install nltk --quiet

In [None]:
import nltk
nltk.download('punkt')
nltk.download('punkt_tab')

# Per-sample records for the CSV export, plus multi-label truth/prediction
# vectors for the classification report.
results = []
y_true, y_pred = [], []

for _, row in tqdm(merged_df.iterrows(), total=len(merged_df)):
    image_path = f"{DATA_DIR}/images/images_normalized/{row['filename']}"
    # Skip samples whose image file is not present on disk.
    if not os.path.exists(image_path):
        continue

    # Open inside a context manager so the file handle is released right away;
    # convert() returns a separate, fully loaded image that outlives the block.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")
    tensor = transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        preds = densenet_model(tensor)
    # One 0/1 flag per model output, thresholded at 0.5.
    binary_pred = (preds[0] > 0.5).int().cpu().numpy()
    y_pred.append(binary_pred)

    # Reference labels derived from the ';'-separated MeSH field.
    mesh_terms = row['MeSH'].split(";")
    chexpert_labels = map_mesh_to_chexpert(mesh_terms)
    binary_truth = [1 if label in chexpert_labels else 0 for label in labels]
    y_true.append(binary_truth)

    predicted_labels = [label for i, label in enumerate(labels) if binary_pred[i]]
    history = row['indication']
    true_report = row['impression']
    # Two variants: from findings only, and with the patient history added.
    gen_report = generate_report(predicted_labels)
    refined_report = generate_report(predicted_labels, history=history)
    rouge_gen, bleu_gen = evaluate_scores(true_report, gen_report)
    rouge_ref, bleu_ref = evaluate_scores(true_report, refined_report)

    results.append({
        'uid': row['uid'],
        'true_labels': ";".join(chexpert_labels),
        'predicted_labels': ";".join(predicted_labels),
        'history': history,
        'true_impression': true_report,
        'generated_report': gen_report,
        'refined_report': refined_report,
        'bleu_generated': bleu_gen,
        'bleu_refined': bleu_ref,
        'rouge_generated': rouge_gen,
        'rouge_refined': rouge_ref
    })

# Export to CSV
df_results = pd.DataFrame(results)
df_results.to_csv("/content/iu_xray_report_results.csv", index=False)
print("Saved: /content/iu_xray_report_results.csv")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
 13%|█▎        | 473/3727 [18:55<2:12:16,  2.44s/it]

In [None]:

print("=== Clinical Label Accuracy Report ===")
# Stack the per-sample label vectors into 2-D arrays (samples x labels).
y_true, y_pred = np.array(y_true), np.array(y_pred)
# Per-label precision/recall/F1; zero_division=0 reports 0 instead of warning
# when a label has no positive samples or predictions.
print(
    classification_report(
        y_true,
        y_pred,
        target_names=labels,
        zero_division=0,
    )
)


In [None]:

def _plot_score_panel(position, generated_col, refined_col, title):
    """Draw overlaid histograms of two `df_results` columns in subplot `position` of a 1x2 grid."""
    plt.subplot(1, 2, position)
    plt.hist(df_results[generated_col], alpha=0.5, label='Generated', bins=20)
    plt.hist(df_results[refined_col], alpha=0.5, label='Refined', bins=20)
    plt.title(title)
    plt.legend()


# Side-by-side comparison of the plain and history-refined reports.
plt.figure(figsize=(12, 5))
_plot_score_panel(1, 'bleu_generated', 'bleu_refined', "BLEU Score Distribution")
_plot_score_panel(2, 'rouge_generated', 'rouge_refined', "ROUGE-1 F1 Score Distribution")

plt.tight_layout()
plt.show()
