In [16]:
import os, ast, h5py, pydicom, pandas as pd, numpy as np
import matplotlib.pyplot as plt
from matplotlib.widgets import Slider, Rectangle
from tqdm import tqdm
from ipywidgets import interact, IntSlider


patches_h5 = "/kaggle/input/rsna-patches-dataset/patches.h5"
dataset_h5 = "/kaggle/input/rsna-dataset/dataset.h5"
csv_localizer = "/kaggle/input/rsna-intracranial-aneurysm-detection/train_localizers.csv"
series_root="/kaggle/input/rsna-intracranial-aneurysm-detection/series"

In [20]:
import os, h5py, pydicom, numpy as np, pandas as pd, ast, matplotlib.pyplot as plt
from ipywidgets import interactive
from IPython.display import display

def load_original_volume(series_uid):
    path = os.path.join(series_root, series_uid)
    dcm_files = [f for f in os.listdir(path) if f.endswith(".dcm")]
    if not dcm_files:
        return None, []

    if len(dcm_files) == 1:
        ds = pydicom.dcmread(os.path.join(path, dcm_files[0]), force=True)
        arr = ds.pixel_array.astype(np.float32)
        if arr.ndim == 3:
            return arr, [getattr(ds, "SOPInstanceUID", None)] * arr.shape[0]
        else:
            return arr[np.newaxis, ...], [getattr(ds, "SOPInstanceUID", None)]

    slices, nums, sops = [], [], []
    for f in dcm_files:
        ds = pydicom.dcmread(os.path.join(path, f), force=True)
        if not hasattr(ds, "PixelData"):
            continue
        slices.append(ds.pixel_array.astype(np.float32))
        nums.append(getattr(ds, "InstanceNumber", 0))
        sops.append(getattr(ds, "SOPInstanceUID", None))

    order = np.argsort(nums)
    vol = np.stack([slices[i] for i in order], axis=0)
    sops_order = [sops[i] for i in order]
    return vol, sops_order

def get_coords(uid, sop_order):
    df = pd.read_csv(csv_localizer)
    locs = df[df["SeriesInstanceUID"] == uid]
    coords = []
    for _, row in locs.iterrows():
        try:
            xy = ast.literal_eval(row["coordinates"])
            sop = row["SOPInstanceUID"]
            if sop in sop_order:
                z = sop_order.index(sop)
                coords.append((xy["x"], xy["y"], z))
        except Exception:
            pass
    return coords

def show_volume(vol, coords=None, title="", pix=32):
    if coords:
        z0 = int(coords[0][2])
    else:
        z0 = vol.shape[0] // 2

    def view(z):
        plt.imshow(vol[z], cmap="gray")
        plt.axis("off")
        if coords:
            for (x, y, zc) in coords:
                if zc == z:
                    plt.gca().add_patch(plt.Rectangle(
                        (x - pix/2, y - pix/2), pix, pix,
                        edgecolor='r', facecolor='none', lw=1.5))
        plt.title(f"{title} | slice {z}/{vol.shape[0]-1}")
        plt.show()

    w = interactive(view, z=(0, vol.shape[0]-1, 1))
    w.children[0].value = z0
    display(w)

def compare_series(idx):
    with h5py.File(dataset_h5, "r") as f:
        uid = f["meta"][idx]["series_uid"].decode()
        vol_h5 = f["X"][str(idx)][()]
        label = int(f["y"][idx])

    print(f"\n=== Série {idx} | UID={uid} | Label={label} ===")

    vol_orig, sop_order = load_original_volume(uid)
    if vol_orig is None:
        print("⚠️ Série originale introuvable.")
        return

    coords = get_coords(uid, sop_order)
    print(f"→ {len(coords)} coordonnées trouvées pour {uid}")
    for (x, y, z) in coords:
        print(f"   x={x:.1f}, y={y:.1f}, z={z}")

    print("\n→ Volume HDF5 :")
    show_volume(vol_h5, coords=coords, title=f"HDF5 {uid}")

    print("\n→ Volume original :")
    show_volume(vol_orig, coords=coords, title=f"Original {uid}")

# === exemple ===
compare_series(2)



=== Série 2 | UID=1.2.826.0.1.3680043.8.498.10005158603912009425635473100344077317 | Label=1 ===
→ 1 coordonnées trouvées pour 1.2.826.0.1.3680043.8.498.10005158603912009425635473100344077317
   x=258.4, y=261.4, z=162

→ Volume HDF5 :


interactive(children=(IntSlider(value=162, description='z', max=275), Output()), _dom_classes=('widget-interac…


→ Volume original :


interactive(children=(IntSlider(value=162, description='z', max=275), Output()), _dom_classes=('widget-interac…

In [96]:
idx = 2
with h5py.File(dataset_h5, "r") as f:
    uid = f["meta"][idx]["series_uid"].decode()

print(f"Index {idx} → UID : {uid}")

Index 2 → UID : 1.2.826.0.1.3680043.8.498.10005158603912009425635473100344077317


In [85]:
import os, pydicom, pandas as pd, ast

def get_coords_3d(uid,
                  csv_localizer=csv_path,
                  series_root=series_root):
    df = pd.read_csv(csv_localizer)
    locs = df[df["SeriesInstanceUID"] == uid]

    series_path = os.path.join(series_root, uid)
    dcm_files = [f for f in os.listdir(series_path) if f.endswith(".dcm")]

    # mapping SOPInstanceUID → index de slice
    sop_to_z = {}
    for f in dcm_files:
        ds = pydicom.dcmread(os.path.join(series_path, f), stop_before_pixels=True, force=True)
        sop = getattr(ds, "SOPInstanceUID", None)
        num = getattr(ds, "InstanceNumber", None)
        if sop and num is not None:
            sop_to_z[sop] = num
    z_order = {sop: i for i, sop in enumerate(sorted(sop_to_z, key=lambda s: sop_to_z[s]))}

    coords = []
    for _, row in locs.iterrows():
        try:
            xy = ast.literal_eval(row["coordinates"])
            sop = row["SOPInstanceUID"]
            if sop in z_order:
                coords.append((xy["x"], xy["y"], z_order[sop]))
        except Exception:
            pass
    print(f"{len(coords)} coordonnées 3D trouvées pour {uid}")
    for (x, y, z) in coords:
        print(f"  x={x:.1f}, y={y:.1f}, z={z}")
    return coords

In [88]:
coords = get_coords_3d(uid)
x, y, z = coords[0]

1 coordonnées 3D trouvées pour 1.2.826.0.1.3680043.8.498.10005158603912009425635473100344077317
  x=258.4, y=261.4, z=162


In [81]:
import h5py, numpy as np, matplotlib.pyplot as plt
from ipywidgets import interactive
from IPython.display import display

def show_series(h5_path, idx, slice_idx=0, x=0, y=0, pix=32):
    with h5py.File(h5_path) as f:
        keys = list(f["X"].keys())
        key = keys[int(idx)] if isinstance(idx, (int, np.integer)) else str(idx)
        vol = f["X"][key][()]
        uid = f["meta"][int(idx)]["series_uid"].decode()
        label = int(f["y"][int(idx)])
    print(f"Série {idx} (key={key}) UID={uid} Label={label} Shape={vol.shape}")

    def view(z):
        plt.imshow(vol[z], cmap="gray"); plt.axis("off")
        if z == slice_idx:
            plt.gca().add_patch(plt.Rectangle(
                (x - pix/2, y - pix/2), pix, pix,
                edgecolor='r', facecolor='none', lw=1.5
            ))
        plt.title(f"{uid} | slice {z}/{vol.shape[0]-1}")
        plt.show()

    w = interactive(view, z=(0, vol.shape[0]-1, 1))
    w.children[0].value = slice_idx
    display(w)

In [90]:
show_series(dataset_h5, idx, z, x, y)

Série 2 (key=10) UID=1.2.826.0.1.3680043.8.498.10005158603912009425635473100344077317 Label=1 Shape=(451, 512, 512)


interactive(children=(IntSlider(value=162, description='z', max=450), Output()), _dom_classes=('widget-interac…

In [91]:
import os, pydicom, numpy as np, pandas as pd, ast, matplotlib.pyplot as plt
from ipywidgets import interactive
from IPython.display import display

def show_original_series(uid,
                         csv_localizer="/kaggle/input/rsna-intracranial-aneurysm-detection/train_localizers.csv",
                         series_root="/kaggle/input/rsna-intracranial-aneurysm-detection/series"):
    # --- Charger la série DICOM ---
    series_path = os.path.join(series_root, uid)
    dcm_files = [f for f in os.listdir(series_path) if f.endswith(".dcm")]
    slices, nums, sops = [], [], []

    for f in dcm_files:
        ds = pydicom.dcmread(os.path.join(series_path, f), stop_before_pixels=False, force=True)
        slices.append(ds.pixel_array.astype(np.float32))
        nums.append(getattr(ds, "InstanceNumber", 0))
        sops.append(getattr(ds, "SOPInstanceUID", None))

    order = np.argsort(nums)
    vol = np.stack([slices[i] for i in order], axis=0)
    sop_order = [sops[i] for i in order]
    print(f"Série UID={uid} — Volume shape: {vol.shape}")

    df = pd.read_csv(csv_localizer)
    locs = df[df["SeriesInstanceUID"] == uid]
    coords = []
    for _, row in locs.iterrows():
        try:
            xy = ast.literal_eval(row["coordinates"])
            sop = row["SOPInstanceUID"]
            if sop in sop_order:
                z = sop_order.index(sop)
                coords.append((xy["x"], xy["y"], z))
        except Exception:
            pass

    print(f"{len(coords)} coordonnées trouvées pour {uid}")
    for (x, y, z) in coords:
        print(f"  x={x:.1f}, y={y:.1f}, z={z}")

    def view(z):
        plt.imshow(vol[z], cmap="gray"); plt.axis("off")
        for (X, Y, Z) in coords:
            if Z == z:
                plt.scatter(X, Y, c='red', s=20)
        plt.title(f"{uid} | slice {z}/{vol.shape[0]-1}")
        plt.show()

    w = interactive(view, z=(0, vol.shape[0]-1, 1))
    w.children[0].value = 0
    display(w)


In [92]:
show_original_series(uid)

Série UID=1.2.826.0.1.3680043.8.498.10005158603912009425635473100344077317 — Volume shape: (276, 512, 512)
1 coordonnées trouvées pour 1.2.826.0.1.3680043.8.498.10005158603912009425635473100344077317
  x=258.4, y=261.4, z=162


interactive(children=(IntSlider(value=0, description='z', max=275), Output()), _dom_classes=('widget-interact'…

In [31]:
with h5py.File(dataset_h5, "r") as f:
    vol = f["X"][str(idx)][()]      # (D,H,W)
    label = int(f["y"][idx])
    meta = f["meta"][idx]
    uid = meta["series_uid"].decode() if isinstance(meta["series_uid"], bytes) else meta["series_uid"]

print(f"Série {idx} — UID={uid}")
print(f"Label (Aneurysm Present) = {label}")
print(f"Shape : {vol.shape}, dtype={vol.dtype}")

def show_slice(z):
    plt.figure(figsize=(5,5))
    plt.imshow(vol[z], cmap="gray")
    plt.title(f"Série {idx} | Slice {z}/{vol.shape[0]-1}")
    plt.axis("off")
    plt.show()

interact(show_slice, z=(0, vol.shape[0]-1))


Série 490 — UID=1.2.826.0.1.3680043.8.498.11396089578758057333831232441106022580
Label (Aneurysm Present) = 1
Shape : (657, 512, 512), dtype=uint8


interactive(children=(IntSlider(value=328, description='z', max=656), Output()), _dom_classes=('widget-interac…

<function __main__.show_slice(z)>