In [1]:
import os
import h5py
import xml.etree.ElementTree as ET
import pandas as pd
from tqdm import tqdm

# Namespace map passed to ElementTree lookups: it resolves the "ismrmrd:" prefix
# used in the XPath queries below to the ISMRMRD header namespace URI.
ns = {'ismrmrd': 'http://www.ismrm.org/ISMRMRD'}

def extract_metadata_from_h5(file_path):
    """Build one metadata row per slice of an HDF5 file with an ISMRMRD header.

    The XML stored in the ``ismrmrd_header`` dataset is parsed once, the
    acquisition parameters are looked up once, and the resulting values are
    repeated for every slice. The slice count is the length of the first axis
    of the ``kspace`` dataset.

    Args:
        file_path: Path to the ``.h5`` file to read.

    Returns:
        A list of dicts (one per slice) with the keys ``filename``, ``slice``,
        ``anatomy``, ``contrast``, ``sequence``, ``TR``, ``TE``, ``TI``,
        ``flip_angle`` and ``pathology``. Header fields that are missing or
        empty are reported as the string ``"null"``. An empty list is returned
        (and a warning is emitted) when the header cannot be read or parsed.

    Raises:
        OSError: If the file itself cannot be opened by h5py.
        KeyError: If the file has a valid header but no ``kspace`` dataset.
    """
    import warnings  # local import so the block does not depend on new file-level imports

    with h5py.File(file_path, "r") as f:
        try:
            raw_header = f["ismrmrd_header"][()]
            # Depending on the h5py version / dataset dtype the scalar comes
            # back as bytes or as an already-decoded str; accept both instead
            # of failing on str.decode and silently dropping the file.
            if isinstance(raw_header, str):
                header = raw_header
            else:
                header = raw_header.decode("utf-8")
            root = ET.fromstring(header)
        except (KeyError, AttributeError, TypeError, ValueError, OSError,
                ET.ParseError) as err:
            # Best-effort batch extraction: skip the file, but say so rather
            # than letting it vanish from the output without a trace.
            warnings.warn(
                "Skipping {!r}: cannot read ISMRMRD header ({!r})".format(file_path, err),
                stacklevel=2,
            )
            return []

        num_slices = f["kspace"].shape[0]

    def get(path, default="null"):
        """Return the text of the first element matching *path*, or *default*."""
        el = root.find(path, ns)
        # An empty element (<TI/>) has text None; treat it like a missing one
        # so a column never mixes None with the "null" placeholder.
        if el is None or el.text is None:
            return default
        return el.text

    # Strip only a trailing ".h5"; str.replace would also remove occurrences
    # in the middle of the name (e.g. "scan.h5_copy.h5").
    filename = os.path.basename(file_path)
    if filename.endswith(".h5"):
        filename = filename[:-len(".h5")]

    # These values do not depend on the slice, so look them up once.
    # NOTE: anatomy is inferred from the whole path (directories included),
    # case-insensitively; anything without "brain" in it is labelled "knee".
    shared = {
        "anatomy": "brain" if "brain" in file_path.lower() else "knee",
        "contrast": get(".//ismrmrd:measurementInformation/ismrmrd:protocolName"),
        "sequence": get(".//ismrmrd:sequenceParameters/ismrmrd:sequence_type"),
        "TR": get(".//ismrmrd:sequenceParameters/ismrmrd:TR"),
        "TE": get(".//ismrmrd:sequenceParameters/ismrmrd:TE"),
        "TI": get(".//ismrmrd:sequenceParameters/ismrmrd:TI"),
        "flip_angle": get(".//ismrmrd:sequenceParameters/ismrmrd:flipAngle_deg"),
        "pathology": "null",
    }

    # "filename" and "slice" come first to keep the original column order.
    return [
        {"filename": filename, "slice": slice_idx, **shared}
        for slice_idx in range(num_slices)
    ]

# Batch extract from folder
all_metadata = []
# Directory containing the .h5 files; "" means the current working directory.
fastmri_dir = ""

# List the same directory the paths are joined against. Previously the current
# directory was always listed, so any non-empty fastmri_dir produced paths to
# files that were never checked to exist there.
scan_dir = fastmri_dir or os.curdir
# Filter before handing the names to tqdm so the progress bar counts only the
# files that are processed; sort because os.listdir order is arbitrary and the
# CSV row order should be reproducible.
h5_names = sorted(name for name in os.listdir(scan_dir) if name.endswith(".h5"))

for fname in tqdm(h5_names):
    full_path = os.path.join(fastmri_dir, fname)
    rows = extract_metadata_from_h5(full_path)
    all_metadata.extend(rows)

# One row per (file, slice); written without the DataFrame index column.
df = pd.DataFrame(all_metadata)
df.to_csv("metadata_brain.csv", index=False)

100%|██████████████████████████████████████████| 13/13 [00:00<00:00, 923.75it/s]
