# TESTING OF DATA/BART PATHS IN DATA LOADING PIPELINE

Use baselines not baselines_2

In [2]:
from pathlib import Path
path = Path("/DATASERVER/MIC/GENERAL/STUDENTS/aslock2/Preprocessed_CS/multicoil_test/")
if "test" in path.name:
    print("Found 'test' in last part!")

Found 'test' in last part!


change multicoil_test_full to multicoil_test => to get mask in evaluate

In [13]:
file_path = Path("/DATASERVER/MIC/GENERAL/STUDENTS/aslock2/Preprocessed_CS/multicoil_test_full/file1.h5")
new_path = file_path.parent.parent / "multicoil_test" / file_path.name

print(new_path)


/DATASERVER/MIC/GENERAL/STUDENTS/aslock2/Preprocessed_CS/multicoil_test/file1.h5


In [2]:
import pathlib
from pathlib import Path
import os
from typing import Union
import yaml
from warnings import warn

def fetch_dir(
    key: str, data_config_file: Union[str, Path, os.PathLike] = "fastmri_dirs.yaml"
) -> Path:
    """
    Data directory fetcher.

    This is a brute-force simple way to configure data directories for a
    project. Simply overwrite the variables for `knee_path` and `brain_path`
    and this function will retrieve the requested subsplit of the data for use.

    Args:
        key: key to retrieve path from data_config_file. Expected to be in
            ("knee_path", "brain_path", "log_path").
        data_config_file: Optional; Default path config file to fetch path
            from.

    Returns:
        The path to the specified directory.
    """
    data_config_file = Path(data_config_file)
    if not data_config_file.is_file():
        default_config = {  
            "bart_path": "/path/to/bart", # ADDED (but should always be defined...)
            "data_path": "/path/to/NYU_FastMRI", # ADDED
            "log_path": ".",
        }
        with open(data_config_file, "w") as f:
            yaml.dump(default_config, f)

        data_dir = default_config[key]

        warn(
            f"Path config at {data_config_file.resolve()} does not exist. "
            "A template has been created for you. "
            "Please enter the directory paths for your system to have defaults."
        )
    else:
        with open(data_config_file, "r") as f:
            data_dir = yaml.safe_load(f)[key]

    return Path(data_dir)

path_config = pathlib.Path("fastmri_dirs.yaml")
# set defaults based on optional directory config  
data_path = fetch_dir("data_path", path_config) # /path/to/NYU_fastMRI
bart_path = fetch_dir("bart_path", path_config) # ADDED
default_root_dir = fetch_dir("log_path", path_config)
print(f"Using data path: {data_path}")
print(f"Using bart path: {bart_path}")
print(f"Using log path: {default_root_dir}")



Using data path: /DATASERVER/MIC/SHARED/NYU_FastMRI
Using bart path: /DATASERVER/MIC/GENERAL/STUDENTS/aslock2/Preprocessed_CS
Using log path: /DATASERVER/MIC/GENERAL/STUDENTS/aslock2/Results/CSUNet


PreprocessedUnet.py

In [3]:
import pytorch_lightning as pl
from fastmri.data.mri_data import fetch_dir
from preprocessed_transforms import UnetDataTransform
from modified_unet_module import UnetModule
from preprocessed_data_module import FastMriDataModule

train_transform = UnetDataTransform("multicoil")
val_transform = UnetDataTransform("multicoil")
test_transform = UnetDataTransform("multicoil")
# ptl data module - this handles data loaders
data_module = FastMriDataModule(
    data_path=data_path,
    bart_path=bart_path, # added
    challenge="multicoil",
    train_transform=train_transform,
    val_transform=val_transform,
    test_transform=test_transform,
    batch_size=1,
    num_workers=4,
    distributed_sampler="ddp_cpu",
    )



  from .autonotebook import tqdm as notebook_tqdm


data_module.py

In [10]:
root = data_path / "multicoil_val" 
bart_path_test = bart_path / "multicoil_test"
files = list(Path(bart_path_test).glob("*.npy")) 
for fname_cs in sorted(files): # now you iterate over BART output sets (train/val/test)
    ## get original root/fname.h5 back so you can still load all data!
    # first get fname
    fname_stem = fname_cs.stem 
    fname_stem = fname_stem.replace("_cs", "") 
    # get brain or knee folder
    if "brain" in str(fname_stem):
        # root now looks like /path/to/NYU_FastMRI/multicoil_...
        folder = Path(root).parent / "Preprocessed/" 
    else:
        folder = Path(root).parent/ "Knee/" 
    # since BART train/val/test set is different from original fastmri one 
    # need to check all folders of fastmri!!
    for subset in ["multicoil_train", "multicoil_val", "multicoil_test"]:
        folder_sub = folder / subset
        fname = folder_sub / (fname_stem + ".h5")
        if fname.exists():
            print(f"Found original file: {fname}")
            break                       
    assert fname.exists(), f"Original file not found: {fname}" # for debugging
    ## 

Found original file: /DATASERVER/MIC/SHARED/NYU_FastMRI/Knee/multicoil_val/file1000000.h5
Found original file: /DATASERVER/MIC/SHARED/NYU_FastMRI/Knee/multicoil_val/file1000007.h5
Found original file: /DATASERVER/MIC/SHARED/NYU_FastMRI/Knee/multicoil_val/file1000026.h5
Found original file: /DATASERVER/MIC/SHARED/NYU_FastMRI/Knee/multicoil_val/file1000033.h5
Found original file: /DATASERVER/MIC/SHARED/NYU_FastMRI/Knee/multicoil_val/file1000052.h5
Found original file: /DATASERVER/MIC/SHARED/NYU_FastMRI/Knee/multicoil_val/file1000073.h5
Found original file: /DATASERVER/MIC/SHARED/NYU_FastMRI/Knee/multicoil_val/file1000108.h5
Found original file: /DATASERVER/MIC/SHARED/NYU_FastMRI/Knee/multicoil_val/file1000114.h5
Found original file: /DATASERVER/MIC/SHARED/NYU_FastMRI/Knee/multicoil_val/file1000126.h5
Found original file: /DATASERVER/MIC/SHARED/NYU_FastMRI/Knee/multicoil_val/file1000153.h5
Found original file: /DATASERVER/MIC/SHARED/NYU_FastMRI/Knee/multicoil_val/file1000178.h5
Found orig

mri_data.py

In [21]:
import numpy as np
root = data_path / "multicoil_val" 
bart_path_train = bart_path / "multicoil_val"
files = list(Path(bart_path_train).iterdir())
dataslice = 0 

## FIRST, replicate how fnames are generated
for fname_cs in sorted(files):
    ## get original root/fname.h5 back so you can still load all data!
    # first get fname
    fname_stem = fname_cs.stem 
    fname_stem = fname_stem.replace("_cs", "") 
    # get brain or knee folder
    if "brain" in str(fname_stem):
        # root now looks like /path/to/NYU_FastMRI/multicoil_...
        folder = Path(root).parent / "Preprocessed/" 
    else:
        folder = Path(root).parent/ "Knee/" 
    # since BART train/val/test set is different from original fastmri one 
    # need to check all folders of fastmri!!
    for subset in ["multicoil_train", "multicoil_val", "multicoil_test"]:
        folder_sub = folder / subset
        fname = folder_sub / (fname_stem + ".h5")
        if fname.exists():
            # print(f"Found original file: {fname}")
            break                       
    assert fname.exists(), f"Original file not found: {fname}" # for debugging
    
    ## Now use fname to replicate data loading in __getitem___
    bart_fname = Path(fname).name.replace('.h5', '_cs.npy')
    bart_fname = Path(bart_path_train) / bart_fname
    try:
        bart_file = np.load(bart_fname)
    except:
        print(f"Failed to load {bart_fname}")
        continue
    cs_data = bart_file[dataslice] 


this file does not load: Failed to load /DATASERVER/MIC/GENERAL/STUDENTS/aslock2/Preprocessed_CS/multicoil_val/file1000287_cs.npy

WAS BECAUSE OF COPY PASTING, AND STOPPING!!

In [20]:
error_file = '/DATASERVER/MIC/GENERAL/STUDENTS/aslock2/Preprocessed_CS/multicoil_val/file1000287_cs.npy'
np.load(error_file)

array([[[ 0.00000000e+00+0.00000000e+00j,
          0.00000000e+00+0.00000000e+00j,
          0.00000000e+00+0.00000000e+00j, ...,
          0.00000000e+00+0.00000000e+00j,
          0.00000000e+00+0.00000000e+00j,
          0.00000000e+00+0.00000000e+00j],
        [ 0.00000000e+00+0.00000000e+00j,
          0.00000000e+00+0.00000000e+00j,
          0.00000000e+00+0.00000000e+00j, ...,
          0.00000000e+00+0.00000000e+00j,
          0.00000000e+00+0.00000000e+00j,
          0.00000000e+00+0.00000000e+00j],
        [ 0.00000000e+00+0.00000000e+00j,
          0.00000000e+00+0.00000000e+00j,
          0.00000000e+00+0.00000000e+00j, ...,
          0.00000000e+00+0.00000000e+00j,
          0.00000000e+00+0.00000000e+00j,
          0.00000000e+00+0.00000000e+00j],
        ...,
        [ 0.00000000e+00+0.00000000e+00j,
          0.00000000e+00+0.00000000e+00j,
          0.00000000e+00+0.00000000e+00j, ...,
          0.00000000e+00+0.00000000e+00j,
          0.00000000e+00+0.00000000e+00j

In [12]:

bart_fname = Path(fname).name.replace('.h5', '_cs.npy')
print(f" BART file: {bart_fname}")


 BART file: file_brain_AXT1PRE_200_6002399_cs.npy


mri_data.py

In [13]:
from preprocessed_mri_data import SliceDataset, CombinedSliceDataset
data_path_train = data_path / "multicoil_train"
bart_path_train = bart_path / "multicoil_train"
dataset = SliceDataset(
    root=data_path_train,
    bart_path= bart_path_train, # ADDED
    transform=val_transform,
    challenge="multicoil",
)

challenge="multicoil"
data_paths = [
    data_path / f"{challenge}_train",  # multicoil_train
    data_path / f"{challenge}_val",    # multicoil_val
]
# POINT TO BART RECONSTRUCTIONS where train/val/test division should be done
bart_paths = [
    bart_path / "multicoil_train",  
    bart_path / "multicoil_val",    
]
data_transforms = [train_transform, val_transform]
challenges = [challenge, challenge]

dataset = CombinedSliceDataset(
    roots=data_paths,
    bart_paths= bart_paths, # ADDED
    transforms=data_transforms,
    challenges=challenges,
)