In [1]:
%matplotlib inline

import os
import h5py
import numpy as np
from matplotlib import pyplot as plt


In [2]:
# Root directory of the shared fastMRI download on the cluster.
download_path = '/usr/local/micapollo01/MIC/DATA/SHARED/NYU_FastMRI'

# Every dataset split lives in its own sub-directory directly under the root.
(
    dicom_path,
    train_path,
    validation_path,
    test_path,
    fully_sampled_test_path,
) = (
    os.path.join(download_path, subdirectory)
    for subdirectory in (
        'fastMRI_brain_DICOM',
        'multicoil_train',
        'multicoil_val',
        'multicoil_test',
        'multicoil_test_full',
    )
)

Explore general properties of the data using all training files.

In [7]:
from pathlib import Path

# Cluster the training data based on number of coils used.
# Result: dict mapping the coil count (axis 1 of the 'kspace' dataset shape)
# to the list of file paths that have that coil count.
clustered_data = {}
files = Path(train_path).glob('**/*')
for file in files:
    # The recursive glob also yields directories, which cannot be opened as HDF5 files.
    if not file.is_file():
        continue
    # Open read-only inside a context manager so every file handle is closed
    # again; the loop visits thousands of files.
    with h5py.File(file, 'r') as hf_loop:
        # Ask the dataset for its shape directly instead of loading the whole
        # k-space array into memory with [()] just to inspect its dimensions.
        shape = hf_loop['kspace'].shape
    num_coils = shape[1]
    clustered_data.setdefault(num_coils, []).append(file)


In [8]:
# Location of the cached coil-count clusters.
coil_clusters_file = "/usr/local/micapollo01/MIC/DATA/STUDENTS/mvhave7/Results/Preprocessing/exploration/coil_clustered_fastmri_data.npy"

# Save dictionary to .npy file (NumPy pickles the dict inside the file).
np.save(coil_clusters_file, clustered_data)

# To load: the pickled payload needs allow_pickle=True, and .item() unwraps
# the loaded array back into the plain dictionary.
# NOTE: unpickling can execute arbitrary code, so only load files you trust.
clustered_data = np.load(coil_clusters_file, allow_pickle=True).item()

In [9]:
# Report how many scans fall into each coil-count cluster.
# The loop variable is called `scans` rather than `list`, so the built-in
# `list` is not rebound for the rest of the kernel session.
for key, scans in clustered_data.items():
    print('Number of coils:'+str(key))
    print('Number of scans:'+str(len(scans)))

# We can see that the most common numbers of coils are (in descending order) 16, 4 and 20

Number of coils:12
Number of scans:245
Number of coils:20
Number of scans:1173
Number of coils:4
Number of scans:1214
Number of coils:16
Number of scans:1434
Number of coils:14
Number of scans:264
Number of coils:5
Number of scans:22
Number of coils:6
Number of scans:61
Number of coils:24
Number of scans:7
Number of coils:18
Number of scans:26
Number of coils:2
Number of scans:7
Number of coils:8
Number of scans:10
Number of coils:10
Number of scans:3
Number of coils:28
Number of scans:2
Number of coils:22
Number of scans:1


In [10]:
# Cluster the 16-coil training data based on slice dimensions, as the 16-coil data corresponds to the most scans.
# Result: dict mapping (slice_height, slice_width), taken from axes 2 and 3 of
# the 'kspace' dataset shape, to the list of file paths with those dimensions.
clustered_data_2 = {}
for file in clustered_data[16]:
    # Open read-only inside a context manager so the handle is always closed,
    # and read only the dataset's shape instead of loading the full k-space array.
    with h5py.File(file, 'r') as hf_loop:
        shape = hf_loop['kspace'].shape
    slice_height = shape[2]
    slice_width = shape[3]
    key = (slice_height,slice_width)
    clustered_data_2.setdefault(key, []).append(file)


In [2]:
# To load: restore the cached slice-size clusters from disk. The pickled
# payload needs allow_pickle=True, and .item() unwraps the loaded array back
# into the dictionary.
# NOTE(review): this cell appears before the cell that writes this file, so on
# a fresh top-to-bottom run it reads whatever an earlier session saved - confirm
# the intended cell order.
slice_clusters_file = "/usr/local/micapollo01/MIC/DATA/STUDENTS/mvhave7/Results/Preprocessing/exploration/16coil_slice_size_clustered_fastmri_data.npy"
clustered_data_2 = np.load(slice_clusters_file, allow_pickle=True).item()

In [12]:
# Report how many 16-coil scans share each (height, width) slice size.
# The loop variable is called `scans` rather than `list`, so the built-in
# `list` is not rebound for the rest of the kernel session.
for key, scans in clustered_data_2.items():
    print('Slice dimensions:'+str(key))
    print('Number of scans:'+str(len(scans)))


Slice dimensions:(640, 320)
Number of scans:726
Slice dimensions:(768, 396)
Number of scans:630
Slice dimensions:(640, 272)
Number of scans:25
Slice dimensions:(768, 324)
Number of scans:4
Slice dimensions:(640, 274)
Number of scans:1
Slice dimensions:(640, 264)
Number of scans:27
Slice dimensions:(640, 280)
Number of scans:1
Slice dimensions:(512, 320)
Number of scans:1
Slice dimensions:(640, 332)
Number of scans:1
Slice dimensions:(512, 234)
Number of scans:4
Slice dimensions:(640, 260)
Number of scans:5
Slice dimensions:(640, 262)
Number of scans:2
Slice dimensions:(512, 256)
Number of scans:1
Slice dimensions:(512, 214)
Number of scans:3
Slice dimensions:(768, 342)
Number of scans:2
Slice dimensions:(640, 312)
Number of scans:1


In [13]:
# Save dictionary to .npy file (NumPy pickles the dict inside the file).
slice_clusters_file = "/usr/local/micapollo01/MIC/DATA/STUDENTS/mvhave7/Results/Preprocessing/exploration/16coil_slice_size_clustered_fastmri_data.npy"
np.save(slice_clusters_file, clustered_data_2)


In [13]:
# Tally the (640, 320) scans per contrast, judged by the tag in the file path.
# Tags are tried in this order and the first match wins, so a name such as
# 'AXT1POST' or 'AXT1PRE' is counted under 'AXT1'.
contrast_tags = ('AXFLAIR', 'AXT1', 'AXT2')
tally = dict.fromkeys(contrast_tags, 0)
count_others = 0

for scan_path in clustered_data_2[(640,320)]:
    scan_name = str(scan_path)
    matched_tag = next((tag for tag in contrast_tags if tag in scan_name), None)
    if matched_tag is None:
        count_others += 1
    else:
        tally[matched_tag] += 1

count_AXFLAIR = tally['AXFLAIR']
count_AXT1 = tally['AXT1']
count_AXT2 = tally['AXT2']


In [8]:
# Print one "<label>: <count>" line per contrast bucket.
for label, total in (
    ("AXFLAIR", count_AXFLAIR),
    ("AXT1", count_AXT1),
    ("AXT2", count_AXT2),
    ("Others", count_others),
):
    print(f"{label}: {total}")


AXFLAIR: 95
AXT1: 407
AXT2: 224
Others: 0


In [9]:
# Same tally as for the full (640, 320) cluster, restricted to its first 70 scans.
# Labels are tried in order and the first one found in the path wins; the
# extra last slot of `hits` collects paths that match none of them.
labels = ('AXFLAIR', 'AXT1', 'AXT2')
hits = [0] * (len(labels) + 1)

for scan_path in clustered_data_2[(640,320)][:70]:
    text = str(scan_path)
    slot = len(labels)
    for position, label in enumerate(labels):
        if label in text:
            slot = position
            break
    hits[slot] += 1

count_AXFLAIR, count_AXT1, count_AXT2, count_others = hits

In [10]:
# Print one "<label>: <count>" line per contrast bucket.
summary_rows = [
    ("AXFLAIR", count_AXFLAIR),
    ("AXT1", count_AXT1),
    ("AXT2", count_AXT2),
    ("Others", count_others),
]
for label, total in summary_rows:
    print(f"{label}: {total}")


AXFLAIR: 7
AXT1: 37
AXT2: 26
Others: 0


In [12]:
# List the first 70 files of the (640, 320) cluster, one path per line.
first_scans = clustered_data_2[(640,320)][:70]
for scan_path in first_scans:
    # print() applies str() to its argument, so no explicit conversion is needed.
    print(scan_path)


/usr/local/micapollo01/MIC/DATA/SHARED/NYU_FastMRI/multicoil_train/file_brain_AXT1_202_6000340.h5
/usr/local/micapollo01/MIC/DATA/SHARED/NYU_FastMRI/multicoil_train/file_brain_AXFLAIR_200_6002493.h5
/usr/local/micapollo01/MIC/DATA/SHARED/NYU_FastMRI/multicoil_train/file_brain_AXT2_202_2020075.h5
/usr/local/micapollo01/MIC/DATA/SHARED/NYU_FastMRI/multicoil_train/file_brain_AXT2_202_2020327.h5
/usr/local/micapollo01/MIC/DATA/SHARED/NYU_FastMRI/multicoil_train/file_brain_AXT2_206_2060005.h5
/usr/local/micapollo01/MIC/DATA/SHARED/NYU_FastMRI/multicoil_train/file_brain_AXT1POST_210_6001724.h5
/usr/local/micapollo01/MIC/DATA/SHARED/NYU_FastMRI/multicoil_train/file_brain_AXT1POST_210_6001704.h5
/usr/local/micapollo01/MIC/DATA/SHARED/NYU_FastMRI/multicoil_train/file_brain_AXT1POST_202_6000351.h5
/usr/local/micapollo01/MIC/DATA/SHARED/NYU_FastMRI/multicoil_train/file_brain_AXT2_202_2020583.h5
/usr/local/micapollo01/MIC/DATA/SHARED/NYU_FastMRI/multicoil_train/file_brain_AXT1PRE_200_6002041.h5
/u