In [1]:
%matplotlib inline

import os
import h5py
import numpy as np
from matplotlib import pyplot as plt


In [25]:
download_path = '/DATASERVER/MIC/SHARED/NYU_FastMRI/Preprocessed/'
train_path = os.path.join(download_path,'multicoil_train')
validation_path = os.path.join(download_path,'multicoil_val')
test_path = os.path.join(download_path,'multicoil_test')
fully_sampled_test_path = os.path.join(download_path,'multicoil_test_full')

Explore generalized properties of the data using all training files

In [26]:
from pathlib import Path

# Cluster the training data based on number of coils used
# TAKES A LONG TIME TO RUN(113min): LOAD IF POSSIBLE
clustered_data = {}
files = Path(train_path).glob('**/*')

for file in files:
    hf_loop = h5py.File(file)
    shape = hf_loop['kspace'][()].shape
    num_coils = shape[1]
    if num_coils not in clustered_data:
       clustered_data[num_coils] = [file]
    else:
       clustered_data[num_coils].append(file)


In [None]:
# Save dictionary to .npy file
np.save("/DATASERVER/MIC/GENERAL/STUDENTS/aslock2/Results/exploration/coil_clustered_fastmri_data.npy", clustered_data)

In [27]:

# To load:
clustered_data = np.load("/DATASERVER/MIC/GENERAL/STUDENTS/aslock2/Results/exploration/coil_clustered_fastmri_data.npy", allow_pickle=True)
clustered_data = clustered_data.item()
print(np.shape(clustered_data))

()


In [30]:
for key, list in dict(clustered_data).items():
    print('Number of coils:'+str(key))
    print('Number of scans:'+str(len(list)))

# We can see that the most common number of coils are (in descending order) 16, 4 and 20

Number of coils:4
Number of scans:1212
Number of coils:12
Number of scans:244
Number of coils:16
Number of scans:1430
Number of coils:20
Number of scans:1170
Number of coils:14
Number of scans:263
Number of coils:6
Number of scans:61
Number of coils:5
Number of scans:22
Number of coils:8
Number of scans:10
Number of coils:10
Number of scans:3
Number of coils:22
Number of scans:1
Number of coils:18
Number of scans:26
Number of coils:2
Number of scans:7
Number of coils:24
Number of scans:7
Number of coils:28
Number of scans:2


In [31]:
# Cluster the 16-coil training data based on slice dimensions, as the 16-coil data corresponds to the most scans
clustered_data_2 = {}
for file in clustered_data[16]:
    hf_loop = h5py.File(file)
    shape = hf_loop['kspace'][()].shape
    slice_height = shape[2]
    slice_width = shape[3]
    key = (slice_height,slice_width)
    if key not in clustered_data_2:
        clustered_data_2[key] = [file]
    else:
        clustered_data_2[key].append(file)


In [32]:
# Save dictionary to .npy file
np.save("/DATASERVER/MIC/GENERAL/STUDENTS/aslock2/Results/exploration/16coil_slice_size_clustered_fastmri_data.npy", clustered_data_2)

In [33]:
# To load:
clustered_data_2 = np.load("/DATASERVER/MIC/GENERAL/STUDENTS/aslock2/Results/exploration/16coil_slice_size_clustered_fastmri_data.npy", allow_pickle=True)
clustered_data_2 = clustered_data_2.item()

In [34]:
for key, list in clustered_data_2.items():
    print('Slice dimensions:'+str(key))
    print('Number of scans:'+str(len(list)))


Slice dimensions:(640, 320)
Number of scans:723
Slice dimensions:(768, 396)
Number of scans:629
Slice dimensions:(640, 272)
Number of scans:25
Slice dimensions:(640, 264)
Number of scans:27
Slice dimensions:(640, 262)
Number of scans:2
Slice dimensions:(768, 324)
Number of scans:4
Slice dimensions:(512, 234)
Number of scans:4
Slice dimensions:(768, 342)
Number of scans:2
Slice dimensions:(640, 260)
Number of scans:5
Slice dimensions:(640, 312)
Number of scans:1
Slice dimensions:(512, 214)
Number of scans:3
Slice dimensions:(640, 280)
Number of scans:1
Slice dimensions:(512, 320)
Number of scans:1
Slice dimensions:(640, 274)
Number of scans:1
Slice dimensions:(512, 256)
Number of scans:1
Slice dimensions:(640, 332)
Number of scans:1


In [35]:
count_AXFLAIR = 0
count_AXT1 = 0
count_AXT2 = 0
count_others = 0

for filename in clustered_data_2[(640,320)]:
    filename = str(filename)
    if 'AXFLAIR' in filename:
        count_AXFLAIR += 1
    elif 'AXT1' in filename:
        count_AXT1 += 1
    elif 'AXT2' in filename:
        count_AXT2 += 1
    else:
        count_others += 1


In [36]:
print(f"AXFLAIR: {count_AXFLAIR}")
print(f"AXT1: {count_AXT1}")
print(f"AXT2: {count_AXT2}")
print(f"Others: {count_others}")


AXFLAIR: 94
AXT1: 406
AXT2: 223
Others: 0


In [37]:
count_AXFLAIR = 0
count_AXT1 = 0
count_AXT2 = 0
count_others = 0

for filename in clustered_data_2[(640,320)][:70]:
    filename = str(filename)
    if 'AXFLAIR' in filename:
        count_AXFLAIR += 1
    elif 'AXT1' in filename:
        count_AXT1 += 1
    elif 'AXT2' in filename:
        count_AXT2 += 1
    else:
        count_others += 1

In [38]:
print(f"AXFLAIR: {count_AXFLAIR}")
print(f"AXT1: {count_AXT1}")
print(f"AXT2: {count_AXT2}")
print(f"Others: {count_others}")


AXFLAIR: 10
AXT1: 44
AXT2: 16
Others: 0


In [39]:
for filename in clustered_data_2[(640,320)][:70]:
    print(str(filename))


/DATASERVER/MIC/SHARED/NYU_FastMRI/Preprocessed/multicoil_train/file_brain_AXT1POST_210_6001620.h5
/DATASERVER/MIC/SHARED/NYU_FastMRI/Preprocessed/multicoil_train/file_brain_AXT1PRE_205_6000021.h5
/DATASERVER/MIC/SHARED/NYU_FastMRI/Preprocessed/multicoil_train/file_brain_AXT2_202_2020162.h5
/DATASERVER/MIC/SHARED/NYU_FastMRI/Preprocessed/multicoil_train/file_brain_AXT1POST_202_6000281.h5
/DATASERVER/MIC/SHARED/NYU_FastMRI/Preprocessed/multicoil_train/file_brain_AXT1POST_200_6001969.h5
/DATASERVER/MIC/SHARED/NYU_FastMRI/Preprocessed/multicoil_train/file_brain_AXT1POST_205_2050055.h5
/DATASERVER/MIC/SHARED/NYU_FastMRI/Preprocessed/multicoil_train/file_brain_AXT2_202_2020467.h5
/DATASERVER/MIC/SHARED/NYU_FastMRI/Preprocessed/multicoil_train/file_brain_AXT2_205_6000061.h5
/DATASERVER/MIC/SHARED/NYU_FastMRI/Preprocessed/multicoil_train/file_brain_AXT2_202_2020578.h5
/DATASERVER/MIC/SHARED/NYU_FastMRI/Preprocessed/multicoil_train/file_brain_AXT1POST_205_2050233.h5
/DATASERVER/MIC/SHARED/NYU_