# Data Research & Data Separation of MRI_brain
## 0. Import packages and set config

In [1]:
# import package
import os
import h5py
import numpy as np
import pandas as pd


In [24]:
class config:
    """Filesystem locations used by this notebook."""

    # Directory that the raw data file and the label file are read from.
    data_root = '/Users/gofinge/Documents/Data/BrainMRI_raw/train'
    # HDF5 file name, joined onto data_root when the MRI data is loaded.
    data_name = 'train_pre_data.h5'
    # CSV file name, joined onto data_root when the labels are loaded.
    label_name = 'train_pre_label.csv'

    # Directory the train / val / test files are written into.
    export_dir = '/Users/gofinge/Documents/Data/BrainMRI/'


# Single settings instance referenced by the cells below.
conf = config()

## 1. Load data & reshape

In [40]:
# load MRI
# Open the raw HDF5 file read-only. The context manager closes the handle as
# soon as the data has been copied into memory, so no file handle is leaked.
with h5py.File(os.path.join(conf.data_root, conf.data_name), 'r') as f_raw:
    # List every top-level entry of the file together with its shape.
    for key in f_raw.keys():
        print(key)
        print(f_raw[key].shape)

    # `[...]` reads the whole 'data' dataset into an in-memory NumPy array,
    # so `mri` remains usable after the file has been closed.
    mri = f_raw['data'][...]

# Drop all length-1 axes; per the recorded output this turns
# (300, 1, 79, 95, 79) into (300, 79, 95, 79).
mri = np.squeeze(mri)
print(mri.shape)

data
(300, 1, 79, 95, 79)
(300, 79, 95, 79)


In [42]:
# load label
label_csv = os.path.join(conf.data_root, conf.label_name)
label_frame = pd.read_csv(label_csv)
# Copy the 'label' column into a standalone NumPy array.
labels = np.array(label_frame['label'])
print(labels)
print(type(labels))

[1 0 0 1 0 1 2 1 1 2 2 0 1 2 1 0 2 1 2 1 0 1 2 2 1 0 1 0 1 1 0 2 1 1 1 1 0
 1 1 1 2 1 2 0 1 2 1 2 0 1 2 2 1 2 1 1 2 1 2 1 1 2 1 0 2 1 0 0 0 1 2 0 1 0
 1 1 1 1 2 0 1 0 1 1 1 2 1 2 0 1 2 1 1 1 1 2 1 0 1 0 0 2 1 1 1 1 1 1 0 1 1
 2 2 1 2 2 0 1 0 2 1 0 2 2 0 1 2 2 0 1 1 1 1 1 1 1 1 2 0 0 0 0 0 2 0 1 1 1
 0 2 1 0 1 2 0 1 2 2 1 1 0 2 2 1 0 2 1 1 2 1 2 1 0 0 1 1 1 1 1 1 0 2 1 1 1
 2 1 1 1 1 2 0 0 2 1 2 1 2 1 1 2 1 0 2 1 1 1 0 1 2 2 0 2 1 0 1 0 1 0 2 1 2
 1 1 0 1 1 1 1 1 2 1 1 1 1 1 0 1 2 1 2 2 0 0 1 0 0 1 0 2 1 1 0 1 2 0 2 2 2
 0 1 1 0 1 0 2 1 2 1 2 1 1 1 2 2 1 0 1 1 1 2 1 1 1 0 2 1 2 1 2 1 1 0 2 1 2
 1 1 0 2]
<class 'numpy.ndarray'>


## 2. Separate MRI & labels

In [16]:
# labels count
# np.where(mask) returns a tuple of index arrays; np.size of that tuple is the
# number of samples carrying the label (recorded run: 68 / 151 / 81).
index0, index1, index2 = (np.where(labels == label_value) for label_value in (0, 1, 2))

for class_index in (index0, index1, index2):
    print(np.size(class_index))

68
151
81


In [43]:
# separation
# Label-stratified split: rank the samples by label, take 45 evenly spaced
# ranks as the test set, the rank right after each of them as the validation
# set, and everything else as the training set.
#
# The labels are deliberately NOT sorted in place. Sorting only `labels` would
# break the pairing with `mri` (volume i would no longer belong to labels[i]).
# Instead the ranks are mapped back to original sample indices through `order`,
# so the same index list selects matching rows from `mri` and `labels`.
n_samples = len(labels)
if len(mri) != n_samples:
    raise ValueError(
        'mri and labels must have the same number of samples, got %d and %d'
        % (len(mri), n_samples)
    )

# order[r] is the original index of the sample with the r-th smallest label;
# a stable sort keeps the file order among samples of the same label.
order = np.argsort(labels, kind='stable')

# Positions within the label-sorted ordering.
test_rank = np.linspace(0, n_samples - 1, num=45, endpoint=False, dtype=int)
val_rank = test_rank + 1
train_rank = np.delete(np.arange(n_samples), np.concatenate([test_rank, val_rank]))

# Original sample indices of each split.
test_idx = order[test_rank].tolist()
val_idx = order[val_rank].tolist()
train_idx = order[train_rank].tolist()

train_mri = mri[train_idx]
val_mri = mri[val_idx]
test_mri = mri[test_idx]

train_labels = labels[train_idx]
val_labels = labels[val_idx]
test_labels = labels[test_idx]

print(train_mri.shape)
print(val_mri.shape)
print(test_mri.shape)

(210, 79, 95, 79)
(45, 79, 95, 79)
(45, 79, 95, 79)


In [51]:
# save data
def save_hdf5(data, labels, filename):
    """Write one split to disk as `<filename>.h5` and `<filename>_labels.csv`.

    Args:
        data: Array stored as the 'data' dataset of the HDF5 file.
        labels: Iterable of labels; each is converted with str() and written
            to the CSV file.
        filename: Output path without extension; '.h5' and '_labels.csv' are
            appended to it.
    """
    # The context manager flushes and closes the HDF5 file even if writing
    # raises; previously the handle was left open after the call.
    with h5py.File(filename + '.h5', 'w') as file:
        file.create_dataset('data', data=data)
    labels_str = [str(x) for x in labels]
    np.savetxt(filename + '_labels.csv', labels_str, fmt='%s')
    
    
# Export the three splits under conf.export_dir, in train / val / test order.
for split_name, split_mri, split_labels in (
    ('train', train_mri, train_labels),
    ('val', val_mri, val_labels),
    ('test', test_mri, test_labels),
):
    save_hdf5(split_mri, split_labels, os.path.join(conf.export_dir, split_name))