# Creating a dataset for stroke presence prediction

**Method:** split each whole-brain volume into its two hemispheres – one with stroke and one without.

**Rationale:** create a dataset for hemispheric stroke presence classification, in order to determine which topological features are most useful for vascular imaging.


In [None]:
import sys
path_bnd = '../'
sys.path.insert(1, path_bnd)
import analysis_tools.data_loader as dl
from gsprep.visual_tools.visual import display, idisplay
import numpy as np

## Import data

In [None]:
# Location and name of the full dataset archive (machine-specific absolute path).
data_dir = '/Users/julian/stroke_research/brain_and_donuts/full_datasets'
filename = 'withAngio_all_2016_2017.npz'
clinical_inputs, ct_inputs, ct_lesion_GT, mri_inputs, mri_lesion_GT, brain_masks, ids, params = \
dl.load_structured_data(data_dir, filename=filename)

# Drop the trailing channel axis of ct_inputs, as it has 1 channel.
# The target shape is passed as a plain tuple: a starred expression inside
# bare parentheses without a trailing comma, `(*shape)`, is a SyntaxError.
ct_inputs = ct_inputs.reshape(ct_inputs.shape[:-1])

## Setting up data exploration set

In [None]:
# Data subset
n_images = 5
subsampling_rate = 2
# Keep every `subsampling_rate`-th element along the three axes that follow
# the image axis. The image axis is taken with a plain slice rather than a
# `range(n_images)` fancy index: the values are the same, but it does not
# raise an IndexError when fewer than `n_images` volumes are available.
subsample = (slice(None),) + (slice(None, None, subsampling_rate),) * 3
X = (ct_inputs[:n_images] * brain_masks[:n_images])[subsample]
y = (ct_lesion_GT[:n_images] * brain_masks[:n_images])[subsample]

print(X.shape)
print(y.shape)

In [None]:
# Visual check: show the first volume of the exploration subset, passing its
# lesion ground truth as the mask.
idisplay(X[0], mask=y[0])

Create a function to split the provided data into the two hemispheres.

In [None]:
def brain_to_hemispheres(data, uniform_side=True):
    """Split every volume in two along axis 1 and stack the halves along axis 0.

    Args:
        data: array whose axis 0 indexes the volumes and whose axis 1 is the
            axis to split along.
        uniform_side: if True, mirror the right halves along axis 1 so that
            both halves share the same orientation.

    Returns:
        Array with twice as many entries along axis 0 as ``data``: first the
        left half of every volume (in input order), then the right half of
        every volume (mirrored when ``uniform_side`` is True).

    Note:
        "Left" and "right" follow the image denomination; the anatomical
        denomination would be the contrary. When the size of axis 1 is odd,
        the central slice is left out so that both halves have the same size
        and can be concatenated.
    """
    width = data.shape[1]
    half_width = width // 2

    left_side = data[:, :half_width]
    # Starting at `width - half_width` skips the central slice for odd widths
    # and is identical to `half_width` for even widths.
    right_side = data[:, width - half_width:]
    if uniform_side:
        right_side = np.flip(right_side, axis=1)
    return np.concatenate((left_side, right_side), axis=0)


In [None]:
# Split the images and the lesion masks with the same function so that
# entries of both results stay aligned.
hemi_X = brain_to_hemispheres(X)
hemi_y = brain_to_hemispheres(y)

# Left halves are stacked first, so entries 0 and 1 are the left halves
# (image denomination) of the first two volumes.
display(hemi_X[0])
display(hemi_y[0])
display(hemi_X[1])
display(hemi_y[1])


## Transform whole dataset


In [None]:
# The output directory is the same as the input directory; only the file name differs.
out_file_name = 'withAngio_hemispheres_all_2016_2017.npz'
outdir = data_dir
dl.brain_volumes_dataset_to_hemispheric_dataset(
    data_dir,
    filename=filename,
    outdir=outdir,
    out_file_name=out_file_name,
)

Load newly created hemispheric dataset

In [None]:
# Rebind the dataset variables to the contents of the hemispheric file.
(
    clinical_inputs,
    ct_inputs,
    ct_lesion_GT,
    mri_inputs,
    mri_lesion_GT,
    brain_masks,
    ids,
    params,
) = dl.load_structured_data(outdir, filename=out_file_name)
# Bare trailing expression so the notebook echoes the shapes.
ct_inputs.shape, ct_lesion_GT.shape, ids.shape

In [None]:
# Visual check of the first two entries of the loaded dataset, each shown
# with its lesion ground truth passed as the mask.
display(ct_inputs[0], mask=ct_lesion_GT[0])
display(ct_inputs[1], mask=ct_lesion_GT[1])

In [None]:
# An entry is labelled positive when any element of its lesion ground truth
# (reduced over axes 1, 2 and 3) is non-zero.
stroke_presence_GT = np.any(ct_lesion_GT, axis=(1,2,3))
# Report the number of entries (`shape[0]`), not the shape tuple itself,
# which would be rendered as e.g. "(N,)" in the message.
print(f'Total number of hemispheres: {stroke_presence_GT.shape[0]}, of which {np.sum(stroke_presence_GT)} contain a stroke lesion')
