In [50]:
import os
BASE = "/Users/YujiaLiu/Desktop/download_test/data"
DATASET = "fmri"
root = os.path.join(BASE, DATASET)

# Load the lemur library
import sys
sys.path.append(".")
import lemur.datasets as lds
import lemur.metrics as lms
import lemur.plotters as lpl
import lemur.embedders as leb
import boto3
import io
import glob
import pandas as pd
import colorlover as cl
import plotly.graph_objs as go
import numpy as np
from plotly.offline import iplot, plot
import nibabel as nib

In [45]:
class BIDSParser:
    def __init__(self, base_path):
        dataset_name = os.path.basename(os.path.normpath(base_path))
        dataset = {}
        subjects = [os.path.basename(x) for x in glob.glob(base_path + "/*")]
        if "chanlocs.csv" in subjects:
            subjects.remove("chanlocs.csv")
        if "metadata.json" in subjects:
            subjects.remove("metadata.json")
        for s in subjects:
            dataset.update({s:{}})
        for s in subjects:
            modalities = [os.path.basename(x) for x in glob.glob(os.path.join(base_path, s) + "/*")]
#             print('modalities')
#             print(modalities)
            for m in modalities:
                print (m)
                dataset[s].update({m:{}})
                files = [os.path.basename(x) for x in glob.glob(os.path.join(base_path, s, m) + "/*")]
                print(files)
                for f in files:
                    t = "".join(f.split("_")[1:]).split(".")[0]
                    dataset[s][m].update({t:f})
        self.dataset = dataset
        self.base_path = base_path
        print(self.dataset)

    def getModalityFrame(self, modality, extension):
        files = []
        subjects = []
        tasks = []
        for s in self.dataset.keys():
            print (s)
            for t in self.dataset[s][modality].keys():
                print (t)
                f = self.dataset[s][modality][t]
                print (f)
                if f.endswith(extension):
                    files.append(os.path.join(self.base_path, s, modality, f))
                    subjects.append(s)
                    tasks.append(t)
        print (files)
        print (subjects)
        print (tasks)
        d = {
            "resource_path": files,
            "subjects": subjects,
            "tasks": tasks        
        }
        return pd.DataFrame(d)

In [46]:
bp = BIDSParser(root)
dataset_descriptor = bp.getModalityFrame("func", "nii.gz").iloc[:3]
print(dataset_descriptor)

dwi
['sub-NDARAA536PTU_acq-64dir_dwi.json', 'sub-NDARAA536PTU_acq-64dirTRACEW_dwi.json', 'sub-NDARAA536PTU_acq-64dir_dwi.bvec', 'sub-NDARAA536PTU_acq-64dirTRACEW_dwi.nii.gz', 'sub-NDARAA536PTU_acq-64dir_dwi.bval']
fmap
['sub-NDARAA536PTU_dir-PA_acq-dwi_epi.nii.gz', 'sub-NDARAA536PTU_magnitude2.nii.gz', 'sub-NDARAA536PTU_phasediff.nii.gz', 'sub-NDARAA536PTU_magnitude1.json', 'sub-NDARAA536PTU_dir-AP_acq-dwi_epi.json', 'sub-NDARAA536PTU_dir-AP_acq-dwi_epi.nii.gz', 'sub-NDARAA536PTU_magnitude1.nii.gz', 'sub-NDARAA536PTU_dir-PA_acq-dwi_epi.json', 'sub-NDARAA536PTU_phasediff.json', 'sub-NDARAA536PTU_magnitude2.json']
anat
['sub-NDARAA536PTU_FLAIR.json', 'sub-NDARAA536PTU_MT-on_PD.json', 'sub-NDARAA536PTU_MT-off_PD.nii.gz', 'sub-NDARAA536PTU_MT-off_PD.json', 'sub-NDARAA536PTU_T1w.json']
func
['sub-NDARAA536PTU_task-rest1_bold.json', 'sub-NDARAA536PTU_task-rest_bold.nii.gz']
fmap
['sub-NDARAA075AMK_magnitude2.json', 'sub-NDARAA075AMK_magnitude1.nii.gz', 'sub-NDARAA075AMK_phasediff.json', 'sub

In [47]:
out_base = os.path.join(BASE, "fmri_derivatives")
out_emb_base = os.path.join(BASE, "fmri_embedded_deriatives")
os.makedirs(out_base + "/agg", exist_ok=True)
os.makedirs(out_emb_base + "/agg", exist_ok=True)

In [48]:
fds = lds.fMRIDataSet(dataset_descriptor)
# Create a lemur distance matrix based on the EEG data
DM = lds.DistanceMatrix(fds, lms.DiffAve)
DM.name = "fmri-DistanceMatrix"

<lemur.datasets.fMRIDataSet object at 0x115fc5cf8>


In [19]:
# Create an embedded distance matrix object under MDS
MDSEmbedder = leb.MDSEmbedder(num_components=10)
fMRI_Embedded = MDSEmbedder.embed(DM)

In [21]:
class MatrixPlotter:
    def __init__(self, DS, mode="notebook", base_path = None):
        self.DS = DS
        self.plot_mode = mode
        self.base_path = base_path

        Reds = cl.scales['8']['seq']['Reds']
        self.Reds = list(zip(np.linspace(0, 1, len(Reds)), Reds))

        BuRd = cl.scales['11']['div']['RdBu'][::-1]
        self.BuRd = list(zip(np.linspace(0, 1, len(BuRd)), BuRd))

    def makeplot(self, fig, local_path=None):
        """Make the plotly figure visable to the user in the way they want.

        Parameters
        ----------
        gid : :obj:`figure`
            An plotly figure.

        """

        if self.plot_mode == "notebook":
            iplot(fig)
        if self.plot_mode == "savediv":
            fig["layout"]["autosize"] = True
            div = plot(fig, output_type='div', include_plotlyjs=True)
            path = os.path.join(self.base_path, local_path + ".html")
            os.makedirs("/".join(path.split("/")[:-1]), exist_ok=True)
            with open(path, "w") as f:
                f.write(div)
                f.close()

        if self.plot_mode == "div":
            fig["layout"]["autosize"] = True
            return plot(fig, output_type='div', include_plotlyjs=True)

In [22]:
class SquareHeatmap(MatrixPlotter):
    titlestring = "%s Heatmap"
    shortname = "squareheat"

    def plot(self):
        """Constructs a distance matrix heatmap using the :obj:`DistanceMatrix` object, in plotly.

        """
        title = self.titlestring % (self.DS.name)
        xaxis = go.XAxis(
                title=self.DS.D.index.name,
                ticktext = self.DS.D.index,
                ticks = "",
                showticklabels=False,
                showgrid=False,
                mirror=True,
                tickvals = [i for i in range(len(self.DS.D.index))])
        yaxis = go.YAxis(
                scaleanchor="x",
                title=self.DS.D.index.name,
                ticktext = self.DS.D.index,
                showgrid=False,
                ticks = "",
                showticklabels=False,
                mirror=True,
                tickvals = [i for i in range(len(self.DS.D.index))])
        layout = dict(title=title, xaxis=xaxis, yaxis=yaxis)
        trace = go.Heatmap(z = self.DS.D.as_matrix().T)
        data = [trace]
        fig = dict(data=data, layout=layout)
        return self.makeplot(fig, "agg/" + self.shortname)

In [27]:
SquareHeatmap(DM, mode="savediv", base_path=out_base).plot()
SquareHeatmap(fMRI_Embedded, mode="savediv",
                  base_path=out_emb_base).plot()

# fMRI

In [None]:
import os
BASE = '/Users/YujiaLiu/Desktop/test'
DATASET = 'fmri'
root = os.path.join(BASE, DATASET)
out_base = os.path.join(BASE, "fmri_derivatives")
out_emb_base = os.path.join(BASE, "fmri_embedded_deriatives")
os.makedirs(out_base + "/agg", exist_ok=True)
os.makedirs(out_emb_base + "/agg", exist_ok=True)

In [None]:
class fMRIDataSet:

    def __init__(self, dataframe_descriptor, name="fmri"):
        self.D = dataframe_descriptor
        self.D.index = self.D["subjects"] + "-" + self.D["tasks"]
        self.D.index.name = "index"
        self.name = name
        self.n = self.D.shape[0]

    def getResource(self, index):
        resource = self.D.ix[index]
        return resource

    def getMatrix(self, index):
        resource_path = self.D.ix[index][0]
        return nib.load(resource_path).get_data()

In [53]:
f = []
# s = ['sub-NDARAA117NEJ','sub-NDARAC904DMU']
s = []
# t = ['task-RestingStateeeg', 'task-RestingStateeeg']
t = []

s3 = boto3.resource('s3')
bucket = s3.Bucket('redlemurtest')
# Directly read through S3 bucket and pass into pandas dataframe
for obj in bucket.objects.all():
    key = obj.key
    if key.startswith('fmri/s'):
        keys = key.split('/')
        if keys[2] == 'func':
            if key.endswith('.gz'):
                print (keys)
                s.append(keys[1])
                task = keys[3].split('_')[1] + keys[0]
                t.append(task)
                body = obj.get()['Body'].read()
                file = nib.streamlines.load(body).get_data()
                f.append(file)

#     if key.endswith('.pkl'):
#         keys = key.split("/")
#         subject = keys[1]
#         task = keys[3].split('_')[1] + keys[0]
#         s.append(subject)
#         t.append(task)
#         body = obj.get()['Body'].read()
#         pkl = pd.read_pickle(io.BytesIO(body))
#         f.append(pkl)
#     if key.endswith('chanlocs.csv'):
#         body = obj.get()['Body'].read()
#         chanlocs = pd.read_csv(io.BytesIO(body))
# spatial = lds.DataSet(chanlocs[["X", "Y", "Z"]], "Spatial")
# spatialDM = lds.DistanceMatrix(spatial, lms.VectorDifferenceNorm)

print (s)
print (t)

['fmri', 'sub-NDARAA075AMK', 'func', 'sub-NDARAA075AMK_task-rest_bold.nii.gz']


ValueError: embedded null byte

In [43]:
d = {
            "resource_path": f,
            "subjects": s,
            "tasks": t        
    }
descriptor = pd.DataFrame(d)
print (descriptor)

                         resource_path          subjects          tasks
0  <_io.BytesIO object at 0x11643a410>  sub-NDARAA075AMK  task-restfmri
1  <_io.BytesIO object at 0x11643a048>  sub-NDARAA536PTU  task-restfmri
