In [14]:
import lemur.datasets as lds
import lemur.metrics as lms
import lemur.plotters as lpl
import lemur.embedders as leb
import boto3
import io
import glob
import pandas as pd
import colorlover as cl
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, plot
import numpy as np

# Current method (EEG)

In [2]:
class BIDSParser:
    def __init__(self, base_path):
        dataset_name = os.path.basename(os.path.normpath(base_path))
        dataset = {}
        subjects = [os.path.basename(x) for x in glob.glob(base_path + "/*")]
        if "chanlocs.csv" in subjects:
            subjects.remove("chanlocs.csv")
        if "metadata.json" in subjects:
            subjects.remove("metadata.json")
        for s in subjects:
            dataset.update({s:{}})
        for s in subjects:
            modalities = [os.path.basename(x) for x in glob.glob(os.path.join(base_path, s) + "/*")]
            for m in modalities:
                dataset[s].update({m:{}})
                files = [os.path.basename(x) for x in glob.glob(os.path.join(base_path, s, m) + "/*")]
                for f in files:
                    t = "".join(f.split("_")[1:]).split(".")[0]
                    dataset[s][m].update({t:f})
        self.dataset = dataset
        self.base_path = base_path
#         print(self.dataset)

    def getModalityFrame(self, modality, extension):
        files = []
        subjects = []
        tasks = []
        for s in self.dataset.keys():
            for t in self.dataset[s][modality].keys():
                f = self.dataset[s][modality][t]
                if f.endswith(extension):
                    files.append(os.path.join(self.base_path, s, modality, f))
                    subjects.append(s)
                    tasks.append(t)
#         print (files)
#         print (subjects)
#         print (tasks)
        d = {
            "resource_path": files,
            "subjects": subjects,
            "tasks": tasks        
        }
        return pd.DataFrame(d)

In [3]:
import os
BASE = "/Users/YujiaLiu/Desktop/download_test/data"
DATASET = "eeg"
root = os.path.join(BASE, DATASET)
# root = '/eeg'
# print (root)
bp = BIDSParser(root)
dataset_descriptor = bp.getModalityFrame("preprocessed", ".pkl").iloc[:6]
out_base = os.path.join(BASE, "eeg_derivatives")
out_emb_base = os.path.join(BASE, "eeg_embedded_deriatives")
os.makedirs(out_base + "/agg", exist_ok=True)
os.makedirs(out_emb_base + "/agg", exist_ok=True)
print (dataset_descriptor)

                                       resource_path          subjects  \
0  /Users/YujiaLiu/Desktop/download_test/data/eeg...  sub-NDARAC904DMU   
1  /Users/YujiaLiu/Desktop/download_test/data/eeg...  sub-NDARAA117NEJ   

                  tasks  
0  task-RestingStateeeg  
1  task-RestingStateeeg  


In [4]:
chanlocs = pd.read_csv(root+"/chanlocs.csv")
spatial = lds.DataSet(chanlocs[["X", "Y", "Z"]], "Spatial")
spatialDM = lds.DistanceMatrix(spatial, lms.VectorDifferenceNorm)

In [5]:
eds = lds.EEGDataSet(dataset_descriptor)
# Create a lemur distance matrix based on the EEG data
DM = lds.DistanceMatrix(eds, lms.FroCorr)
DM.name = "eeg-DistanceMatrix"

In [6]:
class TimeSeriesPlotter:
    """A generic one-to-one plotter for time series data to be extended.

    Parameters
    ----------
    data : :obj:`ndarray`
        The time series data.
    resource_name : string
        The name of the time series being plotted.
    row_name : string
        The name of the rows in the time-series (e.g. channels, sources. ect.).
    column_name : string
        The name of the columns in the time-series (e.g. time points, time steps, seconds, ect.).

    Attributes
    ----------
    data : :obj:`ndarray`
        The time series data.
    d : int
        The number of dimensions in the time series
    n : int
        The number of time points in the time series
    row_name : string
        The name of the rows in the time-series (e.g. channels, sources. ect.).
    column_name : string
        The name of the columns in the time-series (e.g. time points, time steps, seconds, ect.).
    resource_name : string
        The name of the time series being plotted.

    """

    def __init__(self, DS, mode="notebook", base_path=None):
        self.data = DS.D.as_matrix().T
        self.d, self.n = self.data.shape
        self.d_names = DS.D.columns
        self.row_name = "Channels"
        self.col_name = "Time Points"
        self.resource_name = DS.name
        self.plot_mode = mode
        self.base_path = base_path

    def makeplot(self, fig, local_path=None):
        """Make the plotly figure visable to the user in the way they want.

        Parameters
        ----------
        gid : :obj:`figure`
            An plotly figure.

        """

        if self.plot_mode == "notebook":
            iplot(fig)
        if self.plot_mode == "savediv":
            fig["layout"]["autosize"] = True
            div = plot(fig, output_type='div', include_plotlyjs=True)
            path = os.path.join(self.base_path, local_path + ".html")
            os.makedirs("/".join(path.split("/")[:-1]), exist_ok=True)
            with open(path, "w") as f:
                f.write(div)
                f.close()

        if self.plot_mode == "div":
            fig["layout"]["autosize"] = True
            return plot(fig, output_type='div', include_plotlyjs=True)

In [7]:
class MatrixPlotter:
    def __init__(self, DS, mode="notebook", base_path = None):
        self.DS = DS
        self.plot_mode = mode
        self.base_path = base_path

        Reds = cl.scales['8']['seq']['Reds']
        self.Reds = list(zip(np.linspace(0, 1, len(Reds)), Reds))

        BuRd = cl.scales['11']['div']['RdBu'][::-1]
        self.BuRd = list(zip(np.linspace(0, 1, len(BuRd)), BuRd))

    def makeplot(self, fig, local_path=None):
        """Make the plotly figure visable to the user in the way they want.

        Parameters
        ----------
        gid : :obj:`figure`
            An plotly figure.

        """

        if self.plot_mode == "notebook":
            iplot(fig)
        if self.plot_mode == "savediv":
            fig["layout"]["autosize"] = True
            div = plot(fig, output_type='div', include_plotlyjs=True)
            path = os.path.join(self.base_path, local_path + ".html")
            os.makedirs("/".join(path.split("/")[:-1]), exist_ok=True)
            with open(path, "w") as f:
                f.write(div)
                f.close()

        if self.plot_mode == "div":
            fig["layout"]["autosize"] = True
            return plot(fig, output_type='div', include_plotlyjs=True)

In [8]:
class SparkLinePlotter(TimeSeriesPlotter):
    titlestring = "Sparklines for %s"
    shortname = "sparkline"

    def plot(self, sample_freq):
        """Constructs a downsampled spark line plot of the time series.

        If there are more than 500 time points, the time series will be down sampled to
        500 column variables by windowed averaging. This is done by splitting the time series
        into 500 equal sized segments in the time domain, then plotting the mean for each segment.

        Parameters
        ----------
        sample_freq : int
            The sampling frequency (how many times sampled per second).

        """
        title = self.titlestring % (self.resource_name)
        xaxis = dict(
            title = "Time in Seconds"
        )
        yaxis = dict(
            title = "Intensity"
        )
        layout = dict(title=title, xaxis=xaxis, yaxis=yaxis)
        if self.n > 500:
            winsize = self.n // 500
            df = pd.DataFrame(self.data.T)
            df = df.groupby(lambda x: x // winsize).mean()
            downsampled_data = df.as_matrix().T
            data = [dict(mode="lines",
                         name = str(i),
                         x=(np.arange(downsampled_data.shape[1]) * winsize) / sample_freq,
                         y=downsampled_data[i, :]) for i in range(downsampled_data.shape[0])]
        fig = dict(data=data, layout=layout)
        self.makeplot(fig, self.resource_name + "/" + self.shortname)

In [9]:
class ConnectedScatterplot(MatrixPlotter):
    titlestring = "%s Scatterplot"
    shortname = "connectedscatter"

    def plot(self, spatialDM):
        """Constructs a distance matrix heatmap using the :obj:`DistanceMatrix` object, in plotly.

        """
        title = self.titlestring % (self.DS.name)
        DM = self.DS.D.as_matrix()
        sDM = spatialDM.D.as_matrix()
        colors = (np.nansum(DM, axis=0) - 1) / DM.shape[0]
        TSNEEmbedder = leb.TSNEEmbedder(num_components=2)
        m = TSNEEmbedder.embed(sDM)
        xaxis = go.XAxis(title=spatialDM.D.index.name)
        yaxis = go.YAxis(scaleanchor="x", title=spatialDM.D.index.name)
        layout = dict(title=title, xaxis=xaxis, yaxis=yaxis, showlegend=False)
        trace1 = go.Scatter(x = m[:, 0],
                           y = m[:, 1],
                           mode = "markers",
                           marker = dict(color=colors, size=16, showscale=True))
        distances = np.zeros([m.shape[0], m.shape[0]])
        for i in range(m.shape[0]):
            for j in range(m.shape[0]):
                distances[i, j] = np.linalg.norm(m[i, :] - m[j, :])
        med = np.median(distances)
        mask = distances < (med / 4)
        Xe = []
        Ye = []
        for i in range(distances.shape[0]):
            for j in range(distances.shape[1]):
                if mask[i, j]:
                    Xe += [m[i, 0], m[j, 0], None]
                    Ye += [m[i, 1], m[j, 1], None]
        trace2 = go.Scatter(x=Xe,
                            y=Ye,
                            mode='lines',
                            line=go.Line(color='rgb(125,125,125)', width=1),
                            hoverinfo='none')
        data = [trace1, trace2]
        fig = dict(data=data, layout=layout)
        return self.makeplot(fig, self.DS.name + "/" + self.shortname)

In [10]:
class SpatialConnectivity(MatrixPlotter):
    titlestring = "%s Spatial Connectivity"
    shortname = "spatialconn"

    def plot(self, spatial):
        title = self.titlestring % (self.DS.name)
        DM = self.DS.D.as_matrix()
        sp = spatial.D.as_matrix()
        trace1 = go.Scatter3d(
            x=sp[:, 0],
            y=sp[:, 1],
            z=sp[:, 2],
            mode='markers',
            marker=dict(
                size=12,
                opacity=0
            )
        )
        #med = np.nanmedian(DM)
        #mask = np.nan_to_num(DM) > (3 * med)
        #Xe = []
        #Ye = []
        #Ze = []
        #for i in range(DM.shape[0]):
        #    for j in range(DM.shape[1]):
        #        if mask[i, j]:
        #            Xe += [sp[i, 0], sp[j, 0], None]
        #            Ye += [sp[i, 1], sp[j, 1], None]
        #            Ze += [sp[i, 2], sp[j, 2], None]
        #print(Xe)
        #trace2 = go.Scatter3d(x=Xe,
        #                      y=Ye,
        #                      z=Ze,
        #                      mode='lines',
        #                      line=go.Line(color='rgb(125,125,125)', width=1),
        #                      hoverinfo='none')
        data = [trace1]
        fig = dict(data=data, layout={"autosize":True})
        return self.makeplot(fig, self.DS.name + "/" + self.shortname)

In [11]:
class SquareHeatmap(MatrixPlotter):
    titlestring = "%s Heatmap"
    shortname = "squareheat"

    def plot(self):
        """Constructs a distance matrix heatmap using the :obj:`DistanceMatrix` object, in plotly.

        """
        title = self.titlestring % (self.DS.name)
        xaxis = go.XAxis(
                title=self.DS.D.index.name,
                ticktext = self.DS.D.index,
                ticks = "",
                showticklabels=False,
                showgrid=False,
                mirror=True,
                tickvals = [i for i in range(len(self.DS.D.index))])
        yaxis = go.YAxis(
                scaleanchor="x",
                title=self.DS.D.index.name,
                ticktext = self.DS.D.index,
                showgrid=False,
                ticks = "",
                showticklabels=False,
                mirror=True,
                tickvals = [i for i in range(len(self.DS.D.index))])
        layout = dict(title=title, xaxis=xaxis, yaxis=yaxis)
        trace = go.Heatmap(z = self.DS.D.as_matrix().T)
        data = [trace]
        fig = dict(data=data, layout=layout)
        return self.makeplot(fig, "agg/" + self.shortname)

In [12]:
class Heatmap(MatrixPlotter):
    titlestring = "%s Heatmap"
    shortname = "heatmap"

    def plot(self, showticklabels=False):
        title = self.titlestring % (self.DS.name)
        xaxis = go.XAxis(
                title="Observations",
                ticktext = self.DS.D.index,
                ticks="",
                showticklabels=False,
                tickvals = [i for i in range(len(self.DS.D.index))])
        yaxis = go.YAxis(
                title="Dimensions",
                ticktext = self.DS.D.columns,
                ticks="",
                showticklabels=showticklabels,
                tickvals = [i for i in range(len(self.DS.D.columns))])
        layout = dict(title=title, xaxis=xaxis, yaxis=yaxis)

        maximum = self.DS.D.max().max()
        trace = go.Heatmap(z = self.DS.D.as_matrix().T,
                           zmin = -maximum,
                           zmax = maximum,
                           colorscale=self.BuRd)
        data = [trace]
        fig = dict(data=data, layout=layout)
        return self.makeplot(fig, "agg/" + self.shortname)

In [15]:
# Create an embedded distance matrix object under MDS
MDSEmbedder = leb.MDSEmbedder(num_components=10)
EEG_Embedded = MDSEmbedder.embed(DM)
for i in range(eds.n):
    single_ds = eds.getResourceDS(i)
    SparkLinePlotter(single_ds, mode="savediv", base_path=out_base).plot(sample_freq=500)

for i in range(eds.n):
    single_ds = eds.getResourceDS(i)
    single_DM = lds.DataSet(single_ds.D.corr(), single_ds.name)
    SpatialConnectivity(single_DM, mode="savediv",
                            base_path=out_base).plot(spatial)
for i in range(eds.n):
    single_ds = eds.getResourceDS(i)
    single_DM = lds.DataSet(single_ds.D.corr(), single_ds.name)
    ConnectedScatterplot(single_DM,
                             mode="savediv",
                             base_path=out_base).plot(spatialDM)

SquareHeatmap(DM, mode="savediv", base_path=out_base).plot()
Heatmap(EEG_Embedded, mode="savediv", base_path=out_emb_base).plot()

# lpl.EigenvectorHeatmap(DM, mode="savediv", base_path=out_base).plot()
# lpl.EigenvectorHeatmap(EEG_Embedded, mode="savediv",
#                        base_path=out_emb_base).plot()


# Read files directly from S3

In [16]:
import os
BASE = '/Users/YujiaLiu/Desktop/test'
DATASET = 'eeg'
root = os.path.join(BASE, DATASET)
out_base = os.path.join(BASE, "eeg_derivatives")
out_emb_base = os.path.join(BASE, "eeg_embedded_deriatives")
os.makedirs(out_base + "/agg", exist_ok=True)
os.makedirs(out_emb_base + "/agg", exist_ok=True)

In [17]:
f = []
s = []
t = []

s3 = boto3.resource('s3')
bucket = s3.Bucket('redlemurtest')
# Directly read through S3 bucket and pass into pandas dataframe
for obj in bucket.objects.all():
    key = obj.key
    if key.endswith('.pkl'):
        keys = key.split("/")
        subject = keys[1]
        task = keys[3].split('_')[1] + keys[0]
        s.append(subject)
        t.append(task)
        body = obj.get()['Body'].read()
        pkl = pd.read_pickle(io.BytesIO(body))
        f.append(pkl)
    if key.endswith('chanlocs.csv'):
        body = obj.get()['Body'].read()
        chanlocs = pd.read_csv(io.BytesIO(body))
spatial = lds.DataSet(chanlocs[["X", "Y", "Z"]], "Spatial")
spatialDM = lds.DistanceMatrix(spatial, lms.VectorDifferenceNorm)

print (s)
print (t)

['sub-NDARAA117NEJ', 'sub-NDARAC904DMU']
['task-RestingStateeeg', 'task-RestingStateeeg']


In [18]:
d = {
            "resource_path": f,
            "subjects": s,
            "tasks": t        
    }
descriptor = pd.DataFrame(d)
print (descriptor)

                                       resource_path          subjects  \
0  [[0.0, 2.4754, 52.105, 37.534, 52.535, 76.26, ...  sub-NDARAA117NEJ   
1  [[0.0, 261.64, 375.68, 345.7, 97.827, 381.74, ...  sub-NDARAC904DMU   

                  tasks  
0  task-RestingStateeeg  
1  task-RestingStateeeg  


# Since we read in the pickle directly, we need to modify several functions.

In [19]:
class DistanceMatrix:
    """A distance matrix computed from a DataSet object.

    Parameters
    ----------
    dataset : :obj:`DiskDataSet`
        A dataset on which to compute the distance matrix
    metric : function
        A distance used to compute the distance matrix.

    Attributes
    ----------
    dataset : :obj:`DiskDataSet`
        A dataset on which to compute the distance matrix
    metric : function
        A distance used to compute the distance matrix.
    N : int
        Number of data points in the dataset.
    matrix : :obj:`ndarray`
        The distance matrix.

    """

    def __init__(self, dataset, metric):
        self.DS = dataset
        self.name = self.DS.name
        self.labels = self.DS.D.index.values
        self.label_name = self.DS.D.index.name
        self.metric = metric
        self.metric_name = metric.__name__
        self.n = self.DS.n
        parameterization = parameterize(self.DS)
        self.D = np.zeros([self.n, self.n])
        for i in range(self.n):
            I = parameterization[i]
            for j in range(i + 1):
                J = parameterization[j]
                self.D[i, j] = self.metric.compare(I, J)
                self.D[j, i] = self.D[i, j]
        self.D = pd.DataFrame(self.D)
        self.D.index = self.DS.D.index
        self.D.index.name = self.DS.D.index.name

    def getMatrix(self):
        """Get the distance matrix.

        Returns
        -------
        :obj:`ndarray`
            The distance matrix.

        """
        return self.D

In [20]:
def parameterize(D):
        """Compute the correlation matrix of a single data point.

        Parameters
        ----------
        D : :obj:`DataSet`
            The lemur data set object to parameterize.

        Returns
        -------
        :obj:`list` of :obj:`ndarray`
            The correlation matrix of each object in the dataset.

        """
        with np.errstate(divide = 'ignore', invalid = 'ignore'):
            return list(map(lambda j: np.nan_to_num(np.corrcoef(D.getMatrix(j))), range(D.n)))


In [21]:
class DataSet:
    def __init__(self, D, name="default"):
        self.D = D
        self.n, self.d = self.D.shape
        self.name = name

    def getResource(self, index):
        return self.D.iloc[index, :]

    def saveMetaData(self, filepath):
        metadata = dict(d=self.d, n=self.n, name=self.name)
        string = json.dumps(metadata, indent=2)
        with open(filepath, 'w') as f:
            f.write(string)
        return string

    def getMatrix(self):
        return self.D.as_matrix()

In [22]:
class EEGDataSet:

    def __init__(self, dataframe_descriptor, name="fmri"):
        self.D = dataframe_descriptor
        self.D.index = self.D["subjects"].astype(str) + "-" + self.D["tasks"].astype(str)
        self.D.index.name = "index"
        self.name = name
        self.n = self.D.shape[0]

    def getResource(self, index):
        resource = self.D.ix[index]
        return resource

    def getMatrix(self, index):
        resource_path = self.D.ix[index][0]
        return resource_path.T
#         with open(resource_path, "rb") as f:
#             return pkl.load(f).T

    def getResourceDS(self, index):
        resource = self.getResource(index)
        matrix = self.getMatrix(index)
        D = pd.DataFrame(matrix.T)
        name = "%s/%s"%(resource[1], resource[2])
        DS = DataSet(D, name)
        return DS

In [23]:
class TimeSeriesPlotter:
    """A generic one-to-one plotter for time series data to be extended.

    Parameters
    ----------
    data : :obj:`ndarray`
        The time series data.
    resource_name : string
        The name of the time series being plotted.
    row_name : string
        The name of the rows in the time-series (e.g. channels, sources. ect.).
    column_name : string
        The name of the columns in the time-series (e.g. time points, time steps, seconds, ect.).

    Attributes
    ----------
    data : :obj:`ndarray`
        The time series data.
    d : int
        The number of dimensions in the time series
    n : int
        The number of time points in the time series
    row_name : string
        The name of the rows in the time-series (e.g. channels, sources. ect.).
    column_name : string
        The name of the columns in the time-series (e.g. time points, time steps, seconds, ect.).
    resource_name : string
        The name of the time series being plotted.

    """

    def __init__(self, DS, mode="notebook", base_path=None):
        self.data = DS.D.as_matrix().T
        self.d, self.n = self.data.shape
        self.d_names = DS.D.columns
        self.row_name = "Channels"
        self.col_name = "Time Points"
        self.resource_name = DS.name
        self.plot_mode = mode
        self.base_path = base_path

    def makeplot(self, fig, local_path=None):
        """Make the plotly figure visable to the user in the way they want.

        Parameters
        ----------
        gid : :obj:`figure`
            An plotly figure.

        """

        if self.plot_mode == "notebook":
            iplot(fig)
        if self.plot_mode == "savediv":
            fig["layout"]["autosize"] = True
            div = plot(fig, output_type='div', include_plotlyjs=True)
            path = os.path.join(self.base_path, local_path + ".html")
            os.makedirs("/".join(path.split("/")[:-1]), exist_ok=True)
            with open(path, "w") as f:
                f.write(div)
                f.close()

        if self.plot_mode == "div":
            fig["layout"]["autosize"] = True
            return plot(fig, output_type='div', include_plotlyjs=True)

In [24]:
class SparkLinePlotter(TimeSeriesPlotter):
    titlestring = "Sparklines for %s"
    shortname = "sparkline"

    def plot(self, sample_freq):
        """Constructs a downsampled spark line plot of the time series.

        If there are more than 500 time points, the time series will be down sampled to
        500 column variables by windowed averaging. This is done by splitting the time series
        into 500 equal sized segments in the time domain, then plotting the mean for each segment.

        Parameters
        ----------
        sample_freq : int
            The sampling frequency (how many times sampled per second).

        """
        title = self.titlestring % (self.resource_name)
        xaxis = dict(
            title = "Time in Seconds"
        )
        yaxis = dict(
            title = "Intensity"
        )
        layout = dict(title=title, xaxis=xaxis, yaxis=yaxis)
        if self.n > 500:
            winsize = self.n // 500
            df = pd.DataFrame(self.data.T)
            df = df.groupby(lambda x: x // winsize).mean()
            downsampled_data = df.as_matrix().T
            data = [dict(mode="lines",
                         name = str(i),
                         x=(np.arange(downsampled_data.shape[1]) * winsize) / sample_freq,
                         y=downsampled_data[i, :]) for i in range(downsampled_data.shape[0])]
        fig = dict(data=data, layout=layout)
        self.makeplot(fig, self.resource_name + "/" + self.shortname)

In [25]:
class MatrixPlotter:
    def __init__(self, DS, mode="notebook", base_path = None):
        self.DS = DS
        self.plot_mode = mode
        self.base_path = base_path

        Reds = cl.scales['8']['seq']['Reds']
        self.Reds = list(zip(np.linspace(0, 1, len(Reds)), Reds))

        BuRd = cl.scales['11']['div']['RdBu'][::-1]
        self.BuRd = list(zip(np.linspace(0, 1, len(BuRd)), BuRd))

    def makeplot(self, fig, local_path=None):
        """Make the plotly figure visable to the user in the way they want.

        Parameters
        ----------
        gid : :obj:`figure`
            An plotly figure.

        """

        if self.plot_mode == "notebook":
            iplot(fig)
        if self.plot_mode == "savediv":
            fig["layout"]["autosize"] = True
            div = plot(fig, output_type='div', include_plotlyjs=True)
            path = os.path.join(self.base_path, local_path + ".html")
            os.makedirs("/".join(path.split("/")[:-1]), exist_ok=True)
            with open(path, "w") as f:
                f.write(div)
                f.close()

        if self.plot_mode == "div":
            fig["layout"]["autosize"] = True
            return plot(fig, output_type='div', include_plotlyjs=True)

In [26]:
class ConnectedScatterplot(MatrixPlotter):
    titlestring = "%s Scatterplot"
    shortname = "connectedscatter"

    def plot(self, spatialDM):
        """Constructs a distance matrix heatmap using the :obj:`DistanceMatrix` object, in plotly.

        """
        title = self.titlestring % (self.DS.name)
        DM = self.DS.D.as_matrix()
        sDM = spatialDM.D.as_matrix()
        colors = (np.nansum(DM, axis=0) - 1) / DM.shape[0]
        TSNEEmbedder = leb.TSNEEmbedder(num_components=2)
        m = TSNEEmbedder.embed(sDM)
        xaxis = go.XAxis(title=spatialDM.D.index.name)
        yaxis = go.YAxis(scaleanchor="x", title=spatialDM.D.index.name)
        layout = dict(title=title, xaxis=xaxis, yaxis=yaxis, showlegend=False)
        trace1 = go.Scatter(x = m[:, 0],
                           y = m[:, 1],
                           mode = "markers",
                           marker = dict(color=colors, size=16, showscale=True))
        distances = np.zeros([m.shape[0], m.shape[0]])
        for i in range(m.shape[0]):
            for j in range(m.shape[0]):
                distances[i, j] = np.linalg.norm(m[i, :] - m[j, :])
        med = np.median(distances)
        mask = distances < (med / 4)
        Xe = []
        Ye = []
        for i in range(distances.shape[0]):
            for j in range(distances.shape[1]):
                if mask[i, j]:
                    Xe += [m[i, 0], m[j, 0], None]
                    Ye += [m[i, 1], m[j, 1], None]
        trace2 = go.Scatter(x=Xe,
                            y=Ye,
                            mode='lines',
                            line=go.Line(color='rgb(125,125,125)', width=1),
                            hoverinfo='none')
        data = [trace1, trace2]
        fig = dict(data=data, layout=layout)
        return self.makeplot(fig, self.DS.name + "/" + self.shortname)

In [27]:
# Use modified functions to read in new dataframe
import numpy as np
eds = EEGDataSet(descriptor)
# Create a lemur distance matrix based on the EEG data
DM = DistanceMatrix(eds, lms.FroCorr)
DM.name = "eeg-DistanceMatrix"



.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated



In [34]:
# Create an embedded distance matrix object under MDS
MDSEmbedder = leb.MDSEmbedder(num_components=10)
EEG_Embedded = MDSEmbedder.embed(DM)
for i in range(eds.n):
    single_ds = eds.getResourceDS(i)
    SparkLinePlotter(single_ds, mode="savediv", base_path=out_base).plot(sample_freq=500)

In [29]:
for i in range(eds.n):
    single_ds = eds.getResourceDS(i)
    single_DM = lds.DataSet(single_ds.D.corr(), single_ds.name)
    SpatialConnectivity(single_DM, mode="savediv",
                            base_path=out_base).plot(spatial)

In [30]:
for i in range(eds.n):
    single_ds = eds.getResourceDS(i)
    single_DM = lds.DataSet(single_ds.D.corr(), single_ds.name)
    ConnectedScatterplot(single_DM,
                             mode="savediv",
                             base_path=out_base).plot(spatialDM)

In [32]:
SquareHeatmap(DM, mode="savediv", base_path=out_base).plot()
Heatmap(EEG_Embedded, mode="savediv", base_path=out_emb_base).plot()

In [None]:
# lpl.EigenvectorHeatmap(DM, mode="savediv", base_path=out_base).plot()
# lpl.EigenvectorHeatmap(EEG_Embedded, mode="savediv",
#                        base_path=out_emb_base).plot()

In [None]:
for dirname, dirnames, filenames in os.walk('/Users/YujiaLiu/Desktop/test'):
    for filename in filenames:
        if not filename.endswith('DS_Store'):
            print(os.path.join(dirname, filename))

In [None]:
for dirname, dirnames, filenames in os.walk('/Users/YujiaLiu/Desktop/download_test/data/'):
    for filename in filenames:
        if filename.endswith('.html'):
            print(os.path.join(dirname, filename))