In [9]:
from allensdk.core.brain_observatory_cache import BrainObservatoryCache

In [10]:
# Handle to the Brain Observatory cache described by the manifest file below.
# NOTE(review): the manifest path is an absolute, machine-specific location;
# adjust it when running this notebook elsewhere.
boc = BrainObservatoryCache(
    manifest_file='/local1/data/boc/manifest.json',
)

In [11]:
# Identifier of the ophys experiment analysed throughout this notebook.
oeid = 541206592

# Fetch the experiment's data set through the cache; every later cell
# works from this object.
nwb_dataset = boc.get_ophys_experiment_data(oeid)


In [12]:
import pandas as pd
import xarray as xr

from allensdk.brain_observatory.natural_scenes import NaturalScenes

def get_ns_msrx(nwb_dataset):
    """Return the natural-scenes mean sweep responses as a labelled DataArray.

    A ``NaturalScenes`` analysis is built from ``nwb_dataset`` and its
    ``mean_sweep_response`` table is relabelled and wrapped in xarray.

    Parameters
    ----------
    nwb_dataset
        Experiment data set accepted by ``NaturalScenes``.

    Returns
    -------
    xarray.DataArray
        Two-dimensional array with dimensions ``time`` (the analysis
        timestamps looked up at each stimulus-table ``start`` entry) and
        ``neuron`` (the analysis ``cell_id`` values), plus a non-index
        coordinate ``natural_image`` along ``time`` taken from the
        stimulus-table ``frame`` column.
    """
    analysis = NaturalScenes(nwb_dataset)

    # Work on a new frame so the analysis object's own table is left alone.
    # The 'dx' column is discarded: its meaning is not established here and
    # it is not one of the per-cell response columns being relabelled below.
    responses = analysis.mean_sweep_response.drop('dx', axis=1)

    stim_table = analysis.stim_table

    # Row labels: timestamp at the start of each sweep.
    sweep_onsets = pd.Series(
        analysis.timestamps[stim_table['start']],
        name='time',
    )
    # Column labels: one cell id per remaining response column.
    cell_ids = pd.Series(analysis.cell_id, name='neuron')

    responses = responses.set_index(sweep_onsets)
    responses.columns = cell_ids

    # The index/column names become the DataArray dimension names; the
    # presented frame is attached as an extra coordinate along 'time'.
    labelled = xr.DataArray(responses)
    labelled.coords['natural_image'] = ('time', stim_table['frame'])

    return labelled

In [13]:
# Build the labelled response array once; the decoding cells below reuse it.
msrx = get_ns_msrx(nwb_dataset)
# Parenthesised form prints the same text under Python 2 and also parses
# under Python 3 (the bare ``print x`` statement is a SyntaxError there).
print(msrx)

<xarray.DataArray (time: 5950, neuron: 154)>
array([[ 2.750398,  3.113332,  3.283231, ...,  3.246232, 17.838305, 44.883263],
       [ 5.472741,  4.520462,  1.848134, ...,  3.700325, 41.864319, 55.052734],
       [ 4.938696,  1.872071,  0.822514, ...,  1.446858, 19.081219, -0.731454],
       ...,
       [ 0.984122, -1.035176,  2.519683, ..., -0.894795, 14.030047,  1.291967],
       [ 1.705522,  1.379635,  1.116845, ...,  0.169055,  6.26528 ,  1.80034 ],
       [ 0.420941,  2.543081,  2.991403, ...,  0.980473, -0.720777,  4.779262]])
Coordinates:
  * time           (time) float64 545.2 545.5 545.7 546.0 546.2 546.5 546.7 ...
  * neuron         (neuron) int64 541510267 541510270 541510307 541510405 ...
    natural_image  (time) int64 92 27 52 37 103 1 54 19 54 -1 74 115 44 88 ...


In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [7]:
import numpy as np
def decode(msrx, test_size=0.2, random_state=None):
    """Decode the ``natural_image`` label from the rows of ``msrx``.

    The rows are split into stratified train/test subsets, an L1-penalised
    one-vs-rest logistic regression is fitted on the training subset, and
    the model is scored on the held-out subset.

    Parameters
    ----------
    msrx : xarray.DataArray
        Two-dimensional array whose ``.data`` is used as the feature matrix
        (one row per sample) and which carries a ``natural_image``
        coordinate holding one class label per row.
    test_size : float or int, optional
        Forwarded to ``train_test_split``. Defaults to ``0.2``, the value
        that was previously hard-coded.
    random_state : int, RandomState instance or None, optional
        Seed forwarded to both ``train_test_split`` and the ``saga``
        solver so that a run can be reproduced. The default ``None``
        keeps the previous unseeded behaviour.

    Returns
    -------
    float
        Held-out accuracy multiplied by the number of distinct labels,
        i.e. accuracy expressed as a multiple of ``1 / n_classes`` (which
        is the chance level only if all classes are equally frequent).
    """
    # Features and labels as plain ndarrays, so scikit-learn does not have
    # to index into an xarray object when splitting.
    X = msrx.data
    y = np.asarray(msrx['natural_image'])

    # Stratify so every label appears in both subsets in proportion.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=test_size,
        stratify=y,
        random_state=random_state,
    )

    # One binary L1-penalised classifier per label; 'saga' is a stochastic
    # solver, hence the seed is passed here as well.
    lm = LogisticRegression(
        solver='saga',
        multi_class='ovr',
        penalty='l1',
        n_jobs=-1,
        random_state=random_state,
    )
    lm.fit(X_train, y_train)

    n_classes = len(np.unique(y))
    return lm.score(X_test, y_test) * n_classes

In [8]:
%%timeit -n 1 -r 1
print decode(msrx)



19.400000000000002
1 loop, best of 1: 49.1 s per loop
