Merge pull request #2609 from AllenInstitute/ticket/2594/dev
Ticket/2594/dev
mikejhuang committed Nov 28, 2022
2 parents 4245587 + 25c055d commit 28e8497
Showing 6 changed files with 140,485 additions and 90,291 deletions.
@@ -1,31 +1,21 @@
import pandas as pd
from typing import Iterable, List
import ast
from typing import Iterable

from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.abcs import BehaviorProjectBase # noqa: E501
from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.abcs import ( # noqa: E501
BehaviorProjectBase,
)
from allensdk.brain_observatory.behavior.behavior_session import (
BehaviorSession)
BehaviorSession,
)
from allensdk.brain_observatory.behavior.behavior_ophys_experiment import (
BehaviorOphysExperiment)
BehaviorOphysExperiment,
)
from allensdk.core.utilities import literal_col_eval
from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.data_io.project_cloud_api_base import ( # noqa: E501
ProjectCloudApiBase,
)

from allensdk.brain_observatory.behavior.behavior_project_cache.\
project_apis.data_io.project_cloud_api_base import ProjectCloudApiBase # noqa: E501


def literal_col_eval(df: pd.DataFrame,
columns: List[str] = ["ophys_experiment_id",
"ophys_container_id",
"driver_line"]) -> pd.DataFrame:
def converter(x):
if isinstance(x, str):
x = ast.literal_eval(x)
return x

for column in columns:
if column in df.columns:
df.loc[df[column].notnull(), column] = \
df[column][df[column].notnull()].apply(converter)
return df
COL_EVAL_LIST = ["ophys_experiment_id", "ophys_container_id", "driver_line"]


class BehaviorProjectCloudApi(BehaviorProjectBase, ProjectCloudApiBase):
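
A minimal, self-contained sketch of the relocated helper, for reference while reading the hunk above. It mirrors the deleted local body; the import from allensdk.core.utilities is the commit's actual source of truth, and the demo frame and driver-line value below are invented for illustration.

import ast

import pandas as pd

COL_EVAL_LIST = ["ophys_experiment_id", "ophys_container_id", "driver_line"]


def literal_col_eval(df: pd.DataFrame, columns=COL_EVAL_LIST) -> pd.DataFrame:
    """Parse stringified Python literals (e.g. "[1, 2]") back into objects."""

    def converter(x):
        # CSV round-trips turn lists into strings; only parse actual strings
        return ast.literal_eval(x) if isinstance(x, str) else x

    for column in columns:
        if column in df.columns:
            mask = df[column].notnull()
            df.loc[mask, column] = df.loc[mask, column].apply(converter)
    return df


# invented data: pd.read_csv hands a list-valued column back as strings
demo = pd.DataFrame({"driver_line": ["['Slc17a7-IRES2-Cre']", None]})
demo = literal_col_eval(demo)
assert demo.loc[0, "driver_line"] == ["Slc17a7-IRES2-Cre"]
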
@@ -34,26 +24,32 @@ class BehaviorProjectCloudApi(BehaviorProjectBase, ProjectCloudApiBase):

def _load_manifest_tables(self):

expected_metadata = set(["behavior_session_table",
"ophys_session_table",
"ophys_experiment_table",
"ophys_cells_table"])
expected_metadata = set(
[
"behavior_session_table",
"ophys_session_table",
"ophys_experiment_table",
"ophys_cells_table",
]
)

cache_metadata = set(self.cache._manifest.metadata_file_names)

if cache_metadata != expected_metadata:
raise RuntimeError("expected S3CloudCache object to have "
f"metadata file names: {expected_metadata} "
f"but it has {cache_metadata}")
raise RuntimeError(
"expected S3CloudCache object to have "
f"metadata file names: {expected_metadata} "
f"but it has {cache_metadata}"
)

self._get_ophys_session_table()
self._get_behavior_session_table()
self._get_ophys_experiment_table()
self._get_ophys_cells_table()

def get_behavior_session(
self,
behavior_session_id: int) -> BehaviorSession:
self, behavior_session_id: int
) -> BehaviorSession:
"""get a BehaviorSession by specifying behavior_session_id
Parameters
@@ -79,25 +75,28 @@
"""
row = self._behavior_session_table.query(
f"behavior_session_id=={behavior_session_id}")
f"behavior_session_id=={behavior_session_id}"
)
if row.shape[0] != 1:
raise RuntimeError("The behavior_session_table should have "
"1 and only 1 entry for a given "
"behavior_session_id. For "
f"{behavior_session_id} "
f" there are {row.shape[0]} entries.")
raise RuntimeError(
"The behavior_session_table should have "
"1 and only 1 entry for a given "
"behavior_session_id. For "
f"{behavior_session_id} "
f" there are {row.shape[0]} entries."
)
row = row.squeeze()
has_file_id = not pd.isna(row[self.cache.file_id_column])
if not has_file_id:
oeid = row.ophys_experiment_id[0]
row = self._ophys_experiment_table.query(f"index=={oeid}")
file_id = str(int(row[self.cache.file_id_column]))
data_path = self._get_data_path(file_id=file_id)
return BehaviorSession.from_nwb_path(
nwb_path=str(data_path))
return BehaviorSession.from_nwb_path(nwb_path=str(data_path))

def get_behavior_ophys_experiment(self, ophys_experiment_id: int
) -> BehaviorOphysExperiment:
def get_behavior_ophys_experiment(
self, ophys_experiment_id: int
) -> BehaviorOphysExperiment:
"""get a BehaviorOphysExperiment by specifying ophys_experiment_id
Parameters
@@ -111,24 +110,29 @@
"""
row = self._ophys_experiment_table.query(
f"index=={ophys_experiment_id}")
f"index=={ophys_experiment_id}"
)
if row.shape[0] != 1:
raise RuntimeError("The behavior_ophys_experiment_table should "
"have 1 and only 1 entry for a given "
f"ophys_experiment_id. For "
f"{ophys_experiment_id} "
f" there are {row.shape[0]} entries.")
raise RuntimeError(
"The behavior_ophys_experiment_table should "
"have 1 and only 1 entry for a given "
f"ophys_experiment_id. For "
f"{ophys_experiment_id} "
f" there are {row.shape[0]} entries."
)
file_id = str(int(row[self.cache.file_id_column]))
data_path = self._get_data_path(file_id=file_id)
return BehaviorOphysExperiment.from_nwb_path(
str(data_path))
return BehaviorOphysExperiment.from_nwb_path(str(data_path))

def _get_ophys_session_table(self):
session_table_path = self._get_metadata_path(
fname="ophys_session_table")
df = literal_col_eval(pd.read_csv(session_table_path,
dtype={'mouse_id': str}))
df['date_of_acquisition'] = pd.to_datetime(df['date_of_acquisition'])
fname="ophys_session_table"
)
df = literal_col_eval(
pd.read_csv(session_table_path, dtype={"mouse_id": str}),
columns=COL_EVAL_LIST,
)
df["date_of_acquisition"] = pd.to_datetime(df["date_of_acquisition"])
self._ophys_session_table = df.set_index("ophys_session_id")

def get_ophys_session_table(self) -> pd.DataFrame:
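
As an aside on the read pattern used by these loaders, here is a standalone sketch of why mouse_id is pinned to str and date_of_acquisition re-parsed; the CSV content, IDs, and timestamp are invented for illustration.

import io

import pandas as pd

# invented CSV: with default dtype inference, mouse_id would load as int64
# (dropping any leading zero) and the date would stay a plain string
csv = io.StringIO(
    "ophys_session_id,mouse_id,date_of_acquisition\n"
    "951410079,0457841,2019-09-20 09:12:00\n"
)
df = pd.read_csv(csv, dtype={"mouse_id": str})
df["date_of_acquisition"] = pd.to_datetime(df["date_of_acquisition"])

assert df.loc[0, "mouse_id"] == "0457841"             # string id preserved
assert df.loc[0, "date_of_acquisition"].year == 2019  # real Timestamp
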
@@ -146,10 +150,13 @@ def get_ophys_session_table(self) -> pd.DataFrame:

def _get_behavior_session_table(self):
session_table_path = self._get_metadata_path(
fname='behavior_session_table')
df = literal_col_eval(pd.read_csv(session_table_path,
dtype={'mouse_id': str}))
df['date_of_acquisition'] = pd.to_datetime(df['date_of_acquisition'])
fname="behavior_session_table"
)
df = literal_col_eval(
pd.read_csv(session_table_path, dtype={"mouse_id": str}),
columns=COL_EVAL_LIST,
)
df["date_of_acquisition"] = pd.to_datetime(df["date_of_acquisition"])

self._behavior_session_table = df.set_index("behavior_session_id")

@@ -172,20 +179,27 @@ def get_behavior_session_table(self) -> pd.DataFrame:

def _get_ophys_experiment_table(self):
experiment_table_path = self._get_metadata_path(
fname="ophys_experiment_table")
df = literal_col_eval(pd.read_csv(experiment_table_path,
dtype={'mouse_id': str}))
df['date_of_acquisition'] = pd.to_datetime(df['date_of_acquisition'])
fname="ophys_experiment_table"
)
df = literal_col_eval(
pd.read_csv(experiment_table_path, dtype={"mouse_id": str}),
columns=COL_EVAL_LIST,
)
df["date_of_acquisition"] = pd.to_datetime(df["date_of_acquisition"])

self._ophys_experiment_table = df.set_index("ophys_experiment_id")

def _get_ophys_cells_table(self):
ophys_cells_table_path = self._get_metadata_path(
fname="ophys_cells_table")
df = literal_col_eval(pd.read_csv(ophys_cells_table_path))
fname="ophys_cells_table"
)
df = literal_col_eval(
pd.read_csv(ophys_cells_table_path), columns=COL_EVAL_LIST
)
# NaN's for invalid cells force this to float, push to int
df['cell_specimen_id'] = pd.array(df['cell_specimen_id'],
dtype="Int64")
df["cell_specimen_id"] = pd.array(
df["cell_specimen_id"], dtype="Int64"
)
self._ophys_cells_table = df.set_index("cell_roi_id")

def get_ophys_cells_table(self):
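
The comment above names a classic pandas pitfall; the following standalone snippet, with an invented id, demonstrates why cell_specimen_id is pushed to the nullable Int64 extension dtype.

import numpy as np
import pandas as pd

# a single NaN forces an otherwise-integer column over to float64
ids = pd.Series([1086496245, np.nan])
assert ids.dtype == np.float64

# the nullable extension dtype keeps true integers alongside <NA>
ids = pd.array([1086496245, np.nan], dtype="Int64")
assert ids.dtype.name == "Int64"
assert ids[0] == 1086496245 and ids[1] is pd.NA
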
@@ -205,7 +219,7 @@ def get_ophys_experiment_table(self):
return self._ophys_experiment_table

def get_natural_movie_template(self, number: int) -> Iterable[bytes]:
""" Download a template for the natural movie stimulus. This is the
"""Download a template for the natural movie stimulus. This is the
actual movie that was shown during the recording session.
:param number: identifier for this scene
:type number: int
70 changes: 47 additions & 23 deletions allensdk/brain_observatory/ecephys/ecephys_session.py
@@ -1,22 +1,25 @@
import warnings
from collections.abc import Collection
from collections import defaultdict
from collections.abc import Collection
from typing import Optional

import xarray as xr
import numpy as np
import pandas as pd
import scipy.stats
import xarray as xr

from allensdk.core.lazy_property import LazyPropertyMixin
from allensdk.core.utilities import literal_col_eval, df_list_to_tuple
from allensdk.brain_observatory.ecephys.ecephys_session_api import (
EcephysSessionApi,
EcephysNwbSessionApi,
EcephysNwb1Api)
EcephysNwb1Api,
EcephysNwbSessionApi,
EcephysSessionApi,
)
from allensdk.brain_observatory.ecephys.stimulus_table import naming_utilities
from allensdk.brain_observatory.ecephys.stimulus_table._schemas import (
default_column_renames,
default_stimulus_renames,
default_column_renames)
)
from allensdk.core.lazy_property import LazyPropertyMixin

# stimulus_presentation column names not describing a parameter of a stimulus
NON_STIMULUS_PARAMETERS = tuple([
@@ -356,16 +359,17 @@ def get_current_source_density(self, probe_id):
Returns
-------
xr.DataArray :
dimensions are channel (id) and time (seconds, relative to stimulus
onset). Values are current source density assessed on that
channel at that time (V/m^2)
dimensions are channel (id) and time (seconds, relative to
stimulus onset). Values are current source density assessed
on that channel at that time (V/m^2)
"""

return self.api.get_current_source_density(probe_id)

def get_lfp(self, probe_id, mask_invalid_intervals=True):
''' Load an xarray DataArray with LFP data from channels on a single probe
''' Load an xarray DataArray with LFP data from channels on a
single probe
Parameters
----------
@@ -979,8 +983,8 @@ def get_parameter_values_for_stimulus(
self,
stimulus_name,
drop_nulls=True):
""" For each stimulus parameter, report the unique values taken on by that
parameter while a named stimulus was presented.
""" For each stimulus parameter, report the unique values taken
on by that parameter while a named stimulus was presented.
Parameters
----------
@@ -1004,8 +1008,8 @@ def get_stimulus_parameter_values(
self,
stimulus_presentation_ids=None,
drop_nulls=True):
''' For each stimulus parameter, report the unique values taken on by that
parameter throughout the course of the session.
''' For each stimulus parameter, report the unique values taken
on by that parameter throughout the course of the session.
Parameters
----------
@@ -1036,7 +1040,6 @@ def get_stimulus_parameter_values(

non_null = np.array(uniques[uniques != "null"])
non_null = non_null
non_null = np.sort(non_null)

if not drop_nulls and "null" in uniques:
non_null = np.concatenate([non_null, ["null"]])
@@ -1124,12 +1127,29 @@ def _build_stimulus_presentations(
# pandas groupby ops ignore nans, so we need a new "nonapplicable"
# value that pandas does not recognize as null ...
stimulus_presentations.replace("", nonapplicable, inplace=True)
stimulus_presentations.fillna(nonapplicable, inplace=True)

stimulus_presentations['duration'] = \
stimulus_presentations['stop_time'] - \
stimulus_presentations['start_time']

# pandas does not automatically convert boolean cols for fillna
boolean_colnames = stimulus_presentations.dtypes[
stimulus_presentations.dtypes == "boolean"].index
col_type_map = {colname: "object" for colname in boolean_colnames}
stimulus_presentations = stimulus_presentations.astype(
col_type_map).fillna(nonapplicable)

# eval str(numeric) and str(lists)
# convert lists to tuple for hashability
# Rationale: pd dataframe reads values as str from nwb files
# where they are expected to be float
col_list = ["phase, size, spatial_frequency"]
stimulus_presentations = literal_col_eval(
stimulus_presentations,
columns=col_list)
stimulus_presentations = df_list_to_tuple(
stimulus_presentations,
columns=col_list)
stimulus_presentations["duration"] = (
stimulus_presentations["stop_time"]
- stimulus_presentations["start_time"]
)
# TODO: database these
stimulus_conditions = {}
presentation_conditions = []
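
Two pandas behaviors motivate the new code in this hunk. The sketch below reproduces both in isolation; all values are invented, and the exact exception pandas raises for the boolean fillna case may vary by version, hence the broad except.

import pandas as pd

# 1) fillna with a string sentinel is rejected on a "boolean" extension
#    column, so the diff casts such columns to object first
flags = pd.Series([True, None], dtype="boolean")
try:
    flags = flags.fillna("nonapplicable")
except (TypeError, ValueError):
    flags = flags.astype("object").fillna("nonapplicable")

# 2) list-valued cells are unhashable, which breaks groupby and
#    drop_duplicates; converting them to tuples restores hashability
df = pd.DataFrame({"size": [[20.0, 20.0], [20.0, 20.0]], "phase": [0.0, 0.0]})
try:
    df.groupby(["size", "phase"]).size()
except TypeError:  # unhashable type: 'list'
    df["size"] = df["size"].apply(tuple)
print(df.groupby(["size", "phase"]).size())  # two rows collapse to one group
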
@@ -1241,7 +1261,10 @@ def _build_mean_waveforms(self, mean_waveforms):

channel_id_lut = defaultdict(lambda: -1)
for cid, row in self.channels.iterrows():
channel_id_lut[(row["local_index"], row["probe_id"])] = cid
channel_id_lut[(
row["probe_channel_number"],
row["probe_id"],
)] = cid

probe_id_lut = {
uid: row['probe_id'] for uid, row in self._units.iterrows()
@@ -1438,7 +1461,8 @@ def is_distinct_from(left, right):


def array_intervals(array):
""" find interval bounds (bounding consecutive identical values) in an array
""" find interval bounds (bounding consecutive identical values)
in an array
Parameters
-----------
@@ -195,4 +195,3 @@ def map_column_names(table, name_map=None, ignore_case=True):
output = table.rename(columns=name_map)

return output
#