refactor eval/tuple functions
small fix to literal_col_eval call
mikejhuang committed Nov 23, 2022
1 parent feec27b commit 989582d
Showing 4 changed files with 97 additions and 76 deletions.
@@ -1,32 +1,20 @@
import pandas as pd
from typing import Iterable, List
import ast
from typing import Iterable

from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.abcs import BehaviorProjectBase # noqa: E501
from allensdk.brain_observatory.behavior.behavior_session import (
BehaviorSession)
from allensdk.brain_observatory.behavior.behavior_ophys_experiment import (
BehaviorOphysExperiment)

from allensdk.brain_observatory.behavior.swdb.utilities import literal_col_eval
from allensdk.brain_observatory.behavior.behavior_project_cache.\
project_apis.data_io.project_cloud_api_base import ProjectCloudApiBase # noqa: E501


def literal_col_eval(df: pd.DataFrame,
columns: List[str] = ["ophys_experiment_id",
"ophys_container_id",
"driver_line"]) -> pd.DataFrame:
def converter(x):
if isinstance(x, str):
x = ast.literal_eval(x)
return x

for column in columns:
if column in df.columns:
df.loc[df[column].notnull(), column] = \
df[column][df[column].notnull()].apply(converter)
return df

COL_EVAL_LIST = ["ophys_experiment_id",
"ophys_container_id",
"driver_line"]

class BehaviorProjectCloudApi(BehaviorProjectBase, ProjectCloudApiBase):

@@ -127,7 +115,8 @@ def _get_ophys_session_table(self):
session_table_path = self._get_metadata_path(
fname="ophys_session_table")
df = literal_col_eval(pd.read_csv(session_table_path,
dtype={'mouse_id': str}))
dtype={'mouse_id': str}),
columns=COL_EVAL_LIST)
df['date_of_acquisition'] = pd.to_datetime(df['date_of_acquisition'])
self._ophys_session_table = df.set_index("ophys_session_id")

@@ -148,7 +137,8 @@ def _get_behavior_session_table(self):
session_table_path = self._get_metadata_path(
fname='behavior_session_table')
df = literal_col_eval(pd.read_csv(session_table_path,
dtype={'mouse_id': str}))
dtype={'mouse_id': str}),
columns=COL_EVAL_LIST)
df['date_of_acquisition'] = pd.to_datetime(df['date_of_acquisition'])

self._behavior_session_table = df.set_index("behavior_session_id")
@@ -174,15 +164,17 @@ def _get_ophys_experiment_table(self):
experiment_table_path = self._get_metadata_path(
fname="ophys_experiment_table")
df = literal_col_eval(pd.read_csv(experiment_table_path,
dtype={'mouse_id': str}))
dtype={'mouse_id': str}),
columns=COL_EVAL_LIST)
df['date_of_acquisition'] = pd.to_datetime(df['date_of_acquisition'])

self._ophys_experiment_table = df.set_index("ophys_experiment_id")

def _get_ophys_cells_table(self):
ophys_cells_table_path = self._get_metadata_path(
fname="ophys_cells_table")
df = literal_col_eval(pd.read_csv(ophys_cells_table_path))
df = literal_col_eval(pd.read_csv(ophys_cells_table_path),
columns=COL_EVAL_LIST)
# NaN's for invalid cells force this to float, push to int
df['cell_specimen_id'] = pd.array(df['cell_specimen_id'],
dtype="Int64")
91 changes: 74 additions & 17 deletions allensdk/brain_observatory/behavior/swdb/utilities.py
@@ -3,6 +3,8 @@
import pandas as pd
import seaborn as sns
import matplotlib as mpl
from typing import List
import ast

'''
This file contains a set of functions that are useful in analyzing visual behavior data
@@ -17,17 +19,22 @@ def save_figure(fig, figsize, save_dir, folder, filename, formats=['.png']):
fig: a figure object
figsize: tuple of desired figure size
save_dir: string, the directory to save the figure
folder: string, the sub-folder to save the figure in. if the folder does not exist, it will be created
folder: string, the sub-folder to save the figure in. if the
folder does not exist, it will be created
filename: string, the desired name of the saved figure
formats: a list of file formats as strings to save the figure as, ex: ['.png','.pdf']
formats: a list of file formats as strings to save the figure
as, ex: ['.png','.pdf']
'''
fig_dir = os.path.join(save_dir, folder)
if not os.path.exists(fig_dir):
os.mkdir(fig_dir)
mpl.rcParams['pdf.fonttype'] = 42
fig.set_size_inches(figsize)
for f in formats:
fig.savefig(os.path.join(fig_dir, fig_title + f), transparent=True, orientation='landscape')
        fig.savefig(
            os.path.join(fig_dir, filename + f),
            transparent=True,
            orientation='landscape')
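A usage sketch (hypothetical figure and paths): with filename used for the output name as above, this would write example.png and example.pdf under /tmp/figures:

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot([0, 1], [0, 1])
save_figure(fig, figsize=(6, 4), save_dir='/tmp', folder='figures',
            filename='example', formats=['.png', '.pdf'])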


def get_dff_matrix(session):
@@ -46,27 +53,38 @@ def get_mean_df(response_df, conditions=['cell_specimen_id', 'image_name']):

def get_mean_df(response_df, conditions=['cell_specimen_id', 'image_name']):
'''
Computes an analysis on a selection of responses (either flashes or trials). Computes mean_response, sem_response, the pref_stim, fraction_active_responses.
Computes an analysis on a selection of responses (either flashes
or trials). Computes mean_response, sem_response, the pref_stim,
fraction_active_responses.
INPUTS
response_df: the dataframe to group
conditions: the conditions to group by, the first entry should be 'cell_specimen_id', the second could be 'image_name' or 'change_image_name'
conditions: the conditions to group by, the first entry should be
'cell_specimen_id', the second could be 'image_name' or
'change_image_name'
OUTPUTS:
mdf: a dataframe with the following columns:
mean_response: the average mean_response for each condition
sem_response: the sem of the mean_response
mean_trace: the average dff trace for each condition
sem_trace: the sem of the mean_trace
mean_responses: the list of mean_responses for each element of each group
pref_stim: if conditions includes image_name or change_image_name, sets a boolean column for whether that was the cell's preferred stimulus
fraction_significant_responses: the fraction of individual image presentations or trials that were significant (p_value > 0.05)
mean_responses: the list of mean_responses for each element
of each group
pref_stim: if conditions includes image_name or
change_image_name, sets a boolean column for whether
that was the cell's preferred stimulus
fraction_significant_responses: the fraction of
individual image presentations or trials that were
significant (p_value > 0.05)
'''

# Group by conditions
rdf = response_df.copy()
mdf = rdf.groupby(conditions).apply(get_mean_sem_trace)
mdf = mdf[['mean_response', 'sem_response', 'mean_trace', 'sem_trace', 'mean_responses']]
mdf = mdf[
['mean_response', 'sem_response', 'mean_trace',
'sem_trace', 'mean_responses']]
mdf = mdf.reset_index()

# Add preferred stimulus if we can
@@ -91,7 +109,8 @@ def get_mean_sem_trace(group):
group: a pandas groupby object
OUTPUT:
a pandas series with the mean_response, sem_response, mean_trace, sem_trace, and mean_responses computed for the group.
a pandas series with the mean_response, sem_response,
mean_trace, sem_trace, and mean_responses computed for the group.
'''
mean_response = np.mean(group['mean_response'])
mean_responses = group['mean_response'].values
@@ -105,13 +124,18 @@

def annotate_mean_df_with_pref_stim(mean_df):
'''
Computes the preferred stimulus for each cell/trial or cell/flash combination. Preferred image is computed by seeing which image evoked the largest average mean_response across all images.
Computes the preferred stimulus for each cell/trial or
cell/flash combination. Preferred image is computed by seeing
which image evoked the largest average mean_response across
all images.
INPUTS:
mean_df: the mean_df to be annotated
OUTPUTS:
mean_df with a new column appended 'pref_stim' which is a boolean TRUE/FALSE for whether that image was that cell's preferred image.
mean_df with a new column appended 'pref_stim' which is a
boolean TRUE/FALSE for whether that image was that cell's
preferred image.
ASSERTS:
Each cell has one unique preferred stimulus
@@ -131,10 +155,13 @@ def annotate_mean_df_with_pref_stim(mean_df):
for cell in mdf['cell_specimen_id'].unique():
mc = mdf[(mdf['cell_specimen_id'] == cell)]
mc = mc[mc[image_name] != 'omitted']
temp = mc[(mc.mean_response == np.max(mc.mean_response.values))][image_name].values
temp = mc[
(mc.mean_response == np.max(mc.mean_response.values))
][image_name].values
if len(temp) > 0: # need this test if the mean_response was nan
pref_image = temp[0]
# PROBLEM, this is slow, and sets on slice, better to use mdf.at[test, 'pref_stim']
# PROBLEM, this is slow, and sets on slice,
# better to use mdf.at[test, 'pref_stim']
row = mdf[(mdf['cell_specimen_id'] == cell) & (mdf[image_name] == pref_image)].index
mdf.loc[row, 'pref_stim'] = True
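    # A vectorized alternative to this loop (a sketch of the fix the
    # PROBLEM comment asks for; assumes each cell has at least one
    # non-NaN mean_response):
    #     non_omitted = mdf[mdf[image_name] != 'omitted']
    #     pref_rows = non_omitted.groupby(
    #         'cell_specimen_id')['mean_response'].idxmax()
    #     mdf['pref_stim'] = False
    #     mdf.loc[pref_rows.values, 'pref_stim'] = True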

@@ -355,11 +382,41 @@ def get_active_cell_indices(dff_traces):


def compute_lifetime_sparseness(image_responses):
# image responses should be an array of the trial averaged responses to each image
# sparseness = 1-(sum of trial averaged responses to images / N)squared / (sum of (squared mean responses / n)) / (1-(1/N))
# image responses should be an array of the trial averaged responses
# to each image
# sparseness = (1 - (sum of mean responses)^2 /
# (N * sum of squared mean responses)) / (1 - (1/N))
# N = number of images
# after Vinje & Gallant, 2000; Froudarakis et al., 2014
N = float(len(image_responses))
ls = ((1 - (1 / N) * ((np.power(image_responses.sum(axis=0), 2)) / (np.power(image_responses, 2).sum(axis=0)))) / (
ls = ((1 - (1 / N) * ((np.power(image_responses.sum(axis=0), 2)) /
(np.power(image_responses, 2).sum(axis=0)))) / (
1 - (1 / N)))
return ls
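For reference, the quantity computed above is lifetime sparseness after Vinje & Gallant (2000); with r_i the trial-averaged response to image i out of N images, in LaTeX form:

S = \frac{1 - \left(\sum_i r_i\right)^2 / \left(N \sum_i r_i^2\right)}{1 - 1/N}

which is what the code computes, since (1/N)(\sum_i r_i)^2 / \sum_i r_i^2 = (\sum_i r_i / N)^2 / (\sum_i r_i^2 / N).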

def literal_col_eval(df: pd.DataFrame,
columns: List[str]) -> pd.DataFrame:
    '''Evaluate string entries of the specified columns with
    ast.literal_eval; non-string entries pass through unchanged.
    '''

for column in columns:
if column in df.columns:
df.loc[df[column].notnull(), column] = \
df[column][df[column].notnull()].apply(
lambda x: ast.literal_eval(x) if isinstance(x, str) else x
)
return df
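A minimal sketch of the intended behavior (hypothetical values): string entries are parsed with ast.literal_eval, while non-string entries and NaNs pass through untouched:

import numpy as np
import pandas as pd

df = pd.DataFrame({
    "driver_line": ["['Sst-IRES-Cre', 'Ai148']", np.nan],
    "ophys_experiment_id": ["[111, 222]", "[333]"],
})
df = literal_col_eval(df, columns=["driver_line", "ophys_experiment_id"])
print(df.loc[0, "driver_line"])          # ['Sst-IRES-Cre', 'Ai148']
print(df.loc[0, "ophys_experiment_id"])  # [111, 222]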


def df_list_to_tuple(df: pd.DataFrame,
columns: List[str]) -> pd.DataFrame:
    '''Convert list entries of the specified columns to tuples
    (e.g. so they can be used as dict keys); non-list entries pass
    through unchanged.
    '''

for column in columns:
if column in df.columns:
df.loc[df[column].notnull(), column] = \
df[column][df[column].notnull()].apply(
lambda x: tuple(x) if isinstance(x, list) else x
)
return df
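And a sketch of why the tuple conversion matters: lists are unhashable, so converted entries can serve as dict keys, which is what the ecephys change below relies on (hypothetical data):

import pandas as pd

df = pd.DataFrame({"size": [[20.0, 20.0], None]})
df = df_list_to_tuple(df, columns=["size"])
print(df.loc[0, "size"])                    # (20.0, 20.0)
lookup = {df.loc[0, "size"]: "full-field"}  # tuples are hashable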
27 changes: 10 additions & 17 deletions allensdk/brain_observatory/ecephys/ecephys_session.py
@@ -8,6 +8,7 @@
import scipy.stats
import xarray as xr

from allensdk.brain_observatory.behavior.swdb.utilities import literal_col_eval, df_list_to_tuple
from allensdk.brain_observatory.ecephys.ecephys_session_api import (
EcephysNwb1Api,
EcephysNwbSessionApi,
@@ -1180,26 +1181,18 @@ def _build_stimulus_presentations(
stimulus_presentations, default_column_renames, ignore_case=False
)

# pandas groupby ops ignore nans, so we need a new "nonapplicable"
# value that pandas does not recognize as null ...
# stimulus_presentations.replace("", nonapplicable, inplace=True)

# pandas does not automatically convert boolean cols for fillna
# boolean_colnames = stimulus_presentations.dtypes[
# stimulus_presentations.dtypes == "boolean"].index
# col_type_map = {colname: "object" for colname in boolean_colnames}
# stimulus_presentations = stimulus_presentations.astype(
# col_type_map).fillna(nonapplicable)
stimulus_presentations.replace("", nonapplicable, inplace=True)

# eval str(numeric) and str(lists), convert lists to tuple for
# dict key compatibility
exclude_columns = ["stimulus_name"]
for colname in stimulus_presentations.columns:
if colname not in exclude_columns:
stimulus_presentations[colname] = stimulus_presentations[
colname
].apply(naming_utilities.eval_str)


        col_list = ["phase", "size", "spatial_frequency"]
stimulus_presentations = literal_col_eval(
stimulus_presentations,
columns=col_list)
stimulus_presentations = df_list_to_tuple(
stimulus_presentations,
columns=col_list)
stimulus_presentations["duration"] = (
stimulus_presentations["stop_time"]
- stimulus_presentations["start_time"]
@@ -195,24 +195,3 @@ def map_column_names(table, name_map=None, ignore_case=True):
output = table.rename(columns=name_map)

return output


def eval_str(val):
"""Evaluates value if its type==str(numeric) or type==str(list)
Parameters
----------
val: any
Returns
-------
val: evaluated val if its type==str(numeric) or type==str(list)
or passes the val through if it does not meet the condition
"""

if isinstance(val, str):
if val.replace(".", "").isdigit(): # checks if val is numeric
val = eval(val)
elif val[0] == "[" and val[-1] == "]": # checks if val is list
val = tuple(eval(val))
return val
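Removing eval_str is also a safety improvement: eval executes arbitrary expressions, while ast.literal_eval (used by the replacement literal_col_eval) only accepts Python literals. A sketch of the difference:

import ast

ast.literal_eval("[1, 2, 3]")  # [1, 2, 3]
ast.literal_eval("1.5")        # 1.5
ast.literal_eval("__import__('os')")  # raises ValueError
# eval("__import__('os')") would happily import os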
