# Run dayData module to create multiDayData for analysis

`multiDayData` is a dictionary keyed by experiment day, where each entry holds the `dayData` object for that day.  \
Each `dayData` object runs calculations for each animal, such as finding circular distances between  \
reward-relative spatial firing peaks and comparing them to a shuffle.  \
Most attributes of `dayData` have an entry for each animal.

Requires `multi_anim_sess` to already be saved for each day. `multi_anim_sess` is a dictionary containing  \
the sess data, dF/F, and place cell booleans for each animal.


In [1]:
%matplotlib inline
# inline, widget

import os
import pickle
import dill
import numpy as np
import warnings
from datetime import datetime

from reward_relative import utilities as ut
from reward_relative import dayData as dd
    

# Reload imported modules automatically before each cell runs, so edits to
# the imported source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# NOTE(review): not referenced in the visible cells; presumably gates saving
# of figures elsewhere in the notebook -- confirm.
save_figures = False

In [2]:
# Dictionary of data/figure root directories used by the cells below.
# NOTE(review): the module name suggests these paths are specific to one
# machine ("firebird") -- import a different path_dict module to run elsewhere.
from reward_relative.path_dict_firebird import path_dictionary as path_dict
# Display the paths for reference.
path_dict

{'preprocessed_root': '/data/2P',
 'sbx_root': '/mnt/oak/InVivoDA/2P_Data',
 'gdrive_root': '/mnt/gdrive/2P_Data',
 'VR_Data': '/data/2P/VR_Data',
 'git_repo_root': '/home/mari/local_repos/2p_repos',
 'TwoPUtils': '/home/mari/local_repos/2p_repos/TwoPUtils',
 'home': '/home/mari',
 'fig_dir': '/data/2P/fig_scratch'}

# Create multiDayData: one dayData object for each experiment day

In [15]:
## Specify parameters (these are already defaults in dayData class)
bin_size = 10  # for quantifying distribution of place field peak locations
sigma = 1  # for smoothing
smooth = False  # whether to smooth for finding place cell peaks
exclude_int = True  # exclude putative interneurons
int_thresh = 0.5  # threshold passed to dayData (int_thresh) for interneuron exclusion
impute_NaNs = True  # whether to impute (interpolate) bins that are NaN in spatially-binned data

## Place cell definitions:
## 'and' = must have significant spatial information
##        in trial set 0 AND trial set 1 (i.e. before and after the reward switch)
## 'or' = must have significant spatial information in trial set 0 OR trial set 1
place_cell_logical = 'or'
ts_key = 'dff'  # which timeseries to use for finding peaks
use_speed_thr = True  # use a speed threshold to calculate new trial matrices
speed_thr = 2  # speed threshold in cm/s (excludes data at speed less than this)

reward_dist_inclusive = 50  # in cm
reward_dist_exclusive = 50  # in cm
reward_overrep_dist = 50  # in cm

experiment = 'MetaLearn'
year = 'combined'

## Experiment days to load; the commented lines are alternative day sets.
if experiment == 'NeuroMods':
    # exp_days = [1, 3, 5, 6, 7, 8, 9, 10, 12, 14, 15, 17]
    exp_days = [8, 10, 12, 14, 15, 17]
elif experiment == 'MetaLearn':
    # exp_days = [1,2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] # all days
    exp_days = [3, 5, 7, 8, 10, 12, 14]  # switch days
    # exp_days = [1,2,4,6,9,11,13] # "stay" days
else:
    # Fail here rather than letting later cells hit a NameError, or silently
    # reuse an `exp_days` left over from a previous run of this cell.
    raise ValueError(
        "Unknown experiment %r; expected 'NeuroMods' or 'MetaLearn'" % experiment
    )


# Filename tag summarising the parameter choices, assembled in one expression:
#   smoothing setting, optional interneuron-exclusion threshold,
#   inclusive reward distance, optional speed-threshold marker.
tag = (
    ('smoothed_sig%d' % sigma if smooth else 'unsmoothed')
    + ('_excInt%.1f' % int_thresh if exclude_int else '')
    + ('_inc%d' % reward_dist_inclusive)
    + ('_useSpeed' if use_speed_thr else '')
)

# Whether to also run the slow, optional computations inside the per-day loop.
add_all_computations = False

# Per-day results, filled in by the loop below and keyed by experiment day.
multiDayData = {}

# Parameters passed to the loader to pick out the saved per-day pickle.
day_params = {
    'speed': str(speed_thr),
    # shuffles for defining place cells
    'nperms': 100,
    # dF/F method
    'baseline_method': 'maximin',
    # timeseries used for identifying place cells
    'ts_key': 'events',
}


with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=RuntimeWarning)

    for d_i, exp_day in enumerate(exp_days):

        anim_list = dd.define_anim_list(experiment, exp_day, year=year)

        print(anim_list)

        multi_anim_sess = dd.load_multi_anim_sess(path_dict, exp_day, anim_list,
                                                  params=day_params
                                                  )

        # initialize class with basic info
        multiDayData[exp_day] = dd.dayData(anim_list,
                                           multi_anim_sess,
                                           exp_day=exp_day,
                                           experiment=experiment,
                                           # timeseries to use
                                           ts_key=ts_key,  # to use for analysis, reward cell fractions,
                                           #                                            # finding place cell peaks
                                           force_two_sets=True,  # of trials
                                           use_speed_thr=use_speed_thr,
                                           speed_thr=speed_thr,
                                           exclude_int=exclude_int,
                                           int_thresh=int_thresh,
                                           int_method='speed',
                                           reward_dist_exclusive=reward_dist_inclusive,
                                           reward_dist_inclusive=reward_dist_exclusive,
                                           reward_overrep_dist=reward_overrep_dist,
                                           )

        # add things to the class that are computationally intensive/time-consuming
        if add_all_computations:
            multiDayData[exp_day].add_all_the_things(anim_list, 
                                                    multi_anim_sess,
                                                    add_behavior=True,
                                                    add_cell_classes=True,
                                                    add_circ_relative_peaks=True,
                                                    add_field_dict=True,
                                                    bin_size=bin_size,  # for quantifying distribution of place field peak locations
                                                    sigma=sigma,  # for smoothing
                                                    smooth=smooth,  # whether to smooth for finding place cell peaks
                                                    # (activity will be auto smoothed for everything else)
                                                    impute_NaNs=True,

                                                    place_cell_logical=place_cell_logical,
                                                    ts_key=ts_key,
                                                    lick_correction_thr=0.35,
                                                    )

        %reset_selective -f multi_anim_sess

['GCAMP2' 'GCAMP3' 'GCAMP4' 'GCAMP5' 'GCAMP6' 'GCAMP7' 'GCAMP10' 'GCAMP11'
 'GCAMP12' 'GCAMP13' 'GCAMP14' 'GCAMP15' 'GCAMP17' 'GCAMP18' 'GCAMP19']
/data/2P/multi_anim_sess/2-3-4-5-6-7-10-11-12-13-14-15-17-18-19_expday3_speed2_perms100_maximin_events.pickle
Splitting trials in half
set 0: C trials / set 1: A trials
set 0: B trials / set 1: A trials
set 0: A trials / set 1: C trials
Splitting trials in half
set 0: A trials / set 1: C trials
Splitting trials in half
set 0: B trials / set 1: A trials
set 0: B trials / set 1: C trials
set 0: C trials / set 1: B trials
set 0: A trials / set 1: B trials
set 0: A trials / set 1: C trials
set 0: B trials / set 1: C trials
set 0: A trials / set 1: B trials
set 0: C trials / set 1: B trials
['GCAMP2' 'GCAMP3' 'GCAMP4' 'GCAMP6' 'GCAMP7' 'GCAMP10' 'GCAMP11'
 'GCAMP12' 'GCAMP13' 'GCAMP14' 'GCAMP15' 'GCAMP17' 'GCAMP18' 'GCAMP19']
/data/2P/multi_anim_sess/2-3-4-6-7-10-11-12-13-14-15-17-18-19_expday5_speed2_perms100_maximin_events.pickle
Splitting tria

In [27]:
# Show the attribute names stored on the dayData object for day 3.
vars(multiDayData[3]).keys()

dict_keys(['anim_list', 'experiment', 'place_cell_logical', 'force_two_sets', 'ts_key', 'use_speed_thr', 'speed_thr', 'exclude_int', 'int_thresh', 'int_method', 'reward_dist_exclusive', 'reward_dist_inclusive', 'reward_overrep_dist', 'activity_criterion', 'bin_size', 'sigma', 'smooth', 'impute_NaNs', 'sim_method', 'lick_correction_thr', 'exp_day', 'is_switch', 'anim_tag', 'trial_dict', 'rzone_pos', 'rzone_by_trial', 'rzone_label', 'blocks', 'activity_matrix', 'events', 'place_cell_masks', 'SI', 'overall_place_cell_masks', 'place_cell_trial_to_trial_stability', 'stability_masks', 'peaks', 'field_dict', 'plane_per_cell', 'is_int', 'is_reward_cell', 'is_end_cell', 'is_track_cell', 'pc_distr', 'rew_frac', 'rate_map', 'pv_sim_mean', 'sim_to_set0', 'sim_mat', 'curr_zone_lickrate', 'other_zone_lickrate', 'curr_vs_other_lickratio', 'in_vs_out_lickratio', 'lickpos_std', 'lickpos_com', 'lick_mat', 'def_block_by', 'cell_class', 'pos_bin_centers', 'dist_btwn_rel_null', 'dist_btwn_rel_peaks', 'rewa

In [9]:
# Every animal that appears on at least one of the loaded days.
max_anim_list = np.unique(
    np.concatenate([multiDayData[day_key].anim_list for day_key in exp_days])
)
# np.unique returns the names sorted; the stable sort by length then places
# shorter names first (e.g. 'GCAMP2' ahead of 'GCAMP10').
max_anim_list = sorted(max_anim_list, key=len)
max_anim_list

['GCAMP2',
 'GCAMP3',
 'GCAMP4',
 'GCAMP5',
 'GCAMP6',
 'GCAMP7',
 'GCAMP10',
 'GCAMP11',
 'GCAMP12',
 'GCAMP13',
 'GCAMP14',
 'GCAMP15',
 'GCAMP17',
 'GCAMP18',
 'GCAMP19']

In [14]:
# Experiment days that currently have an entry in multiDayData.
multiDayData.keys()

dict_keys([3, 5, 7, 8, 10, 12, 14])

In [15]:
# Read the 'include_ans' entry of circ_rel_stats_across_an for the last
# experiment day in exp_days.
# NOTE(review): presumably the animals included in the across-animal circular
# statistics -- confirm against the dayData class.
include_ans = multiDayData[exp_days[-1]].circ_rel_stats_across_an['include_ans']
include_ans

[]

## Save multiDayData as pickle

In [10]:
from datetime import datetime

# Output folder: <preprocessed_root>/multiDayData
file_dir = os.path.join(path_dict['preprocessed_root'], 'multiDayData')

# File name encodes the animals, the experiment days, the timeseries key,
# the parameter tag and a timestamp of when the name was built.
pkl_name = "%s_expdays%s_multiDayData_%s_%s_%s.pickle" % (
    ut.make_anim_tag(max_anim_list),
    ut.make_day_tag(exp_days),
    ts_key,
    tag,
    datetime.now().strftime("%Y%m%d-%H%M"),
)
print(pkl_name)

# overwrite=False is passed so an existing file of the same name is not replaced.
ut.write_sess_pickle(multiDayData, file_dir, pkl_name, overwrite=False)

2-3-4-5-6-7-10-11-12-13-14-15-17-18-19_expdays3_multiDayData_dff_unsmoothed_excInt0.5_inc50_useSpeed_20240916-2202.pickle
writing 2-3-4-5-6-7-10-11-12-13-14-15-17-18-19_expdays3_multiDayData_dff_unsmoothed_excInt0.5_inc50_useSpeed_20240916-2202.pickle
