In [None]:
import os
import pickle
import glob
import random
import time

import pandas as pd
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
from sklearn import ensemble, svm
from sklearn.metrics import balanced_accuracy_score, classification_report
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import RobustScaler, StandardScaler

import npc_lims
from npc_sessions import DynamicRoutingSession
import npc_sessions.utils
from dynamic_routing_analysis import spike_utils, decoding_utils


In [None]:
# Materialise the session records once as a tuple so later cells can index and slice them.
all_ephys_sessions = tuple(
    npc_lims.get_session_info(is_ephys=True, is_uploaded=True, is_annotated=True)
)

In [None]:
def _mean_pre_stim_svd(motsvd,cam_frames,stim_start_times,keep_n_SVDs,time_before,time_after,fps):
    """Average the leading motion-SVD columns over a frame window around each stimulus onset.

    Parameters
    ----------
    motsvd : 2-D array-like, indexed as [frame, component]
    cam_frames : array-like of camera frame timestamps
    stim_start_times : iterable of stimulus onset times, one per trial
    keep_n_SVDs : number of leading components to keep
    time_before, time_after, fps : window bounds (seconds) and frame rate used to
        convert them to a frame count

    Returns
    -------
    np.ndarray of shape (keep_n_SVDs, n_trials); a trial's column is NaN when no
    complete window could be read for it.
    """
    motsvd=np.asarray(motsvd)
    cam_frames=np.asarray(cam_frames)
    n_window_frames=int((time_before+time_after)*fps)
    n_video_frames=motsvd.shape[0]

    svd_by_trial=np.full((n_window_frames,keep_n_SVDs,len(stim_start_times)),np.nan)

    for tt,stim_start_time in enumerate(stim_start_times):
        frames_at_or_after_stim=np.flatnonzero(cam_frames>=stim_start_time)
        if frames_at_or_after_stim.size==0:
            # no camera frame at/after this stimulus (also the case for a NaN onset time)
            continue

        stim_start_frame=frames_at_or_after_stim[0]
        trial_start_frame=int(stim_start_frame-time_before*fps)
        trial_end_frame=int(stim_start_frame+time_after*fps)

        if trial_start_frame<0:
            # the window would begin before the first frame; a negative slice start would
            # return the wrong number of rows, so leave this trial as NaN instead
            continue
        if trial_start_frame>=n_video_frames or trial_end_frame>=n_video_frames:
            # the window runs past the end of the SVD output; leave this trial as NaN
            continue

        svd_by_trial[:,:,tt]=motsvd[trial_start_frame:trial_end_frame,:keep_n_SVDs]

    # mean over the frame axis; all-NaN trials stay NaN (numpy emits a RuntimeWarning for them)
    return np.nanmean(svd_by_trial,axis=0)


def load_facemap_data(session,session_info,trials,vid_angle,keep_n_SVDs=500,use_s3=True):
    """Load facemap motion SVDs for one session and average them in a pre-stimulus window.

    Parameters
    ----------
    session : object exposing ``id`` and ``_video_frame_times`` (items with ``name`` and
        ``timestamps``)
    session_info : object exposing ``allen_path``; only read when ``use_s3`` is False
    trials : table with a ``stim_start_time`` column, one row per trial
    vid_angle : 'behavior' or 'face'
    keep_n_SVDs : number of leading SVD components to keep
    use_s3 : when True, fetch the SVD output via ``npc_sessions``; when False, read a local
        ``*_trimmed_proc.npy`` file from a hard-coded folder

    Returns
    -------
    dict mapping ROI index -> array of shape (keep_n_SVDs, n_trials). The S3 path always
    yields a single entry under key 0; the local path yields one entry per item of the
    saved ``motSVD`` list.

    Raises
    ------
    ValueError : unsupported ``vid_angle``, or no frame-time entry matches the camera
    FileNotFoundError : (local path) no matching video file under ``session_info.allen_path``
    """
    vid_angle_npc_names={
            'behavior':'side',
            'face':'front',
            'eye':'eye',
            }
    camera_to_facemap_name = {
        "face": "Face",
        "behavior": "Behavior",
    }
    if vid_angle not in camera_to_facemap_name:
        # neither data source below has an entry for any other camera (including 'eye')
        raise ValueError(f"unsupported vid_angle {vid_angle!r}; expected one of {sorted(camera_to_facemap_name)}")

    if use_s3==False:
        local_sources={
            'behavior':(r"D:\DR Pilot Data\full_video_multi_ROI","Behavior_*.mp4"),
            'face':(r"D:\DR Pilot Data\full_video_multi_ROI_face","Face_*.mp4"),
        }
        multi_ROI_path,video_pattern=local_sources[vid_angle]
        video_paths=glob.glob(os.path.join(session_info.allen_path,video_pattern))
        if not video_paths:
            raise FileNotFoundError(f"no {video_pattern} found in {session_info.allen_path}")
        vidfilename=os.path.basename(video_paths[0])

        behav_path = os.path.join(multi_ROI_path,vidfilename[:-4]+'_trimmed_proc.npy')
        # NOTE(security): allow_pickle=True unpickles the file contents; only load trusted files
        behav_info=np.load(behav_path,allow_pickle=True)
        #actually keep all ROIs
        motsvd_by_roi=behav_info.item()['motSVD']
        motsvd_by_roi=[motsvd_by_roi[rr] for rr in range(0,len(motsvd_by_roi))]
    #use s3 data
    else:
        motion_svd = npc_sessions.utils.get_facemap_output_from_s3(
                    session.id, camera_to_facemap_name[vid_angle], "motSVD"
                )
        motsvd_by_roi=[np.asarray(motion_svd[:,:])]

    cam_frames=None
    for frame_time in session._video_frame_times:
        if vid_angle_npc_names[vid_angle] in frame_time.name:
            cam_frames=frame_time.timestamps
            break
    if cam_frames is None:
        raise ValueError(f"no video frame times found for camera {vid_angle_npc_names[vid_angle]!r}")

    #calculate mean SVD in the 0.2 sec before each stimulus onset
    time_before=0.2
    time_after=0
    fps=60

    mean_trial_behav_SVD={}
    for rr,motsvd in enumerate(motsvd_by_roi):
        mean_trial_behav_SVD[rr]=_mean_pre_stim_svd(
            motsvd,cam_frames,trials[:]['stim_start_time'],keep_n_SVDs,time_before,time_after,fps
        )

    return mean_trial_behav_SVD

In [None]:
# Linear-shift decoding: for every session, decode the context label of a central span of
# trials from input data taken at each possible trial offset ("shift"), then pickle the
# per-session results into `savepath`.
savepath=r"\\allen\programs\mindscope\workgroups\templeton\TTOC\decoding results\new_annotations\linear_shift_facemap_face_use_more_trials_20_svds"
# create the output folder up front so a missing directory cannot discard a finished session
os.makedirs(savepath,exist_ok=True)

# sessions that raised, keyed by session id -> exception (inspected in a later cell)
except_dict={}

input_data_type='facemap' #spikes or facemap
vid_angle='face' #behavior, face, eye
central_section='4_blocks_plus' #4_blocks or 4_blocks_plus

exclude_cue_trials=False
##TODO: decode the labels from the previous trial - 
#### UPDATE: not doing this, can just pick +1 as the "center" - otherwise it gets confusing

n_unit_threshold=20
keep_n_SVDs=20

#set params
spikes_binsize=0.2
spikes_time_before=0.2
spikes_time_after=0.1

decoder_binsize=0.2
decoder_time_before=0.2
decoder_time_after=0.1

# wall-clock start of the whole run; nothing inside the loop may rebind this name,
# otherwise the "time elapsed" printout becomes meaningless
start_time=time.time()

#loop through sessions
for session_info in all_ephys_sessions[:]:
    session_id=str(session_info.id)
    decoder_results={}
    try:
        # build the session inside the try so one bad session is logged instead of ending the run
        session=DynamicRoutingSession(session_info.id)

        #load trials and units
        try:
            trials=pd.read_parquet(
                npc_lims.get_cache_path('trials',session_info.id,version='0.0.214')
            )
        except Exception:
            print('no cached trials table, using npc_sessions')
            trials = session.trials[:]

        if exclude_cue_trials:
            trials=trials.query('is_reward_scheduled==False').reset_index()

        if input_data_type=='spikes':
            #make data array
            try:
                units=pd.read_parquet(
                    npc_lims.get_cache_path('units',session_info.id,version='0.0.214')
                )
            except Exception:
                print('no cached units table, using npc_sessions')
                units = session.units[:]
            #add probe to structure name
            structure_probe=spike_utils.get_structure_probe(units)
            for uu, unit in units.iterrows():
                units.loc[units['unit_id']==unit['unit_id'],'structure']=structure_probe.loc[structure_probe['unit_id']==unit['unit_id'],'structure_probe']
            
            #make trial data array for baseline activity
            trial_da = spike_utils.make_neuron_time_trials_tensor(units, trials, spikes_time_before, spikes_time_after, spikes_binsize)

        elif input_data_type=='facemap':
            mean_trial_behav_SVD = load_facemap_data(session,session_info,trials,vid_angle,keep_n_SVDs)
        
        
        #make fake blocks for templeton sessions
        if 'Templeton' in session_info.project:
            # kept under its own name so the run timer `start_time` is not overwritten
            first_trial_time=trials['start_time'].iloc[0]
            fake_context=np.full(len(trials), fill_value='nan')
            fake_block_nums=np.full(len(trials), fill_value=np.nan)
            block_context_names=['vis','aud']

            # NOTE(review): unseeded random choice, so the fake block order differs between runs
            if np.random.choice(block_context_names,1)=='vis':
                block_contexts=['vis','aud','vis','aud','vis','aud']
            else:
                block_contexts=['aud','vis','aud','vis','aud','vis']

            for block in range(0,6):
                # six consecutive 10-minute blocks; each pass relabels every trial from its
                # block start onward, so later blocks overwrite earlier ones and any trial
                # after the sixth block keeps the last block's label
                block_start_time=first_trial_time+block*10*60
                block_trials=trials[:].query('start_time>=@block_start_time').index
                fake_context[block_trials]=block_contexts[block]
                fake_block_nums[block_trials]=block
            trials['block_index']=fake_block_nums
            trials['context_name']=fake_context

        if central_section=='4_blocks':
            #find middle 4 block labels
            middle_4_block_trials=trials.query('block_index>0 and block_index<5')
        elif central_section=='4_blocks_plus':
            #middle of the first block through middle of the last block
            first_block=trials.query('block_index==0').index.values
            middle_of_first=first_block[np.round(len(first_block)/2).astype('int')]

            last_block=trials.query('block_index==5').index.values
            middle_of_last=last_block[np.round(len(last_block)/2).astype('int')]

            middle_4_block_trials=trials.loc[middle_of_first:middle_of_last]
        else:
            raise ValueError(f"unknown central_section {central_section!r}")

        middle_4_blocks=middle_4_block_trials.index.values

        #every shift that keeps the selected span inside the trial table
        negative_shift=middle_4_blocks.min()
        positive_shift=trials.index.max()-middle_4_blocks.max()
        shifts=np.arange(-negative_shift,positive_shift+1)

        decoder_results[session_id]={}
        decoder_results[session_id]['shifts'] = shifts
        decoder_results[session_id]['middle_4_blocks'] = middle_4_blocks
        decoder_results[session_id]['spikes_binsize'] = spikes_binsize
        decoder_results[session_id]['spikes_time_before'] = spikes_time_before
        decoder_results[session_id]['spikes_time_after'] = spikes_time_after
        decoder_results[session_id]['decoder_binsize'] = decoder_binsize
        decoder_results[session_id]['decoder_time_before'] = decoder_time_before
        decoder_results[session_id]['decoder_time_after'] = decoder_time_after
        decoder_results[session_id]['input_data_type'] = input_data_type
        if input_data_type=='facemap':
            decoder_results[session_id]['vid_angle'] = vid_angle
        decoder_results[session_id]['trials'] = trials
        decoder_results[session_id]['results'] = {}

        
        if input_data_type=='spikes':
            areas=units['structure'].unique()
            areas=np.concatenate([areas,['all']])
        elif input_data_type=='facemap':
            #single entry: key 0 of the dict returned by load_facemap_data
            areas=[0]

        decoder_results[session_id]['areas'] = areas

        for aa in areas:
            #make shifted trial data array
            if input_data_type=='spikes':
                if aa == 'all':
                    area_units=units
                else:
                    area_units=units.query('structure==@aa')

                n_units=len(area_units)
                if n_units<n_unit_threshold:
                    continue
            
            decoder_results[session_id]['results'][aa]={}
            decoder_results[session_id]['results'][aa]['shift']={}

            if input_data_type=='spikes':
                decoder_results[session_id]['results'][aa]['unit_ids']=area_units['unit_id'].values
                decoder_results[session_id]['results'][aa]['n_units']=len(area_units)

                #find mean ccf location of units
                decoder_results[session_id]['results'][aa]['ccf_ap_mean']=area_units['ccf_ap'].mean()
                decoder_results[session_id]['results'][aa]['ccf_dv_mean']=area_units['ccf_dv'].mean()
                decoder_results[session_id]['results'][aa]['ccf_ml_mean']=area_units['ccf_ml'].mean()

            #loop through shifts
            for sh,shift in enumerate(shifts):
                
                labels=middle_4_block_trials['context_name'].values

                if input_data_type=='spikes':
                    shifted_trial_da = trial_da.sel(trials=middle_4_blocks+shift,unit_id=area_units['unit_id'].values).mean(dim='time').values
                    input_data=shifted_trial_da.T

                elif input_data_type=='facemap':
                    trials_used=middle_4_blocks+shift
                    #drop shifted trials that fall outside the SVD array, and drop their
                    #labels too so rows and labels stay paired
                    shift_exists=trials_used<mean_trial_behav_SVD[aa].shape[1]
                    trials_used=trials_used[shift_exists]
                    labels=labels[shift_exists]

                    SVD=mean_trial_behav_SVD[aa][:,trials_used]
                    input_data=SVD.T

                    #drop trials with no usable video window (NaN rows)
                    if np.isnan(input_data).any():
                        incl_inds=~np.isnan(input_data).any(axis=1)
                        input_data=input_data[incl_inds,:]
                        labels=labels[incl_inds]

                decoder_results[session_id]['results'][aa]['shift'][sh]=decoding_utils.linearSVC_decoder(
                        input_data=input_data,
                        labels=labels,
                        crossval='5_fold',
                        crossval_index=None,
                        labels_as_index=True
                    )

            
            print(f'finished {session_id} {aa}')
        #save results
        with open(os.path.join(savepath,session_id+'_decoder_results.pkl'),'wb') as f:
            pickle.dump(decoder_results[session_id],f)

        print(f'finished {session_id}')
        print(f'time elapsed: {time.time()-start_time}')

    except Exception as e:
        #record the failure and carry on with the next session
        print(f'error in {session_id}')
        print(e)
        except_dict[session_id]=e
    


In [None]:
# except_dict_first_half=except_dict.copy()
# except_dict_first_half
# trial_da is only created when the decoding loop ran with input_data_type=='spikes';
# guard the peek so a facemap run does not raise NameError here
trial_da['trials'].max().values if input_data_type=='spikes' else None

In [None]:
# trials.query('is_reward_scheduled')
# Show the sessions that raised in the decoding loop (session id -> exception).
except_dict

In [None]:
# Scratch: look up one session record by position (index 66 is hard-coded).
all_ephys_sessions[66]

In [None]:
#TODO:
#check that SVDs are aligned - usually see changes around stimuli and especially licking
#plot SVD aligned to licks and/or stim onsets

In [None]:
# Scratch: size of axis 1 of the SVD array for the last `aa` left over from the decoding loop
# (the loop compares shifted trial indices against this bound).
mean_trial_behav_SVD[aa].shape[1]

In [None]:
# NOTE(review): private attribute of the session object; what it returns is not visible
# in this notebook - confirm before relying on it.
session._facemap

In [None]:
# Scratch copy of the '4_blocks_plus' selection, run on whatever `trials` is left in the kernel.
# Span: midpoint of block 0 through midpoint of block 5.
first_block=trials.query('block_index==0').index.values
last_block=trials.query('block_index==5').index.values

middle_of_first=first_block[int(np.round(len(first_block)/2))]
middle_of_last=last_block[int(np.round(len(last_block)/2))]

middle_4_block_trials=trials.loc[middle_of_first:middle_of_last]
middle_4_blocks=middle_4_block_trials.index.values

# Shifts that keep the span inside the trial table ...
negative_shift=middle_4_blocks.min()
positive_shift=trials.index.max()-middle_4_blocks.max()
shifts=np.arange(-negative_shift,positive_shift+1)
# ... plus one extra shift appended past the positive end
shifts=np.append(shifts,shifts.max()+1)

In [None]:
# Scratch: trial index at the midpoint of block 5.
middle_of_last

In [None]:
# Scratch: current array of trial shifts.
shifts

In [None]:
# Plot the context label (True where it equals 'vis') across the selected span of trials.
fig,ax=plt.subplots(1,1)
ax.plot(middle_4_block_trials['context_name'].values=='vis')
ax.set_xlabel('trial within selected span')
ax.set_ylabel("context_name == 'vis'")
ax.set_title('context labels used for decoding');

In [None]:
# Scratch: keys stored for the first session in decoder_results.
decoder_results[list(decoder_results.keys())[0]].keys()

In [None]:
#loop through decoder results, append to dict keyed by session id
files=glob.glob(os.path.join(savepath,'*_decoder_results.pkl'))

decoder_results={}
for ff in files:
    # NOTE(security): pickle.load can execute arbitrary code; only load files this notebook wrote
    with open(ff,'rb') as f:
        data=pickle.load(f)
    # basename handles whichever path separator the platform uses; the session id is the
    # filename prefix before '_decoder'
    decoder_results[os.path.basename(ff).split('_decoder')[0]]=data


In [None]:
# Session ids currently held in decoder_results.
decoder_results.keys()

In [None]:
# Use a separate name for the combined-output folder: rebinding `savepath` here would change
# which folder the per-session glob cell reads if that cell is re-run afterwards.
combined_savepath=os.path.join(r"\\allen\programs\mindscope\workgroups\templeton\TTOC\decoding results\linear_shift",'combined')
os.makedirs(combined_savepath,exist_ok=True)
# NOTE(review): the filename says 'behavior' although this notebook is configured for
# vid_angle='face' - confirm the intended name before overwriting an existing file.
#save results
with open(os.path.join(combined_savepath,'DR_behavior_video_test_decoder_results.pkl'),'wb') as f:
    pickle.dump(decoder_results,f)

In [None]:
# Replace decoder_results with a previously saved combined pickle.
# NOTE(review): this loads 'DR_face_video_test...' while the save cell writes
# 'DR_behavior_video_test...' - confirm which file is intended.
# NOTE(security): pickle.load can execute arbitrary code; only load trusted files.
loadpath=r"\\allen\programs\mindscope\workgroups\templeton\TTOC\decoding results\linear_shift\combined\DR_face_video_test_decoder_results.pkl"
with open(loadpath,'rb') as f:
    decoder_results=pickle.load(f)

In [None]:
# Session ids in the combined results just loaded.
decoder_results.keys()

In [None]:
# areas=units['structure'].unique()
# areas=np.concatenate([areas,['all']])
# areas

In [None]:
# `units` is only created when the decoding loop ran with input_data_type=='spikes';
# guard the peek so a facemap run does not raise NameError here
units['structure'].unique() if input_data_type=='spikes' else None

In [None]:
# Collect balanced accuracy per area for the first session, restricted to the central half
# of the shift range (from round(min/2) to round(max/2)).
session_id=list(decoder_results.keys())[0]
session_results=decoder_results[session_id]

shifts=session_results['shifts']
areas=session_results['areas']

half_neg_shift=np.round(shifts.min()/2)
half_pos_shift=np.round(shifts.max()/2)
# positions of the two half-range bounds inside `shifts`
half_neg_shift_ind=np.flatnonzero(shifts==half_neg_shift)[0]
half_pos_shift_ind=np.flatnonzero(shifts==half_pos_shift)[0]
half_shift_inds=np.arange(half_neg_shift_ind,half_pos_shift_ind+1)

bal_acc={}
for aa in areas:
    # areas skipped by the decoding loop have no results entry
    if aa not in session_results['results']:
        continue
    shift_results=session_results['results'][aa]['shift']
    area_accuracies=[]
    for sh in half_shift_inds:
        area_accuracies.append(shift_results[sh]['balanced_accuracy'])
    bal_acc[aa]=np.array(area_accuracies)


In [None]:
# Scratch: balanced accuracy for the last `aa` / `sh` left over from the loop above.
decoder_results[session_id]['results'][aa]['shift'][sh]['balanced_accuracy']

In [None]:
# Scratch: accuracy-by-shift array for the last `aa` left over from the loop above.
bal_acc[aa]

In [None]:
# Only the last expression of a cell is displayed, so the bare `shifts` line shows nothing;
# the output is the subset of shifts selected by half_shift_inds.
shifts
shifts[half_shift_inds]

In [None]:
# One figure per decoded area: balanced accuracy against trial shift, with the zero-shift
# accuracy drawn as a dashed reference line.
plotted_shifts=shifts[half_shift_inds]
for aa in areas:
    if aa not in decoder_results[session_id]['results']:
        continue

    area_accuracy=bal_acc[aa]
    true_acc=area_accuracy[plotted_shifts==0]
    # fraction of plotted shifts whose accuracy is at least the zero-shift accuracy
    pval=np.round(np.mean(area_accuracy>=true_acc),decimals=4)

    fig,ax=plt.subplots(1,1)
    ax.axhline(true_acc,color='k',linestyle='--',alpha=0.5)
    ax.plot(plotted_shifts,area_accuracy)
    ax.set_xlabel('trial shift')
    ax.set_ylabel('balanced accuracy')
    ax.set_title(str(aa)+' p='+str(pval))

        

In [None]:
#distributions of null values vs. true value
# One histogram per decoded area: accuracies across the plotted shifts, with the zero-shift
# accuracy (red dashed), the 95th percentile (black dashed) and the median (black solid).
plotted_shifts=shifts[half_shift_inds]
for aa in areas:
    if aa not in decoder_results[session_id]['results']:
        continue

    area_accuracy=bal_acc[aa]
    true_acc=area_accuracy[plotted_shifts==0]
    pval=np.round(np.mean(area_accuracy>=true_acc),decimals=4)
    pct_95=np.percentile(area_accuracy,95)

    fig,ax=plt.subplots(1,1)
    ax.axvline(true_acc,color='r',linestyle='--',alpha=0.5)
    ax.axvline(pct_95,color='k',linestyle='--',alpha=0.5)

    ax.hist(area_accuracy,bins=20)
    ax.axvline(np.median(area_accuracy),color='k')
    ax.set_xlabel('balanced accuracy')
    ax.set_ylabel('count')
    ax.set_title(str(aa)+' p='+str(pval))
        

In [None]:
# bal_acc only holds the half_shift_inds subset of shifts, so the zero-shift mask has to be
# built from that same subset (a mask over all of `shifts` has the wrong length).
# 'ACAd' is only present after a 'spikes' run that included that area.
true_acc=bal_acc['ACAd'][shifts[half_shift_inds]==0]
np.mean(bal_acc['ACAd']>=true_acc)

In [None]:
# shifted_trial_da is already a plain array (the decoding loop takes `.values`), so read
# `.shape` directly; it only exists after a 'spikes' run, hence the guard
shifted_trial_da.shape if input_data_type=='spikes' else None

In [None]:
# The original line ended in a dangling `=` (a SyntaxError). Show the per-area result keys
# instead; decoder_results is keyed by the string `session_id`.
decoder_results[session_id]['results'].keys()

In [None]:
# Scratch: rebinds middle_4_blocks using the '4_blocks' definition (block_index 1-4) and
# shows how many trials remain after the last of them.
middle_4_blocks=trials.query('block_index>0 and block_index<5').index.values
trials.index.max()-middle_4_blocks.max()

In [None]:
# middle_4_blocks.min()
# Scratch: largest trial index in the trials table.
trials.index.max()

In [None]:
# Scratch: rebinds `shifts` to a hard-coded range (-91..83) and shows the trial indices
# produced by the largest shift.
shifts=np.arange(-91,83+1)
middle_4_blocks+shifts[-1]

In [None]:
# Session ids in decoder_results, as a list so they can be indexed by position.
session_list=list(decoder_results.keys())

In [None]:
# Scratch: keys stored for the first session.
decoder_results[session_list[0]].keys()

In [None]:
# Scratch: shift array of the sixth session (position 5 is hard-coded).
decoder_results[session_list[5]]['shifts']

In [None]:
# Build one row per (session, area): zero-shift ("true") accuracy, summary statistics of the
# shifted ("null") accuracies, and the fraction of null accuracies >= the true accuracy.
session_list=list(decoder_results.keys())

all_bal_acc={}
linear_shift_dict={
    'session_id':[],
    'area':[],
    'true_accuracy':[],
    'null_accuracy_mean':[],
    'null_accuracy_median':[],
    'null_accuracy_std':[],
    'p_value':[],
}

#loop through sessions
for session_id in session_list:
    session_results=decoder_results[session_id]
    all_bal_acc[session_id]={}

    shifts=session_results['shifts']
    #restrict to the central half of the shift range
    half_neg_shift=np.round(shifts.min()/2)
    half_pos_shift=np.round(shifts.max()/2)
    half_neg_shift_ind=np.where(shifts==half_neg_shift)[0][0]
    half_pos_shift_ind=np.where(shifts==half_pos_shift)[0][0]
    half_shift_inds=np.arange(half_neg_shift_ind,half_pos_shift_ind+1)

    all_bal_acc[session_id]['shifts']=shifts
    all_bal_acc[session_id]['half_shift_inds']=half_shift_inds
    half_shifts=shifts[half_shift_inds]

    #these depend only on the session's shifts, so compute them once per session
    true_acc_ind=np.where(half_shifts==0)[0][0]
    null_acc_ind=np.where(half_shifts!=0)[0]
    
    areas=session_results['areas']

    #save balanced accuracy by shift
    for aa in areas:
        #areas skipped by the decoding loop have no results entry
        if aa not in session_results['results']:
            continue

        shift_results=session_results['results'][aa]['shift']
        area_bal_acc=np.array([shift_results[sh]['balanced_accuracy'] for sh in half_shift_inds])
        all_bal_acc[session_id][aa]=area_bal_acc

        #strip the '_probeX' suffix so the same area pools across probes; isinstance also
        #matches str subclasses such as numpy string scalars, which `type(aa)==str` rejects
        if isinstance(aa,str) and '_probe' in aa:
            area_name=aa.split('_probe')[0]
        else:
            area_name=aa

        null_accuracy=area_bal_acc[null_acc_ind]
        true_accuracy=area_bal_acc[true_acc_ind]

        #make big dict/dataframe for this:
        #save true decoding, mean/median null decoding, and p value for each area/probe
        linear_shift_dict['session_id'].append(session_id)
        linear_shift_dict['area'].append(area_name)
        linear_shift_dict['true_accuracy'].append(true_accuracy)
        linear_shift_dict['null_accuracy_mean'].append(np.mean(null_accuracy))
        linear_shift_dict['null_accuracy_median'].append(np.median(null_accuracy))
        linear_shift_dict['null_accuracy_std'].append(np.std(null_accuracy))
        linear_shift_dict['p_value'].append(np.mean(null_accuracy>=true_accuracy))
    

linear_shift_df=pd.DataFrame(linear_shift_dict)
# linear_shift_df.to_csv(os.path.join(r'\\allen\programs\mindscope\workgroups\templeton\TTOC\decoding results\linear_shift\combined','Templ_linear_shift_results.csv'))

In [None]:
# Replaces the linear_shift_df built in the previous cell with a previously saved CSV.
# NOTE(review): if that CSV was written with the default index, it will carry an extra
# unnamed index column - confirm.
linear_shift_df=pd.read_csv(os.path.join(r'\\allen\programs\mindscope\workgroups\templeton\TTOC\decoding results\linear_shift\combined','DR_linear_shift_results.csv'))

In [None]:
# Display the per-(session, area) summary table.
linear_shift_df

In [None]:
# Distinct area names present in the summary table.
linear_shift_df['area'].unique()

In [None]:
#area - fraction significant
# For each area: share of its rows with p_value below the threshold, and the row count.
p_threshold=0.05
frac_sig={'area':[],'frac_sig':[],'n_expts':[]}

for area in linear_shift_df['area'].unique():
    area_rows=linear_shift_df.query('area==@area')
    is_significant=area_rows['p_value']<p_threshold

    frac_sig['area'].append(area)
    frac_sig['frac_sig'].append(np.mean(is_significant))
    frac_sig['n_expts'].append(len(area_rows))

frac_sig_df=pd.DataFrame(frac_sig)
frac_sig_df

In [None]:
#barplot sorted by fraction significant
# Only areas with at least min_n_expts rows are shown, highest fraction first.
min_n_expts=3

plot_frac_sig_df=(
    frac_sig_df
    .query('n_expts>=@min_n_expts')
    .sort_values('frac_sig',ascending=False)
)

fig,ax=plt.subplots(1,1,figsize=(12,5))
ax.bar(plot_frac_sig_df['area'],plot_frac_sig_df['frac_sig'])
ax.set(ylabel='fraction significant decoding',xlabel='area',ylim=[0,1])
fig.tight_layout()

In [None]:
#area - diff from null
# For each area: mean over its rows of (true accuracy - null mean) and of
# (true accuracy - null median), plus the row count.
diff_from_null={
    'area':[],
    'diff_from_null_mean':[],
    'diff_from_null_median':[],
    'n_expts':[],
}

for area in linear_shift_df['area'].unique():
    area_rows=linear_shift_df.query('area==@area')
    above_null_mean=area_rows['true_accuracy']-area_rows['null_accuracy_mean']
    above_null_median=area_rows['true_accuracy']-area_rows['null_accuracy_median']

    diff_from_null['area'].append(area)
    diff_from_null['diff_from_null_mean'].append(above_null_mean.mean())
    diff_from_null['diff_from_null_median'].append(above_null_median.mean())
    diff_from_null['n_expts'].append(len(area_rows))

diff_from_null_df=pd.DataFrame(diff_from_null)
diff_from_null_df

In [None]:
#barplot sorted by diff from null mean
# Only areas with at least min_n_expts rows are shown, largest difference first.
min_n_expts=3

plot_diff_from_null_df=(
    diff_from_null_df
    .query('n_expts>=@min_n_expts')
    .sort_values('diff_from_null_mean',ascending=False)
)

fig,ax=plt.subplots(1,1,figsize=(12,5))
ax.bar(plot_diff_from_null_df['area'],plot_diff_from_null_df['diff_from_null_mean'])
ax.set(ylabel='mean difference from null',xlabel='area',ylim=[-0.025,0.12])
fig.tight_layout()


In [None]:
#barplot sorted by diff from null median
# Only areas with at least min_n_expts rows are shown, largest difference first; each tick
# label carries the number of rows behind the bar.
min_n_expts=3

plot_diff_from_null_df=diff_from_null_df.query('n_expts>=@min_n_expts').sort_values('diff_from_null_median',ascending=False)
fig,ax=plt.subplots(1,1,figsize=(12,5))
#plot against explicit positions so the tick labels below are attached to fixed ticks
bar_positions=np.arange(plot_diff_from_null_df.shape[0])
ax.bar(bar_positions,plot_diff_from_null_df['diff_from_null_median'])
ax.set_ylabel('median difference from null')
ax.set_xlabel('area')
#add labels with n_expts; str() because area can be non-string (facemap results use area 0)
area_labels=[]
for i in range(plot_diff_from_null_df.shape[0]):
    area_labels.append(str(plot_diff_from_null_df['area'].iloc[i])+' ('+str(plot_diff_from_null_df['n_expts'].iloc[i])+')')
ax.set_xticks(bar_positions)
ax.set_xticklabels(area_labels,rotation=90,ha='center')
ax.set_ylim([-0.025,0.12])
fig.tight_layout()

In [None]:
#compare DR and Templeton:
def _fraction_significant_by_area(shift_df,suffix,p_threshold):
    """Per area: fraction of rows with p_value < p_threshold and the row count.

    Returns a dict of lists with keys 'area', 'frac_sig_<suffix>' and 'n_expts_<suffix>'.
    """
    frac_col=f'frac_sig_{suffix}'
    n_col=f'n_expts_{suffix}'
    summary={'area':[],frac_col:[],n_col:[]}
    for area in shift_df['area'].unique():
        area_rows=shift_df[shift_df['area']==area]
        summary['area'].append(area)
        summary[frac_col].append(np.mean(area_rows['p_value']<p_threshold))
        summary[n_col].append(len(area_rows))
    return summary


def _difference_from_null_by_area(shift_df,suffix):
    """Per area: mean over rows of (true accuracy - null mean) and (true accuracy - null median).

    Returns a dict of lists with keys 'area', 'diff_from_null_mean_<suffix>',
    'diff_from_null_median_<suffix>' and 'n_expts_<suffix>'.
    """
    mean_col=f'diff_from_null_mean_{suffix}'
    median_col=f'diff_from_null_median_{suffix}'
    n_col=f'n_expts_{suffix}'
    summary={'area':[],mean_col:[],median_col:[],n_col:[]}
    for area in shift_df['area'].unique():
        area_rows=shift_df[shift_df['area']==area]
        summary['area'].append(area)
        summary[mean_col].append((area_rows['true_accuracy']-area_rows['null_accuracy_mean']).mean())
        summary[median_col].append((area_rows['true_accuracy']-area_rows['null_accuracy_median']).mean())
        summary[n_col].append(len(area_rows))
    return summary


p_threshold=0.05
combined_results_dir=r'\\allen\programs\mindscope\workgroups\templeton\TTOC\decoding results\linear_shift\combined'

DR_linear_shift_df=pd.read_csv(os.path.join(combined_results_dir,'DR_linear_shift_results.csv'))
DR_linear_shift_df['project']='DynamicRouting'
#fraction significant
frac_sig_DR=_fraction_significant_by_area(DR_linear_shift_df,'DR',p_threshold)
frac_sig_DR_df=pd.DataFrame(frac_sig_DR)
#diff from null
diff_from_null_DR=_difference_from_null_by_area(DR_linear_shift_df,'DR')
diff_from_null_DR_df=pd.DataFrame(diff_from_null_DR)


Templeton_linear_shift_df=pd.read_csv(os.path.join(combined_results_dir,'Templ_linear_shift_results.csv'))
Templeton_linear_shift_df['project']='Templeton'
#fraction significant
frac_sig_Templ=_fraction_significant_by_area(Templeton_linear_shift_df,'Templ',p_threshold)
frac_sig_Templ_df=pd.DataFrame(frac_sig_Templ)
#diff from null
diff_from_null_Templ=_difference_from_null_by_area(Templeton_linear_shift_df,'Templ')
diff_from_null_Templ_df=pd.DataFrame(diff_from_null_Templ)


#outer merges keep areas that appear in only one project (the other project's columns are NaN)
all_frac_sig_df=pd.merge(frac_sig_DR_df,frac_sig_Templ_df,on='area',how='outer')
all_diff_from_null_df=pd.merge(diff_from_null_DR_df,diff_from_null_Templ_df,on='area',how='outer')

In [None]:
#barplot sorted by fraction significant
# Side-by-side bars per area for both projects; an area needs at least min_n_expts rows in
# each project to be shown. Sorted by the DR fraction.
min_n_expts=2
enough_expts='n_expts_DR>=@min_n_expts and n_expts_Templ>=@min_n_expts'

plot_all_frac_sig_df=all_frac_sig_df.query(enough_expts).sort_values('frac_sig_DR',ascending=False)

fig,ax=plt.subplots(1,1,figsize=(12,5))
plot_all_frac_sig_df.plot.bar(x='area',y=['frac_sig_DR','frac_sig_Templ'],ax=ax)
ax.set(ylabel='fraction significant decoding',xlabel='area',ylim=[0,1])
fig.tight_layout()

In [None]:
#barplot sorted by DR diff from median
# Side-by-side bars per area for both projects; an area needs at least min_n_expts rows in
# each project to be shown. Sorted by the DR value.
min_n_expts=2
enough_expts='n_expts_DR>=@min_n_expts and n_expts_Templ>=@min_n_expts'

plot_all_diff_from_null_df=all_diff_from_null_df.query(enough_expts).sort_values('diff_from_null_median_DR',ascending=False)

fig,ax=plt.subplots(1,1,figsize=(12,5))
plot_all_diff_from_null_df.plot.bar(x='area',y=['diff_from_null_median_DR','diff_from_null_median_Templ'],ax=ax)
ax.set(ylabel='median difference from null',xlabel='area',ylim=[-0.025,0.12])
fig.tight_layout()



In [None]:
# Display the Templeton per-(session, area) summary table.
Templeton_linear_shift_df

In [None]:
# Overlaid histograms of 1 - p_value (labelled "zero shift percentile") for both projects,
# in bins of width 0.01.
x_vect=np.arange(0,1.01,0.01)

fig,ax=plt.subplots(1,1)
for project_df in (DR_linear_shift_df,Templeton_linear_shift_df):
    ax.hist(1-project_df['p_value'],bins=x_vect,alpha=0.5)

ax.set(xlabel='zero shift percentile',ylabel='count (session-areas)')
ax.legend(['DR','Templeton'])

In [None]:
# Overlaid histograms of (true accuracy - null median) for both projects.
x_vect=np.arange(-0.1,0.3,0.01)

fig,ax=plt.subplots(1,1)
# semi-transparent bars, as in the percentile histogram, so the second dataset does not
# hide the first where they overlap
ax.hist(DR_linear_shift_df['true_accuracy']-DR_linear_shift_df['null_accuracy_median'],bins=x_vect,alpha=0.5)
ax.hist(Templeton_linear_shift_df['true_accuracy']-Templeton_linear_shift_df['null_accuracy_median'],bins=x_vect,alpha=0.5)

ax.set_xlabel('difference from null distribution median')
ax.set_ylabel('count (session-areas)')

ax.legend(['DR','Templeton'])