In [7]:
#Import packages
import pandas as pd
import h5py
import numpy as np
import scipy as sp
import scipy.signal as sg
import scipy.stats as st
import xarray as xr
import os
import matplotlib.pyplot as plt
from matplotlib import patches
import ast
from sklearn import svm
import glob
import re
import pickle

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold

import npc_lims
from npc_sessions import DynamicRoutingSession
from dynamic_routing_analysis import spike_utils, decoding_utils

%matplotlib widget

In [None]:
# TODO: change all of this to work with the npc_sessions framework

In [8]:
# Ephys sessions that are both uploaded and annotated, restricted to the two
# projects handled in this notebook.
target_projects = ('TempletonPilotSession', 'DynamicRouting')
all_ephys_sessions = tuple(
    info
    for info in npc_lims.get_session_info(is_ephys=True)
    if info.is_uploaded and info.is_annotated and info.project in target_projects
)



In [9]:
# test=np.load(r"\\allen\programs\mindscope\workgroups\templeton\TTOC\average video frames and motion\behavior\620263_2022-07-26_0_trial_avg_frames.npz", allow_pickle=True)

In [10]:
def load_trial_avg_pixels_motion(session,session_info,vid_angle):
    """Load the saved trial-averaged video frames and motion for one session.

    Parameters
    ----------
    session
        Object with an ``id`` attribute; files whose name starts with this id
        are searched for.
    session_info
        Object with a ``project`` string; it selects the root folder
        ('Templeton' or 'DynamicRouting' must appear in the project name).
    vid_angle : str
        'face' or 'behavior' (case-insensitive); selects the sub-folder.

    Returns
    -------
    tuple
        ``(avg_prestim_frames, avg_prestim_motion)`` as stored in the file, or
        ``([], [])`` if no file matching the session id exists.

    Raises
    ------
    ValueError
        If the project or the video angle is not one of the recognised options.
    """

    if 'Templeton' in session_info.project:
        main_vid_path=r'\\allen\programs\mindscope\workgroups\templeton\TTOC\average video frames and motion'
    elif 'DynamicRouting' in session_info.project:
        main_vid_path=r'\\allen\programs\mindscope\workgroups\dynamicrouting\Ethan\average video frames and motion'
    else:
        # previously this fell through and failed later with an unbound variable
        raise ValueError('unrecognized project: '+str(session_info.project))

    angle=vid_angle.lower()
    if angle not in ('face','behavior'):
        raise ValueError("vid_angle must be 'face' or 'behavior', got: "+str(vid_angle))
    avg_frames_path=os.path.join(main_vid_path,angle)

    # sort so that the chosen file is deterministic when several files match
    frames_files=sorted(glob.glob(os.path.join(avg_frames_path,session.id+'*')))

    if len(frames_files)==0:
        return [],[]

    # NpzFile keeps the archive open until closed; read both arrays and release it
    with np.load(frames_files[0]) as frames:
        return frames['avg_prestim_frames'], frames['avg_prestim_motion']


# def custom_decoder(input_data,labels):
    
#     output={}
    
#     scaler = StandardScaler()
#     skf = StratifiedKFold(n_splits=5,shuffle=True)
    
#     scaler.fit(input_data)
#     X = scaler.transform(input_data)
#     y = labels
    
#     if len(np.unique(labels))>2:
#         y_dec_func=np.full((len(y),len(np.unique(labels))), fill_value=np.nan)
#     else:
#         y_dec_func=np.full(len(y), fill_value=np.nan)
 
#     if type(y[0])==bool:
#         ypred=np.full(len(y), fill_value=False)
#     elif type(y[0])==str:
#         ypred=np.full(len(y), fill_value='       ')
#     else:
#         ypred=np.full(len(y), fill_value=np.nan)

#     tidx_used=[]
    
#     coefs=[]
#     classes=[]
# #     feature_names=[]
#     intercept=[]
#     params=[]

#     for train,test in skf.split(X, y):
#         clf=svm.LinearSVC(max_iter=5000)
#         clf.fit(X[train],y[train])
#         ypred[test] = clf.predict(X[test])
#         y_dec_func[test] = clf.decision_function(X[test])
#         tidx_used.append([test])
#         coefs.append(clf.coef_)
#         classes.append(clf.classes_)
# #         feature_names.append(clf.feature_names_in_)
#         intercept.append(clf.intercept_)
#         params.append(clf.get_params())

#     cr_dict=classification_report(y, ypred, output_dict=True)
#     cr_df=pd.DataFrame.from_dict(cr_dict)

#     output['cr']=cr_df
#     output['pred_label']=ypred
#     output['true_label']=y
#     # output['trial_sel_idx']=trial_sel
#     output['trials_used']=tidx_used
#     output['decision_function']=y_dec_func
#     output['coefs']=coefs
#     output['classes']=classes
# #     output['feature_names']=feature_names
#     output['intercept']=intercept
#     output['params']=params
    
#     return output


In [30]:
# Decode block context ('vis'/'aud') from trial-averaged raw video, comparing
# decoding of the "real" block labels against randomly permuted "pseudo" block
# labels, for several block subdivisions. One pickle is written per session.

# ---- configuration ----
savepath=r'\\allen\programs\mindscope\workgroups\templeton\TTOC\decoding results\use_raw_video'
filename='use_raw_video.pkl'

vid_angle='behavior'
trnum='all'
n_repeats=1
binsize=0.2
time_bins=np.arange(-0.2,0,binsize)
balance_labels=False
n_block_repeats=100
crossval='5_fold'
crossval_index=None
labels_as_index=True

block_multipliers=[1,2,3,4,5,10]
# Each base block spans block_duration_min*60 units of `start_time`
# (presumably seconds -- TODO confirm); a multiplier m splits it into m sub-blocks.
block_duration_min=10


def _label_trials_by_block(trial_start_times,session_start_time,block_labels,block_multiplier):
    """Give every trial the label and number of the last block starting at or before it.

    Blocks are visited in order and each one overwrites all trials starting at
    or after its start time, so trials after the final block keep the final
    block's label. Trials never reached keep 'nan' / np.nan.
    """
    trial_labels=np.full(len(trial_start_times), fill_value='nan')
    trial_block_nums=np.full(len(trial_start_times), fill_value=np.nan)
    for block,label in enumerate(block_labels):
        block_start_time=session_start_time+block*(block_duration_min/block_multiplier)*60
        at_or_after_block_start=trial_start_times>=block_start_time
        trial_labels[at_or_after_block_start]=label
        trial_block_nums[at_or_after_block_start]=block
    return trial_labels,trial_block_nums


for sel_session in all_ephys_sessions[-1:]:

    svc_results={}

    session=DynamicRoutingSession(sel_session)
    session_info=npc_lims.get_session_info(sel_session)
    print(session_info.id)

    generate_labels = session_info.project=='TempletonPilotSession'

    mean_trial_frames,mean_trial_motion = load_trial_avg_pixels_motion(session,session_info,vid_angle)
    if len(mean_trial_frames)==0:
        print('no frame data, skipping experiment')
        continue
    print('frame data loaded')

    # fetch the trials table once instead of re-evaluating session.trials[:] in every loop
    trials=session.trials[:]
    trial_start_times=trials['start_time'].to_numpy()
    start_time=trials['start_time'].iloc[0]

    #save metadata about this session & decoder params
    svc_results['metadata']=session_info
    svc_results['trial_numbers']=trnum
    svc_results['n_repeats']=n_repeats
    svc_results['time_bins']=time_bins
    svc_results['balance_labels']=balance_labels

    predict=['block_ids']
    p=predict[0]

    if p=='block_ids':
        #exclude any trials that had opto stimulation
        # NOTE(review): the trial_index_in_block>=5 filter is only applied when an
        # 'opto_power' column exists -- confirm that this asymmetry is intended.
        if 'opto_power' in trials.columns:
            trial_sel = np.asarray(trials.query('opto_power.isnull() and trial_index_in_block>=5').index)
        else:
            trial_sel = np.asarray(trials.index)
    # trial_sel is used below as positional indices into per-trial arrays; this
    # assumes the trials table has a default 0..n-1 index -- TODO confirm.

    for block_multiplier in block_multipliers:
        svc_results[block_multiplier]={}

        block_context_names=np.array(['vis','aud'])

        # make "real" subdivided blocks
        if session_info.project=='TempletonPilotSession':
            # no context in this project: pick the starting label at random
            first_context=np.random.choice(block_context_names)
        elif session_info.project=='DynamicRouting':
            first_context=trials['context_name'].iloc[0]
        else:
            raise ValueError('unrecognized project: '+str(session_info.project))

        if first_context=='vis':
            block_context_index=([0]*block_multiplier+[1]*block_multiplier)*3
        elif first_context=='aud':
            block_context_index=([1]*block_multiplier+[0]*block_multiplier)*3
        else:
            # previously left block_context_index unbound (or stale from the last multiplier)
            raise ValueError('unexpected first context: '+str(first_context))

        context,block_nums=_label_trials_by_block(
            trial_start_times,start_time,block_context_names[block_context_index],block_multiplier)
        block_index=block_nums[trial_sel]
        pred_var=context[trial_sel]

        #make pseudo blocks: same block boundaries, randomly permuted vis/aud labels
        blocks=np.array([0,1]*3*block_multiplier)

        block_index_pseudo=[]
        pred_var_pseudo=[]

        for nn in range(0,n_block_repeats):
            shuffled_blocks=np.random.choice(blocks,len(blocks),replace=False)
            fake_context,fake_block_nums=_label_trials_by_block(
                trial_start_times,start_time,block_context_names[shuffled_blocks],block_multiplier)
            block_index_pseudo.append(fake_block_nums[trial_sel])
            pred_var_pseudo.append(fake_context[trial_sel])

        svc_results[block_multiplier]['block_index']=block_index
        svc_results[block_multiplier]['pred_var']=pred_var
        svc_results[block_multiplier]['block_index_pseudo']=block_index_pseudo
        svc_results[block_multiplier]['pred_var_pseudo']=pred_var_pseudo

        #loop through video data types
        for vid_type in ['frames','motion']:
            svc_results[block_multiplier][vid_type]={}
            if vid_type=='frames':
                vid_data=mean_trial_frames
            elif vid_type=='motion':
                vid_data=mean_trial_motion

            svc_results[block_multiplier][vid_type][p]={}

            # every pixel is one feature; the indexing below treats the third axis as trials
            n_features=vid_data.shape[0]*vid_data.shape[1]
            svc_results[block_multiplier][vid_type][p]['n_features']=n_features

            #loop through time bins
            for tt,t_start in enumerate(time_bins):

                svc_results[block_multiplier][vid_type][p][tt]={}

                #loop through repeats
                for nn in range(0,n_repeats):

                    #option to balance number of labels for training
                    if balance_labels:
                        subset_ind=[]
                        conds = np.unique(pred_var)
                        cond_count=[]

                        if trnum=='all':
                            for cc in conds:
                                cond_count.append(np.sum(pred_var==cc))
                            use_trnum=np.min(cond_count)
                        else:
                            use_trnum = trnum

                        for cc in conds:
                            cond_inds=np.where(pred_var==cc)[0]
                            if len(cond_inds)<use_trnum:
                                use_trnum=len(cond_inds)
                            subset_ind.append(np.random.choice(cond_inds,use_trnum,replace=False))
                        subset_ind=np.sort(np.hstack(subset_ind))

                    else:
                        subset_ind=np.arange(0,len(trial_sel))

                    # subset_ind indexes into trial_sel, not into the full trial list:
                    # map back to trial indices so features line up with pred_var
                    # (assumes the video trial axis follows session trial order -- TODO confirm)
                    trial_ind=trial_sel[subset_ind]
                    input_data = vid_data[:,:,trial_ind].reshape(n_features,len(trial_ind)).T
                    labels=pred_var[subset_ind].flatten()

                    if np.isnan(input_data).any():
                        continue

                    # block-wise cross-validation needs one block number per selected trial
                    if crossval=='blockwise' and crossval_index is None:
                        real_crossval_index=block_index[subset_ind]
                    else:
                        real_crossval_index=crossval_index

                    # NOTE: these dicts are re-created on every repeat, so only the
                    # last repeat is kept when n_repeats>1
                    bin_results=svc_results[block_multiplier][vid_type][p][tt]
                    bin_results['real']={}
                    bin_results['pseudo']={}
                    for bb in range(0,n_block_repeats):

                        bin_results['real'][bb]=decoding_utils.linearSVC_decoder(
                            input_data=input_data,
                            labels=labels,
                            crossval=crossval,
                            crossval_index=real_crossval_index,
                            labels_as_index=labels_as_index
                            )

                        temp_pred_var=pred_var_pseudo[bb][subset_ind]
                        if crossval=='blockwise':
                            # subset the same way as the labels so lengths agree
                            pseudo_crossval_index=block_index_pseudo[bb][subset_ind]
                        else:
                            pseudo_crossval_index=None

                        bin_results['pseudo'][bb]=decoding_utils.linearSVC_decoder(
                            input_data=input_data,
                            labels=temp_pred_var,
                            crossval=crossval,
                            crossval_index=pseudo_crossval_index,
                            labels_as_index=labels_as_index
                            )

    print(session_info.id+' done')

    with open(os.path.join(savepath,session.id+'_'+filename), 'wb') as handle:
        pickle.dump(svc_results, handle, protocol=pickle.HIGHEST_PROTOCOL)

    

620263_2022-07-26
frame data loaded


  return cls(**config)


trialSoundArray empty; regenerating sound arrays


aligning sound waveforms: 100%|████████████| 673/673 [02:07<00:00,  5.29trial/s]


In [29]:
# Show which label type the decoding cell is currently predicting
p

['block_ids']

In [13]:
# savepath=r'\\allen\programs\mindscope\workgroups\templeton\TTOC\decoding results\use_raw_video'
# with open(os.path.join(savepath,session.id+'_'+filename), 'wb') as handle:
#     pickle.dump(svc_results, handle, protocol=pickle.HIGHEST_PROTOCOL)    

In [14]:
# List the top-level entries of the results dict for the last processed session
svc_results.keys()

dict_keys(['metadata', 'trial_numbers', 'n_repeats', 'time_bins', 'balance_labels', 'frames', 'motion'])

In [25]:
# Id of the session the results above belong to
session_info.id

'620263_2022-07-26'

In [24]:
# Decoder output for the first block multiplier, 'frames' data, first time bin,
# first repeat with the real block labels. Results are nested as
# [block_multiplier][vid_type][label][time_bin]['real'|'pseudo'][repeat].
svc_results[block_multipliers[0]]['frames']['block_ids'][0]['real'][0]

{'cr': {'0': {'precision': 0.7126436781609196,
   'recall': 0.7230320699708455,
   'f1-score': 0.7178002894356006,
   'support': 343.0},
  '1': {'precision': 0.703125,
   'recall': 0.6923076923076923,
   'f1-score': 0.6976744186046512,
   'support': 325.0},
  'accuracy': 0.7080838323353293,
  'macro avg': {'precision': 0.7078843390804598,
   'recall': 0.7076698811392689,
   'f1-score': 0.7077373540201259,
   'support': 668.0},
  'weighted avg': {'precision': 0.7080125847443045,
   'recall': 0.7080838323353293,
   'f1-score': 0.708008510962459,
   'support': 668.0}},
 'pred_label': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1., 0., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 0., 1.,
        1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1.,
        1., 1., 1., 1., 1., 1., 1

In [21]:
# The session selected by the decoding loop (it iterates over all_ephys_sessions[-1:])
all_ephys_sessions[-1]

SessionInfo(id='620263_2022-07-26', project='TempletonPilotSession', is_ephys=True, is_sync=True, allen_path=WindowsUPath('//allen/programs/mindscope/workgroups/templeton/TTOC/2022-07-26_14-09-36_620263'), experiment_day=1, session_kwargs={}, notes='', issues=[])