# Extract and Process handtrack data Notebook
## Steps
1) Load raw handtrack/cam data
2) Do ts/cam lag review
3) Apply final processing steps (spatial regression and/or plotting)

Deps:
- pythonlib
- pyvm
    - with updated pyvm globals reflecting proper dirs
- drawmonkey, ideally as an env package

In [None]:
# Enable IPython's autoreload extension; mode 2 reloads all modules before each cell runs,
# so edits to the imported packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from drawmonkey.tools.preprocess import loadSingleDataQuick
from pyvm.globals import BASEDIR
from drawmonkey.tools.handtrack import HandTrack
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# BASEDIR is the directory the pipeline uses to find data and other files.
# Normally the basedir in pyvm globals is updated when the pipeline shell scripts are run.
# That does not happen here, so it needs to be updated manually in pyvm.globals to point to
# where the ht data is stored.
print(BASEDIR)

### STEP 1
Load HT data

In [None]:
## Load handtrack data
# Session identifiers; these are passed to the loader and to HandTrack below.
animal = 'Diego'
date = '230913'
expt = 'dirgrammardiego5d'
sess = '1'
sess_print = ''

fd = loadSingleDataQuick(animal, date, expt, sess)
# NOTE(review): the original author left an unexplained "Idk why this happens" remark here;
# it is unclear what it refers to -- confirm or remove.

# Both indices are 0 for this session (presumably first video / first ml2 trial -- confirm).
ind1_vid = ind1_ml2 = 0
HT = HandTrack(ind1_vid, ind1_ml2, fd, date, expt, animal, sess_print)
HT.load_campy_data(ind1_ml2)

In [None]:
# Show the coefs available for this day
print(HT.Coefs)

# Per the original author, the HT class function runs for both coefs automatically;
# here the user picks one manually. The value is used as a dict key and in output paths below.
coefs = '220914_f12_dlc'

In [None]:
# Process handtrack data for a range of trials.
# If processed data has already been saved (and skip_load is False), load it instead of
# recomputing; otherwise process each trial in trange.

# Range of trials, 1 indexed
trange = range(10,20)

data_dir = f'{BASEDIR}/{animal}/{date}_{expt}{sess_print}'
processed_path = f'{data_dir}/processed_data.pkl'
skip_load = False
# Defined before branching so it exists whichever branch runs.
skipped_trials = []
if os.path.exists(processed_path) and not skip_load:
    # NOTE: unpickling can execute arbitrary code; only load files produced by this pipeline.
    with open(processed_path,'rb') as f:
        dat_trials = pickle.load(f)
else:
    # Maintain the {coefs: {trial: data}} dict structure expected later
    dat_trials = {coefs: {}}
    # Processing takes a while and very infrequent errors occur, so failed trials are
    # recorded and skipped rather than aborting the run. Only Exception is caught so that
    # a keyboard interrupt still stops the loop.
    for trial_ml2 in trange:
        try:
            dat_trials[coefs][trial_ml2],_,_ = HT.process_data_singletrial(trial_ml2, coefs=coefs)
        except Exception as err:
            skipped_trials.append(trial_ml2)
            print(f'Skipping trial {trial_ml2}: {err!r}')

In [None]:
# Keep only the trials that fall inside trange, preserving the {coefs: {trial: data}} layout
dat_trials_short = {
    coefs: {
        trial_num: trial_dat
        for trial_num, trial_dat in dat_trials[coefs].items()
        if trial_num in trange
    }
}

### STEP 2
Do lag stuff

In [None]:
from pythonlib.tools.camtools import get_lags,finalize_alignment_data

# Directory where the lag function saves its plots
outdir = f'{data_dir}/lags/{coefs}'
print(outdir)

# Lag results are stored under the 'corr_lags' key, which is the structure handed to
# finalize_alignment_data later.
lags = {
    'corr_lags': get_lags(
        dat_trials_short,
        outdir,
        coefs,
        True,
    ),
}

Now go to outdir (data_dir/lags/coefs) and find the indices of the good lags to use for the final calculation.

In [None]:
# Indices follow the plot file-name convention: 'trial-stroke'.
# Edit this list after reviewing the plots saved in outdir.
good_inds = [
    '10-0',
    '12-0',
    '15-0',
    '16-0',
    '17-0',
    '18-0',
]
fig, corr_lag_mean = finalize_alignment_data(lags, good_inds)

# Save the summary figure next to the per-trial lag plots
lag_fig_path = f'{outdir}/lag_fig.png'
fig.savefig(lag_fig_path)

### STEP 3
Load HT data again with regression and new lag num

In [None]:
# Re-process every trial in trange using the fitted regression and the mean lag from STEP 2.
# dat_trials_lag maps trial number -> processed data; skipped_trials lists trials that failed.
dat_trials_lag = {}
skipped_trials = []
# Now can fit regression
HT.fit_regression(trange,corr_lag_mean,coefs,out=f'{data_dir}/transforms')

# NOTE(review): a sign check (`corr_lag_mean > 0`) was left disabled by the original author;
# confirm whether the lag is required to be positive.

for trial_ml2 in trange:
    try:
        # Per the original author, this automatically adapts to include regressed data
        dat_trials_lag[trial_ml2],_,_ = HT.process_data_singletrial(trial_ml2, coefs=coefs,ts_cam_offset=corr_lag_mean)
    except Exception as err:
        # Record and report the failure instead of silently swallowing it; only Exception
        # is caught so that a keyboard interrupt still stops the loop.
        skipped_trials.append(trial_ml2)
        print(f'Skipping trial {trial_ml2}: {err!r}')
    finally:
        # Close figures whether or not the trial succeeded so they do not accumulate
        plt.close('all')