In [1]:
import numpy as np
import pandas as pd
import glob
import os
import matplotlib.pyplot as plt
from scipy import signal
import sys

In [2]:
def GetRMSE(x2, y2, x1, y1):
    from scipy.spatial.distance import pdist

    er = []
    for idx in range(len(x2)):
        X = np.asarray([[x1[idx], y1[idx]], 
                         [x2[idx], y2[idx]]])
        temp_er = pdist(X,metric = 'euclidean')
        er.append(temp_er[0])
    er = np.asarray(er)
    return(er)

In [3]:
# parameters used to filter data

cutoff = 24

interpol_order = 3 # order for polynomial interpolation

# win_gauss = signal.gaussian(10,3) # gaussian kernal for smoothening interpolated data

# parameters for the savitzky-golay filter
savgol_win = 11
savgol_polyorder = 3

# window_length=7, polyorder=2,

# ROLLING_WINDOW = [11, 21, 31][0]

### remove x y points based on threshold ( of rmse distance from previous frame ) 

In [4]:
circ_parameters_path = glob.glob('../dataFolders/PaperPipelineOutput/CircleParameters/' + '*.csv')
circ_parameters = pd.read_csv(circ_parameters_path[0])

In [26]:
direc = r"../dataFolders/PaperPipelineOutput/RawTracks/"
visitnum = ['FirstVisit/', 'Later7thVisit/' ,'LaterVisit/', 'LastVisit/']

for visit in visitnum[1:2]:
    path = os.path.join(direc, visit)
    trackslist = glob.glob(path + '*.csv')
    
    outpath = os.path.join('../dataFolders/PaperPipelineOutput/FilteredTracks_v2/ManualCleanup/', visit )
    print(outpath)

../dataFolders/PaperPipelineOutput/FilteredTracks_v2/ManualCleanup/Later7thVisit/


In [27]:
for data in trackslist:
#     data = trackslist[0]
    name = os.path.basename(data)[:-4]
    print('working on ' + name)
        
    file = pd.read_csv(data)
    x = file.x.values
    y = file.y.values
    p = file.likelihood

    x_notinView = x <=5
    y_notinView = y <=5

    x[x_notinView & y_notinView]=np.nan
    y[x_notinView & y_notinView]=np.nan


    # add filter for DLC likelihood
    med = file['likelihood'].rolling(11).median()
    x[med < 0.6] = np.nan
    y[med < 0.6] = np.nan

    if x.size == 0 or y.size == 0:
        print(name + 'has emtpy x y tracks')
        continue
    
    name = [n for n in circ_parameters.name if n + '_' in data][0]

    circ_x = circ_parameters.loc[circ_parameters.name == name, 'circ_x'].values
    circ_y = circ_parameters.loc[circ_parameters.name == name, 'circ_y'].values
    circ_radii = circ_parameters.loc[circ_parameters.name == name, 'circ_radii'].values
    
    # get rmse values for subsequent frames
    rmse = GetRMSE(x[1:], y[1:], x[:-1], y[:-1])

    filtered_x = np.copy(x[1:])
    filtered_y = np.copy(y[1:])

    filtered_x[(rmse > cutoff) | (rmse == np.nan)] = np.nan
    filtered_y[(rmse > cutoff) | (rmse == np.nan)] = np.nan

    filtered_r = np.linalg.norm([filtered_x - circ_x, filtered_y - circ_y], axis = 0)
    filtered_r = filtered_r/circ_radii
    filt_trajectory = pd.DataFrame([filtered_x, filtered_y, filtered_r]).T
    filt_trajectory.columns = ['x', 'y', 'r']
    
    # Apply filters

    trajectory = filt_trajectory.copy()
    print(trajectory.shape)
    
    # for problem cases - can't do polynomial interpolation, send the raw tracks
    
    # first visit
#     if name in ['c-1_m2', 'c-2_m2']:
#         print('did not filter for %s', name)
    
#     for 7th visit
    if name in ['c-10_m2', 'c-1_m1']:
        print('did not filter for %s', name)
        
    # for LaterVisit/
#     if name in ['c-1_m1', 'c-2_m2']:
#         print('did not filter for %s', name)
        
# #     for LastVisit/
#     if name in ['c-10_m1', 'c-1_m2', 'c-2_m2', 'c-3_m21_cropped']:
#         print('did not filter for %s', name)
        
    else:
        
        for colname in trajectory.columns:
    #         print(colname)
            trajectory.loc[:, colname] = signal.medfilt(trajectory.loc[:, colname], kernel_size=11)
            trajectory.loc[:, colname] = trajectory.loc[:, colname].interpolate(method = 'polynomial', order = 3, limit = 40)

            nans = trajectory.loc[:,colname].isnull()
            trajectory.loc[:,colname] = trajectory.loc[:,colname].interpolate(method = 'pad')
            trajectory.loc[:, colname] = signal.savgol_filter(trajectory.loc[:, colname],
                                                              window_length=savgol_win,
                                                              polyorder=savgol_polyorder,
                                                              axis=0)
            trajectory.loc[nans, colname]= np.nan
        
    trajectory_r = np.linalg.norm([trajectory.loc[:,'x'].values - circ_x, trajectory.loc[:,'y'].values - circ_y], axis = 0)
    trajectory['r'] = trajectory_r/circ_radii
    
# #     fig = plt.figure()
#     axes = pd.concat([filt_trajectory, trajectory], axis = 1).plot(subplots = True, figsize = (15,8))
#     fig = plt.gcf()
# #     .get_figure()
#     fig.savefig(outpathfig + name + '_' + visit[:-1] + '.pdf')
#     plt.close() 
        
    trajectory.to_csv(outpath + name + '_' + visit[:-1] + '.csv')

working on c-10_m11_visit_6
(372, 3)




working on c-10_m12_visit_6
(1396, 3)
working on c-10_m13_visit_6
(463, 3)
working on c-10_m15_visit_6
(1172, 3)
working on c-10_m16_cropped_visit_6
(1973, 3)
working on c-10_m17_cropped_visit_6
(5616, 3)
working on c-10_m19_cropped_visit_6
(926, 3)
working on c-10_m20_cropped_visit_6
(412, 3)
working on c-10_m21_cropped_visit_6
(544, 3)
working on c-10_m22_cropped_visit_6
(509, 3)
working on c-10_m23_cropped_visit_6
(494, 3)
working on c-10_m24_cropped_visit_6
(597, 3)
working on c-10_m25_visit_6
(604, 3)
working on c-10_m2_visit_6
(412, 3)
did not filter for %s c-10_m2
working on c-10_m3_visit_6
(1735, 3)
working on c-10_m6_visit_6
(625, 3)
working on c-10_m8_visit_6
(568, 3)
working on c-10_m9_visit_6
(371, 3)
working on c-1_m10_visit_6
(122, 3)
working on c-1_m11_visit_6
(1928, 3)
working on c-1_m13_visit_6
(365, 3)
working on c-1_m14_visit_6
(233, 3)
working on c-1_m17_visit_6
(411, 3)
working on c-1_m18_cropped_visit_6
(247, 3)
working on c-1_m19_visit_6
(196, 3)
working on c-1_m

## Get frame start and stop for all moths

In [28]:
# use step 5 in moth learning to read frame IN and OUT for moths tracks

direc = r"../../MothLearning/dataFolders/Output/Step5_FilesWith_TrueTrialAnd_ProboscisDetect_v2/"
mothlist = glob.glob(direc + '*.csv')

In [29]:
pathlist = glob.glob(outpath + "c*.csv")

In [30]:
vnum = [0, 6, 19, -1]

mothID = []
startF = []
stopF= []

for file in pathlist:

        name = os.path.basename(file)
        name = name.split('_')[0] + "_" + name.split('_')[1]
        #     # insert our annotations at the right location in the dltdv files
        m = [f for f in mothlist if (name + '_') in f][0]
        frameInfo = pd.read_csv(m)

        start = frameInfo.MothIN.iloc[vnum[1]] +1
        stop = frameInfo.MothOut.iloc[vnum[1]]

        mothID.append(name)
        startF.append(start)
        stopF.append(stop)

bla = pd.DataFrame([mothID, startF, stopF]).T
bla.columns = ['mothID', 'startFrame', 'StopFrame']
bla.to_csv(outpath + 'frameNumberReference.csv', index = False)

IndexError: single positional indexer is out-of-bounds

In [22]:
frameInfo.MothIN.iloc[0] +1

35709

In [31]:
name

'c-2_m2'

In [25]:
m

'../../MothLearning/dataFolders/Output/Step5_FilesWith_TrueTrialAnd_ProboscisDetect_v2\\c-2_m2_RawDataForExplorationTime.csv'

In [32]:
frameInfo

Unnamed: 0.1,Unnamed: 0,MothIN,MothOut,ProboscisDetect,DiscoveryTime
0,0,35708,35866,,
1,1,37375,40347,38499.0,1124.0
2,2,41071,42629,41748.0,677.0
3,3,47090,48850,47639.0,549.0
4,4,52625,52832,,
