## This notebook trims the tracked points to the start and stop frames manually annotated from the videos, then interpolates gaps, subsamples to 1 kHz, and smooths the trajectories.

#### Tanvi Deora, March 12 2025

In [1]:
import glob
import os

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from scipy import signal

In [2]:
# Trials to leave out of processing. They are matched by file name instead of
# by list position because glob.glob() gives no ordering guarantee, so a
# positional delete can silently drop the wrong trial.
# NOTE(review): this is presumed to be the trial that used to be removed with
# np.delete(files, [1]); its metadata row has no start/end frames -- confirm.
EXCLUDED_FILES = ('2023-04-19_Trial8_Set1_5k_xyzpts.csv',)

# sorted() makes the processing order reproducible across machines
files = sorted(glob.glob('./Data/Outputs/Raw_xyz_points/Musca-Musca_Chase_ProjectFiles_Metadata/*Trial*xyzpts.csv'))
files = np.array([path for path in files if not path.endswith(EXCLUDED_FILES)])
files

array(['./Data/Outputs/Raw_xyz_points/Musca-Musca_Chase_ProjectFiles_Metadata\\2023-04-19_Trial7_Set1_5k_xyzpts.csv',
       './Data/Outputs/Raw_xyz_points/Musca-Musca_Chase_ProjectFiles_Metadata\\2023-04-21_Trial5_1k_xyzpts.csv',
       './Data/Outputs/Raw_xyz_points/Musca-Musca_Chase_ProjectFiles_Metadata\\2023-04-21_Trial6_1k_xyzpts.csv',
       './Data/Outputs/Raw_xyz_points/Musca-Musca_Chase_ProjectFiles_Metadata\\2023-04-21_Trial7_1k_xyzpts.csv',
       './Data/Outputs/Raw_xyz_points/Musca-Musca_Chase_ProjectFiles_Metadata\\2023-04-28_Trial4_5k_xyzpts.csv',
       './Data/Outputs/Raw_xyz_points/Musca-Musca_Chase_ProjectFiles_Metadata\\2023-04-28_Trial5_5k_xyzpts.csv',
       './Data/Outputs/Raw_xyz_points/Musca-Musca_Chase_ProjectFiles_Metadata\\2023-04-28_Trial8_1k_xyzpts.csv'],
      dtype='<U106')

In [3]:
# Per-trial metadata table; the processing below reads its 'filename',
# 'start', 'end' and 'framerate' columns.
metadata_csv = './Data/Outputs/Raw_xyz_points/metadata.csv'
info = pd.read_csv(metadata_csv)
info

Unnamed: 0,filename,start,end,framerate,chase_type,Chaser,Chasee,Notes
0,2023-04-19_Trial7_Set1_5k_xyzpts.csv,487.0,926.0,5000,Intact male chasing female,I_M,I_F,Perfect
1,2023-04-19_Trial8_Set1_5k_xyzpts.csv,,,5000,Intact male chasing male,I_M,I_M,80% digitized. Needs to fill some gaps
2,2023-04-21_Trial5_1k_xyzpts.csv,115.0,283.0,1000,Intact male chasing ablated male,I_M,A_M,Can't find the project file. Only the xyz pts ...
3,2023-04-21_Trial6_1k_xyzpts.csv,56.0,276.0,1000,Intact male chasing intact male,I_M,I_M,Perfect
4,2023-04-21_Trial7_1k_xyzpts.csv,112.0,703.0,1000,Intact male chasing intact male,I_M,I_M,Perfect
5,2023-04-28_Trial4_5k_xyzpts.csv,332.0,843.0,5000,Ablated male chasing intact male,A_M,I_M,Perfect
6,2023-04-28_Trial5_5k_xyzpts.csv,1420.0,2510.0,5000,Ablated male chasing intact male,A_M,I_M,Perfect
7,2023-04-28_Trial6_5k_xyzpts.csv,58.0,1164.0,5000,Ablated male chasing intact male,A_M,I_M,Can't find the project file. Only the xyz pts ...
8,2023-04-28_Trial8_1k_xyzpts.csv,108.0,276.0,1000,Intact male chasing ablated male,I_M,A_M,Perfect


In [4]:
def trimPointsToVideo(df, start, stop):
    """Return the rows of ``df`` whose index labels run from ``start`` to ``stop``.

    The selection is a label-based ``.loc`` slice, so the row labelled
    ``stop`` is included in the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to trim.
    start, stop : index label
        First and last row labels to keep.

    Returns
    -------
    pandas.DataFrame
        The selected rows, with every column of ``df``.
    """
    frame_window = slice(start, stop)
    trimmed = df.loc[frame_window, :]
    return trimmed

In [9]:
def subsample_1k(data, framerate, fs_target=1000):
    """Downsample ``data`` to ``fs_target`` by keeping every n-th row.

    No filtering is applied here; rows are simply dropped, starting from the
    first row of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with one row per sample.
    framerate : int
        Sampling rate of ``data``. Must be a positive integer multiple of
        ``fs_target``.
    fs_target : int, optional
        Target sampling rate (default 1000).

    Returns
    -------
    pandas.DataFrame
        ``data`` itself when ``framerate == fs_target``, otherwise every
        ``framerate // fs_target``-th row of ``data``.

    Raises
    ------
    ValueError
        If ``fs_target`` is not positive, or if ``framerate`` is not a
        positive integer multiple of ``fs_target`` (plain row-dropping cannot
        reach the target rate in that case).
    """
    if fs_target <= 0:
        raise ValueError(f'fs_target must be positive, got {fs_target!r}')

    if framerate == fs_target:
        return data

    factor, remainder = divmod(framerate, fs_target)
    # factor < 1 would give a zero slice step; a non-zero remainder (or a NaN
    # framerate) would silently produce data at the wrong sampling rate
    if factor < 1 or remainder != 0:
        raise ValueError(
            f'framerate {framerate!r} is not a positive integer multiple of '
            f'the target rate {fs_target!r}; cannot subsample by dropping rows'
        )

    # int() so float or numpy framerates still yield a valid slice step
    step = int(factor)
    # Subsample by keeping every `step`-th sample
    return data.loc[::step, :]

In [10]:
# Savitzky-Golay smoothing settings, applied to the subsampled data
SAVGOL_WINDOW = 20      # window size used for filtering (in samples)
SAVGOL_POLYORDER = 3    # order of fitted polynomial
OUTPUT_DIR = './Data/Outputs/Raw_xyz_points/'
RAW_SUFFIX = '_xyzpts.csv'

for f in files:
    # basename copes with whichever path separator glob produced on this OS
    filename = os.path.basename(f)

    # look up this trial's metadata row once, by exact file name
    matches = info.loc[info['filename'] == filename]
    if matches.empty:
        raise KeyError(f'No metadata row found for {filename!r}')
    trial_info = matches.iloc[0]

    # a trial without annotated frames cannot be trimmed; casting NaN to an
    # integer would silently produce a meaningless frame window
    if pd.isna(trial_info['start']) or pd.isna(trial_info['end']):
        print(f'Skipping {filename}: no start/end frames in metadata')
        continue

    df = pd.read_csv(f)

    # get the start, stop points
    # add -1 to adjust to python starting from 0th number
    start = int(trial_info['start']) - 1
    stop = int(trial_info['end']) - 1
    new_df = trimPointsToVideo(df, start, stop)

    # interpolate to get rid of NaNs (gaps longer than `limit` rows are only
    # partially filled)
    inter_df = new_df.interpolate(method='spline', order=3, axis=0, limit=10)

    # use the framerate to subsample everything to 1k
    df_1k = subsample_1k(inter_df, trial_info['framerate'])

    # smoothen data with a Savitzky-Golay filter, one column at a time
    smooth_df = pd.DataFrame(
        {
            col_name: signal.savgol_filter(series.values, SAVGOL_WINDOW, SAVGOL_POLYORDER)
            for col_name, series in df_1k.items()
        },
        index=df_1k.index,
    )

    # strip the raw '_xyzpts.csv' ending before adding the processed suffix
    name = filename[:-len(RAW_SUFFIX)]
    smooth_df.to_csv(OUTPUT_DIR + name + '_smoothened_interpol_sampled1k_xyzpts.csv')