This notebook is to explore methods to compute flies velocity during ball pushing experiments using sleap based fly's tracking data. This is also the first notebook in which I experiment datasets handling with polars.

# Imports

In [8]:
from pathlib import Path
import json

import numpy as np

import h5py

import polars
import platform

# Get a list of the directories containing the tracking data

In [4]:
# Get the DataFolder

if platform.system() == "Darwin":
    DataPath = Path("/Volumes/Ramdya-Lab/DURRIEU_Matthias/Experimental_data/MultiMazeRecorder/Videos")
# Linux Datapath
if platform.system() == "Linux":
    DataPath = Path("/mnt/labserver/DURRIEU_Matthias/Experimental_data/MultiMazeRecorder/Videos")

print(DataPath)

/mnt/labserver/DURRIEU_Matthias/Experimental_data/MultiMazeRecorder/Videos


Make a list of the folders I want to use. For instance, I want to use the folders that have the "tnt" in the name as I will explore velocities for different crossings with UAS-TNT. I'm also only getting flies tested in the afternoon.

In [5]:
Folders = []
for folder in DataPath.iterdir():
    minfolder = str(folder).lower()
    if "tnt" in minfolder and "tracked" in minfolder and "pm" in minfolder:
        Folders.append(folder)

Folders

[PosixPath('/mnt/labserver/DURRIEU_Matthias/Experimental_data/MultiMazeRecorder/Videos/230804_TNTscreen_PM_2_Videos_Tracked'),
 PosixPath('/mnt/labserver/DURRIEU_Matthias/Experimental_data/MultiMazeRecorder/Videos/230808_TNTscreen_PM_3_Videos_Tracked'),
 PosixPath('/mnt/labserver/DURRIEU_Matthias/Experimental_data/MultiMazeRecorder/Videos/230809_TNTscreen_PM_4_Videos_Tracked'),
 PosixPath('/mnt/labserver/DURRIEU_Matthias/Experimental_data/MultiMazeRecorder/Videos/230803_TNTscreen_PM_1_Videos_Tracked')]

In [16]:
# Build a dataframe that will store the ball y positions and the arena and corridor numbers as metadata
Dataset = polars.DataFrame()

# Loop over all the .analysis.h5 files in the folder and store the ball y positions and the arena and corridor numbers as metadata
Flynum = 0
# Loop over all the folders that don't have "Dark" in the name
for folder in Folders:
    print(f"Adding experiment {folder} to the dataset...")
    # Read the metadata.json file
    with open(folder / "Metadata.json", "r") as f:
        metadata = json.load(f)
        variables = metadata["Variable"]
        metadata_dict = {}
        for var in variables:
            metadata_dict[var] = {}
            for arena in range(1, 10):
                arena_key = f"Arena{arena}"
                var_index = variables.index(var)
                metadata_dict[var][arena_key] = metadata[arena_key][var_index]
        
        print (metadata_dict)
        
    for file in folder.glob("**/*flytrack*.analysis.h5"):
        
        flypath = file
        with h5py.File(flypath.as_posix(), "r") as f:
            dset_names = list(f.keys())
            fly_locs = f["tracks"][:].T
            node_names = [n.decode() for n in f["node_names"][:]]
            
        
        yball = fly_locs[:, :, 1, :]
        
        # Get the filename from the path
        foldername = folder.name

        # Get the arena and corridor numbers from the parent (corridor) and grandparent (arena) folder names
        arena = file.parent.parent.name
        corridor = file.parent.name
        
        # Get the metadata for this arena
        arena_key = arena.capitalize()
        arena_metadata = {var: [metadata_dict[var][arena_key]] for var in metadata_dict}
        
        Flynum += 1
        
        # Load the start and end coordinates from coordinates.npy
        start, end = np.load(file.parent / 'coordinates.npy')
        
        # Store the ball y positions, start and end coordinates, and the arena and corridor numbers as metadata
        data = {"Fly": ["Fly" + str(Flynum)],
                "yball": [list(yball[:, 0, 0])], 
                "experiment": [foldername],
                "arena": [arena], 
                "corridor": [corridor],
                "start": [start],
                "end": [end]}
        data.update(arena_metadata)

        # Use pandas.concat instead of DataFrame.append
        Dataset = polars.concat([Dataset, polars.DataFrame(data)]) 

# Explode yball column to have one row per timepoint

#Dataset.drop(columns=["Genotye", "Date",], inplace=True)

Dataset = Dataset.explode('yball')



Adding experiment /mnt/labserver/DURRIEU_Matthias/Experimental_data/MultiMazeRecorder/Videos/230804_TNTscreen_PM_2_Videos_Tracked to the dataset...
{'Date': {'Arena1': '230804', 'Arena2': '230804', 'Arena3': '230804', 'Arena4': '230804', 'Arena5': '230804', 'Arena6': '230804', 'Arena7': '230804', 'Arena8': '230804', 'Arena9': '230804'}, 'Genotype': {'Arena1': 'TNTxTH', 'Arena2': 'TNTxE-PG', 'Arena3': 'TNTxMB247', 'Arena4': 'TNTxLAL1', 'Arena5': 'TNTxDDC', 'Arena6': 'PR', 'Arena7': 'Dnc', 'Arena8': 'rut', 'Arena9': 'TNTxLAL2'}, 'Period': {'Arena1': 'PM', 'Arena2': 'PM', 'Arena3': 'PM', 'Arena4': 'PM', 'Arena5': 'PM', 'Arena6': 'PM', 'Arena7': 'PM', 'Arena8': 'PM', 'Arena9': 'PM'}, 'FeedingState': {'Arena1': 'starved_noWater', 'Arena2': 'starved_noWater', 'Arena3': 'starved_noWater', 'Arena4': 'starved_noWater', 'Arena5': 'starved_noWater', 'Arena6': 'starved_noWater', 'Arena7': 'starved_noWater', 'Arena8': 'starved_noWater', 'Arena9': 'starved_noWater'}, 'Orientation': {'Arena1': 'std',

In [17]:
Dataset.head()

Fly,yball,experiment,arena,corridor,start,end,Date,Genotype,Period,FeedingState,Orientation,Light
str,f64,str,str,str,i64,i64,str,str,str,str,str,str
"""Fly1""",454.646393,"""230804_TNTscre…","""arena2""","""corridor5""",386,96,"""230804""","""TNTxE-PG""","""PM""","""starved_noWate…","""std""","""on"""
"""Fly1""",454.683746,"""230804_TNTscre…","""arena2""","""corridor5""",386,96,"""230804""","""TNTxE-PG""","""PM""","""starved_noWate…","""std""","""on"""
"""Fly1""",454.58078,"""230804_TNTscre…","""arena2""","""corridor5""",386,96,"""230804""","""TNTxE-PG""","""PM""","""starved_noWate…","""std""","""on"""
"""Fly1""",454.42749,"""230804_TNTscre…","""arena2""","""corridor5""",386,96,"""230804""","""TNTxE-PG""","""PM""","""starved_noWate…","""std""","""on"""
"""Fly1""",454.704865,"""230804_TNTscre…","""arena2""","""corridor5""",386,96,"""230804""","""TNTxE-PG""","""PM""","""starved_noWate…","""std""","""on"""


In [18]:
Dataset['Genotype']

Genotype
str
"""TNTxE-PG"""
"""TNTxE-PG"""
"""TNTxE-PG"""
"""TNTxE-PG"""
"""TNTxE-PG"""
"""TNTxE-PG"""
"""TNTxE-PG"""
"""TNTxE-PG"""
"""TNTxE-PG"""
"""TNTxE-PG"""


In [19]:

#Dataset['yball'] = Dataset['yball'].astype(float)

print('Computing yball relative to start...')
# Compute yball_relative relative to start
Dataset['yball_relative'] = abs(Dataset['yball'] - Dataset['start'])

# Fill missing values using linear interpolation
Dataset['yball_relative'] = Dataset['yball_relative'].interpolate(method='linear')

Dataset.drop(columns=["yball",], inplace=True)


# Filter parameters
cutoff = 0.0015  # desired cutoff frequency of the filter, Hz ,      slightly higher than actual 1.2 Hz
order = 1  # sin wave can be approx represented as quadratic

#Dataset['yball_smooth'] = butter_lowpass_filter(Dataset['yball_relative'], cutoff, order)
print('Filtering yball relative to start...')
Dataset['yball_SG'] = savgol_lowpass_filter(Dataset['yball_relative'], 221, 1)

print('Removing yball relative to start...')
Dataset.drop(columns=["yball_relative",], inplace=True)

print('Defining frame and time columns...')
Dataset["Frame"] = Dataset.groupby("Fly").cumcount()

Dataset["time"] = Dataset["Frame"] / 30

# Remove the original yball column

print('Removing Frame column...')
Dataset.drop(columns=["Frame",], inplace=True)

print('Resetting index...')
Dataset.reset_index(drop=True, inplace=True)

Dataset.head()

Computing yball relative to start...


TypeError: DataFrame object does not support `Series` assignment by index.

Use `DataFrame.with_columns`.