# Analysis of the tracking data
Here, the automated fly tracking is analyzed. Velocity, distance and wing angles are calculated for each fly and over time. The traces are smoothed and aligned to the stimulus onset. Median traces and median differences between stimulation and pre-stimulation period are calculated. Results are exported for plotting and statistical testing. 

Author: Bjarne Schultze <br>
Last modified: 29.11.2024

In [2]:
# Necessary modules import
import numpy as np
import pandas as pd
import h5py

import modules.analysis_utils as utils
import modules.data_handling_utils as dutils

In [3]:
# Choose an experimental group
# The value is matched against the "group" column of the experiment table,
# selects the length of the analysis time vector ("pIP10" gets a longer one),
# and is used as prefix for the names of the exported files.
exp_grp = "pIP10"

#### Preparations

Load the metadata, get the experiment names, and load the stimulus information.

In [None]:
# Define main results and metadata path
main_path = "E:/res/"
metadata_path = "./accessory_files/metadata.pkl"

# Load the experiment table (reduced version of the metadata that additionally
# holds the position of the male in the tracking data)
exp_metadat = pd.read_csv("./accessory_files/experiment_names.txt", header=None,
                          names=["filename", "group", "condition", "male_position"])
# Reduce to the information on the current experimental group
exp = exp_metadat.loc[exp_metadat["group"] == exp_grp, :]
# Get the positions of the males in the tracking data
male_pos_m = exp.loc[exp["condition"] == "m", "male_position"].values.astype("int")
male_pos_mf = exp.loc[exp["condition"] == "mf", "male_position"].values.astype("int")
# Get the experiment names
experiments = exp.loc[exp["condition"] == "m", "filename"].to_list()
experiments_mf = exp.loc[exp["condition"] == "mf", "filename"].to_list()

# Store the audio and tracking sampling rates
sampling_rate = 10000    # [Hz]
sampling_rate_tracking = 1000   # [Hz]

# Exclude one male-female experiment because its tracking data are missing.
# Experiment names and male positions are filtered with the same mask so that
# both stay aligned (the mask keeps everything if the file is not in the list).
missing_tracking = "localhost-20240419_085135"
keep_idx = np.array([name != missing_tracking for name in experiments_mf], dtype=bool)
experiments_mf = [name for name, keep in zip(experiments_mf, keep_idx) if keep]
male_pos_mf = male_pos_mf[keep_idx]

# Get the stimulus on- and offsets for all experiments
# (the male-female call additionally returns the experiments that were skipped)
_, stim_on_mf, stim_off_mf, stim_volt_mf, bad_files = dutils.load_annotations(
    experiments_mf, metadata_path=metadata_path, main_path=main_path)
_, stim_on, stim_off, stim_volt = dutils.load_annotations(
    experiments, metadata_path=metadata_path, main_path=main_path, check_full_stim_set=False)

# Remove bad files from the experiment list AND from the male positions.
# Both are iterated in lockstep with the stimulus annotations later on, so
# dropping an experiment name without its male position would pair every
# following experiment with the wrong fly.
# NOTE(review): this assumes that load_annotations already left the skipped
# experiments out of stim_on_mf / stim_off_mf / stim_volt_mf - confirm.
if len(bad_files) > 0:
    skipped = list(bad_files)
    keep_idx = np.array([name not in skipped for name in experiments_mf], dtype=bool)
    experiments_mf = [name for name, keep in zip(experiments_mf, keep_idx) if keep]
    male_pos_mf = male_pos_mf[keep_idx]

# Check if there are data for both conditions (sm: single male, mf: male-female)
mf_cond = len(stim_on_mf) > 0
sm_cond = len(stim_on) > 0

# Get the stimulus length [s] from the first stimulus of the first experiment.
# Fall back to the male-female annotations if no solitary male data exist.
if sm_cond:
    stim_len = stim_off[0][0] - stim_on[0][0]
elif mf_cond:
    stim_len = stim_off_mf[0][0] - stim_on_mf[0][0]
else:
    raise ValueError(f"No stimulus annotations found for experimental group '{exp_grp}'.")

# Width of the rolling window used for smoothing the tracking data [samples]
window_size = 300

Skipped experiment localhost-20240503_103043! Incomplete stimulus set before copulation.
Skipped experiment localhost-20240506_114629! Incomplete stimulus set before copulation.


#### Analysis
Load the tracking data, slice it around the stimuli, average it across trials and animals. Quantify the changes in certain metrics upon stimulation.

In [None]:
# Allocate lists to store the sliced tracking data
tracking_sliced_mf_m = []
tracking_sliced_mf_f = []
tracking_sliced = []
rel_tracking_sliced_m = []
rel_tracking_sliced_f = []


def _smooth_tracks(raw_tracks):
    """Smooth tracking metrics along the first axis.

    A centered Gaussian rolling mean (width `window_size`, std 80 samples) is
    used so that the smoothed trace is not shifted in time. Windows with fewer
    than 100 valid samples yield NaN.
    """
    return (pd.DataFrame(raw_tracks)
            .rolling(window_size, win_type="gaussian", min_periods=100, step=1, center=True)
            .mean(std=80)
            .to_numpy())


def _slice_tracks(tracks, onsets, offsets):
    """Cut the smoothed tracks into slices around the given stimuli."""
    return utils.slice_at_stim(tracks, onsets, offsets,
                               sampling_rate=sampling_rate_tracking, output="full")


# NOTE(review): experiments with empty tracking data are skipped in the loops
# below. Per-experiment trial counts that are derived from the stimulus
# annotations elsewhere would still include them - confirm this cannot happen.
if mf_cond:
    # Iterate over the experiments (male+female)
    for file, st_on, st_off, m_pos in zip(experiments_mf, stim_on_mf, stim_off_mf, male_pos_mf):
        # Load the data (stimulus set must not be checked as bad experiments were removed from list)
        tracking_mf, times_mf, indices = dutils.load_tracking(
            file, dataset="abs", main_path=main_path,
            metadata_path=metadata_path, check_full_stim_set=False)
        # Load the relative tracking data (distance, relative speed, etc.)
        rel_tracking, times, rel_indices = dutils.load_tracking(
            file, dataset="rel", main_path=main_path,
            metadata_path=metadata_path, check_full_stim_set=False)

        # Skip experiments without tracking data
        if len(tracking_mf) == 0:
            continue

        # The female occupies the other of the two positions (0 or 1)
        f_pos = abs(m_pos - 1)

        # Smooth the data
        tracking_mf_m = _smooth_tracks(tracking_mf[0][:, m_pos, :])
        tracking_mf_f = _smooth_tracks(tracking_mf[0][:, f_pos, :])
        rel_tracking_m = _smooth_tracks(rel_tracking[0][:, m_pos, f_pos, :])
        rel_tracking_f = _smooth_tracks(rel_tracking[0][:, f_pos, m_pos, :])

        # Slice the data around the stimuli
        tracking_sliced_mf_m.append(_slice_tracks(tracking_mf_m, st_on, st_off))
        tracking_sliced_mf_f.append(_slice_tracks(tracking_mf_f, st_on, st_off))
        rel_tracking_sliced_m.append(_slice_tracks(rel_tracking_m, st_on, st_off))
        rel_tracking_sliced_f.append(_slice_tracks(rel_tracking_f, st_on, st_off))

    # Fail with a clear message instead of an opaque concatenation error
    if not tracking_sliced_mf_m:
        raise ValueError("No male-female tracking data could be loaded; nothing to concatenate.")

    # Concatenate all slices
    tracking_sliced_mf_m = np.concatenate(tracking_sliced_mf_m)
    tracking_sliced_mf_f = np.concatenate(tracking_sliced_mf_f)
    rel_tracking_sliced_m = np.concatenate(rel_tracking_sliced_m)
    rel_tracking_sliced_f = np.concatenate(rel_tracking_sliced_f)

    # Calculate median of the tracking data over all slices (over all stimuli, ignore nan values)
    tracking_med_mf_m = np.nanmedian(tracking_sliced_mf_m, axis=0)
    tracking_med_mf_f = np.nanmedian(tracking_sliced_mf_f, axis=0)

    rel_tracking_med_mf_m = np.nanmedian(rel_tracking_sliced_m, axis=0)
    rel_tracking_med_mf_f = np.nanmedian(rel_tracking_sliced_f, axis=0)

if sm_cond:
    # Iterate over the experiments (male)
    for file, st_on, st_off, m_pos in zip(experiments, stim_on, stim_off, male_pos_m):
        # Load the data (stimulus set must not be checked for solitary male experiments)
        tracking, times, indices = dutils.load_tracking(
            file, dataset="abs", main_path=main_path,
            metadata_path=metadata_path, check_full_stim_set=False)

        # Skip experiments without tracking data
        if len(tracking) == 0:
            continue

        # Smooth the data with the same centered Gaussian kernel as in the
        # male-female condition. A trailing (uncentered) window would delay the
        # trace relative to the stimulus onset and make the conditions
        # incomparable.
        tracking_m = _smooth_tracks(tracking[0][:, m_pos, :])

        tracking_sliced.append(_slice_tracks(tracking_m, st_on, st_off))

    # Fail with a clear message instead of an opaque concatenation error
    if not tracking_sliced:
        raise ValueError("No solitary male tracking data could be loaded; nothing to concatenate.")

    # Concatenate the slices
    tracking_sliced = np.concatenate(tracking_sliced)
    # Median over all slices (ignore nan values)
    tracking_med_m = np.nanmedian(tracking_sliced, axis=0)
else:
    tracking_sliced = np.array([])

In [5]:
# Look up where the metrics of interest are stored along the metrics axis
def _first_match(labels, name):
    """Return the first index at which `labels` equals `name`."""
    return np.where(labels == name)[0][0]


abs_labels = indices[0]
velo_idx = _first_match(abs_labels, "velocity_magnitude")
accmag_idx = _first_match(abs_labels, "acceleration_mag")
# Both of the following names point to the summed wing angle
wingang_idx = _first_match(abs_labels, "wing_angle_sum")
wing_sum_idx = _first_match(abs_labels, "wing_angle_sum")

# The distance only exists in the relative (male-female) tracking data
if mf_cond:
    dist_idx = _first_match(rel_indices[0], "distance")

In [6]:
# Build the time vector for the analysis: it starts at -5 and advances in
# steps of one tracking sample; it ends (exclusively) at 10 for the pIP10
# group and at 9 for every other group
slice_end = 10 if exp_grp == "pIP10" else 9
time_vec = np.arange(-5, slice_end, 1/sampling_rate_tracking)

In [None]:
## Quantify how the metrics change upon stimulation
def _stim_change(sliced_metric, volts, n_trials):
    """Run `contdata_fi_curve` for one metric and return the first row of the transposed result."""
    curve = utils.contdata_fi_curve(sliced_metric, time_vec, volts,
                                    stim_period=[0,stim_len], trial_nums=n_trials)
    return curve.T[0]


# --- solitary male ---
if not sm_cond:
    # No data available: fall back to empty arrays
    change_velo = np.array([])
    change_wingang = np.array([])
else:
    # Number of trials in each experiment
    trial_nums = [ volts.shape[0] for volts in stim_volt ]
    # Dummy intensities (all 1) so that all intensities are averaged together
    fake_stim_volts = np.concatenate([ np.ones(n) for n in trial_nums ])

    # Changes in velocity and wing angle (averaged over all intensities)
    change_velo = _stim_change(tracking_sliced[:,:, velo_idx], fake_stim_volts, trial_nums)
    change_wingang = _stim_change(tracking_sliced[:,:, wingang_idx], fake_stim_volts, trial_nums)

# --- male-female ---
if not mf_cond:
    # No data available: fall back to empty arrays
    change_velo_m = np.array([])
    change_velo_f = np.array([])
    change_wingang_m = np.array([])
    change_dist = np.array([])
else:
    # Number of trials in each experiment
    trial_nums_mf = [ volts.shape[0] for volts in stim_volt_mf ]
    # Dummy intensities (all 1) so that all intensities are averaged together
    fake_stim_volts_mf = np.concatenate([ np.ones(n) for n in trial_nums_mf ])

    # Changes in velocity for male and female
    change_velo_m = _stim_change(tracking_sliced_mf_m[:,:, velo_idx], fake_stim_volts_mf, trial_nums_mf)
    change_velo_f = _stim_change(tracking_sliced_mf_f[:,:, velo_idx], fake_stim_volts_mf, trial_nums_mf)
    # Changes in the male wing angle
    change_wingang_m = _stim_change(tracking_sliced_mf_m[:,:, wingang_idx], fake_stim_volts_mf, trial_nums_mf)
    # Changes in the male-female distance
    change_dist = _stim_change(rel_tracking_sliced_m[:,:, dist_idx], fake_stim_volts_mf, trial_nums_mf)

#### Export the data

In [8]:
# Number of entries per condition (male-female first, solitary male second)
n_mf = change_velo_m.shape[0]
n_m = change_velo.shape[0]
# Placeholder for metrics that do not exist for solitary males
nan_pad = np.full(n_m, np.nan)

# Collect all changes column-wise; every column lists the male-female values
# followed by the solitary male values (or NaN where not applicable)
all_data_dict = {
    "condition": np.concatenate([np.repeat("mf", n_mf), np.repeat("m", n_m)]),
    "change_velocity": np.concatenate([change_velo_m, change_velo]),
    "change_velocity_f": np.concatenate([change_velo_f, nan_pad]),
    "change_wingangle": np.concatenate([change_wingang_m, change_wingang]),
    "change_distance": np.concatenate([change_dist, nan_pad]),
}

# Turn the dictionary into a data frame and write it to a csv file
all_data_df = pd.DataFrame(all_data_dict)
all_data_df.to_csv(f"E:/MT/additional_files/{exp_grp}_changes_tracking.csv")

In [9]:
def _store_dataset(h5file, path, data, dim_labels, description, index=None):
    """Create a dataset and attach its dimension labels and metadata attributes.

    Parameters
    ----------
    h5file : open h5py file to write to
    path : path of the dataset within the file
    data : array to store
    dim_labels : one label per dimension of `data`
    description : text stored in the "description" attribute
    index : optional labels stored as byte strings in the "index" attribute

    Returns
    -------
    The created dataset.
    """
    dset = h5file.create_dataset(path, data=data)
    for axis, label in enumerate(dim_labels):
        dset.dims[axis].label = label
    if index is not None:
        dset.attrs.create("index", np.array(index, dtype="S"))
    dset.attrs.create("description", description)
    return dset


# Dimension labels shared by the datasets
track_dims = ("time [s]", "tracking metrics")
change_dims = ("animals",)
# Sentence shared by the descriptions of all change datasets
change_note = "The changes represent the average changes over all stimulus intensities."

# Open a data file for writing (the context manager closes it even on errors)
with h5py.File(f"../additional_files/{exp_grp}_tracks.hdf5", "w") as fdat:
    if mf_cond:
        # Store the average tracking metrics
        _store_dataset(fdat, "/male_female/med_tracks_male", tracking_med_mf_m, track_dims,
                       "Tracking metrics median for all male animals and stimulus intensities. Male-female.",
                       index=indices[0])
        _store_dataset(fdat, "/male_female/med_tracks_female", tracking_med_mf_f, track_dims,
                       "Tracking metrics median for all female animals and stimulus intensities. Male-female.",
                       index=indices[0])

        # Store the average relative tracking metrics
        _store_dataset(fdat, "male_female/med_relative_tracks_male", rel_tracking_med_mf_m, track_dims,
                       "Relative tracking metrics median for all male animals and stimulus intensities. Male-female.",
                       index=rel_indices[0])
        _store_dataset(fdat, "male_female/med_relative_tracks_female", rel_tracking_med_mf_f, track_dims,
                       "Relative tracking metrics median for all female animals and stimulus intensities. Male-female.",
                       index=rel_indices[0])

        # Store the changes upon stimulation.
        # The descriptions are assembled from plain strings: the former
        # triple-quoted literals stored a stray leading quote character and
        # the indentation of the source code.
        _store_dataset(fdat, "male_female/change_velocity_male", change_velo_m, change_dims,
                       "Median change in velocity between during and prior to stimulation. "
                       f"{change_note} Male-female.")
        _store_dataset(fdat, "male_female/change_velocity_female", change_velo_f, change_dims,
                       "Median change in velocity between during and prior to stimulation. "
                       f"{change_note} Male-female.")
        _store_dataset(fdat, "male_female/change_distance", change_dist, change_dims,
                       "Average change in male-female distance between during and prior to stimulation. "
                       f"{change_note} Male-female.")
        _store_dataset(fdat, "male_female/change_wingangle", change_wingang_m, change_dims,
                       "Average change in the summed male wing angle between during and prior to stimulation. "
                       f"{change_note} Male-female.")

    if sm_cond:
        # Store the average tracking metrics
        _store_dataset(fdat, "/male/med_tracks_male", tracking_med_m, track_dims,
                       "Tracking metrics median for all animals and stimulus intensities. Solitary male",
                       index=indices[0])

        # Store the changes upon stimulation
        _store_dataset(fdat, "male/change_velocity_male", change_velo, change_dims,
                       "Median change in velocity between during and prior to stimulation. "
                       f"{change_note} Solitary male")
        _store_dataset(fdat, "male/change_wingangle", change_wingang, change_dims,
                       "Median change in the summed male wing angle between during and prior to stimulation. "
                       f"{change_note} Solitary male.")