# Trajectory Output
Functions to output the trajectories of individuals from a `.myrmidon` experiment file to a parquet file which can then be read in R. <br><br>
It is much easier to work with the trajectories directly from the myrmidon file, but in case they have to be used in a specific environment (e.g., in R on Windows) the trajectories can be output in the parquet file format. <br><br>
There are three versions of the function, two of which are very similar - one uses pandas and the other uses Dask to work with large datasets. The third uses a matcher while querying the `py-fort-myrmidon` API to extract only the trajectories of specific antIDs.

In [1]:
import py_fort_myrmidon as fm
from datetime import datetime, timedelta  # For convenient handling of time and date
import pandas as pd  # Used to create a dataframe, similar to the structure used in R
import dask.dataframe as dd  # Used to work with large datasets that don't fit in memory

## Output entire trajectories

### Function to output trajectories using pandas

In [2]:
def daily_trajectory_to_parquet(start_time, end_time, exp):
    """
    Function to extract daily trajectories as a parquet dataset, partitioned by AntID. While it is setup to extract daily trajectories, it can work for any arbitrary time duration
    :param start_time: The start datetime object. This will be converted to a fort-myrmidon Time object
    :param end_time: The end datetime object. This will be converted to a fort-myrmidon Time object
    :param exp: The opened fort-myrmidon experiment. It is passed to the query as `experiment` and its `Name` is used in the output name
    :return: None. Writes "Trajectories_<exp.Name>_<YYYYMMDD of start_time>.parquet", partitioned on AntID, with one row per position. The columns are AntID, Space, StartTime of each trajectory, Pos_time (time difference in seconds from the StartTime of the trajectory), Pos_X and Pos_Y (corresponding to the X and Y coordinates at that specific time/frame). Nothing is written if the query returns no trajectories
    """
    start = datetime.now()
    t_begin = fm.Time(start_time)
    t_stop = fm.Time(end_time)
    trajectories = fm.Query.ComputeAntTrajectories(
        experiment=exp,
        start=t_begin,
        end=t_stop,
        maximumGap=fm.Duration.Parse("1000h"),
    )
    # Columns holding one array per trajectory; they are exploded to one row per position below
    position_cols = ["Pos_time", "Pos_X", "Pos_Y"]
    # Make a list of lists with trajectory values needed. Only the first three columns of Positions are used
    traj_list = [
        [
            traj.Ant,
            traj.Space,
            traj.Start.ToDateTime(),
            traj.Positions[:, 0],
            traj.Positions[:, 1],
            traj.Positions[:, 2],
        ]
        for traj in trajectories
    ]
    if not traj_list:  # If no trajectories are output
        print("No trajectories found. No parquet file written")
        return
    # Make the list into a dataframe
    traj_df = pd.DataFrame(
        traj_list, columns=["AntID", "Space", "StartTime"] + position_cols
    )
    # Explode the array columns (using the names the dataframe actually has) to get one row per position
    traj_df = traj_df.explode(column=position_cols, ignore_index=True)
    # Exploded columns have object dtype; coerce them to numeric so they are stored as numbers
    for col in position_cols:
        traj_df[col] = pd.to_numeric(traj_df[col], errors="coerce")
    # Create file name
    f_name = "Trajectories_{}_{}.parquet".format(
        exp.Name, start_time.strftime("%Y%m%d")
    )
    # Save as parquet. pandas names the partitioning argument partition_cols (partition_on is the dask spelling)
    traj_df.to_parquet(f_name, partition_cols=["AntID"])
    end = datetime.now()
    print("Parquet file for", start_time.strftime("%d-%m-%Y"), "output in", end - start)

### Function to output trajectories using dask

In [4]:
def daily_trajectory_to_parquet_dask(start_time, end_time, exp):
    """
    Dask variant of the trajectory export, intended for datasets that are too large to handle comfortably with pandas alone. While it is setup to extract daily trajectories, it can work for any arbitrary time duration
    :param start_time: The start datetime object. This will be converted to a fort-myrmidon Time object
    :param end_time: The end datetime object. This will be converted to a fort-myrmidon Time object
    :param exp: The opened fort-myrmidon experiment. It is passed to the query as `experiment` and its `Name` is used in the output name
    :return: None. Writes "Trajectories_<exp.Name>_<YYYYMMDD of start_time>.parquet", partitioned on AntID. The columns are AntID, Space, StartTime of each trajectory, Pos_time (time difference in seconds from the StartTime of the trajectory), Pos_X and Pos_Y (corresponding to the X and Y coordinates at that specific time/frame)
    """
    started_at = datetime.now()
    ant_trajectories = fm.Query.ComputeAntTrajectories(
        experiment=exp,
        start=fm.Time(start_time),
        end=fm.Time(end_time),
        maximumGap=fm.Duration.Parse("1000h"),
    )
    # One record per trajectory; only the first three columns of Positions are kept
    rows = []
    for one_traj in ant_trajectories:
        rows.append(
            [
                one_traj.Ant,
                one_traj.Space,
                one_traj.Start.ToDateTime(),
                one_traj.Positions[:, 0],
                one_traj.Positions[:, 1],
                one_traj.Positions[:, 2],
            ]
        )
    array_cols = ["Pos_time", "Pos_X", "Pos_Y"]
    wide_df = pd.DataFrame(
        rows, columns=["AntID", "Space", "StartTime", *array_cols]
    )
    # Hand over to dask (20 partitions) before exploding, so the long table is built partition by partition
    long_dd = dd.from_pandas(wide_df, npartitions=20)
    long_dd = long_dd.explode(column=array_cols)
    # The exploded columns are object dtype; coerce them to numeric
    for col in array_cols:
        long_dd[col] = dd.to_numeric(long_dd[col], errors="coerce")
    # Output name is built from the experiment name and the start date
    out_name = "Trajectories_{}_{}.parquet".format(
        exp.Name, start_time.strftime("%Y%m%d")
    )
    long_dd.to_parquet(out_name, partition_on=["AntID"])
    finished_at = datetime.now()
    print(
        "Parquet file for",
        start_time.strftime("%d-%m-%Y"),
        "output in",
        finished_at - started_at,
    )

### Function to output trajectories of focal individuals

In [7]:
def daily_trajectory_to_parquet_focal(start_time, end_time, exp, matcher_query):
    """
    Function to extract the daily trajectories of selected ants as a parquet dataset, partitioned by AntID. While it is setup to extract daily trajectories, it can work for any arbitrary time duration
    :param start_time: The start datetime object. This will be converted to a fort-myrmidon Time object
    :param end_time: The end datetime object. This will be converted to a fort-myrmidon Time object
    :param exp: The opened fort-myrmidon experiment. It is passed to the query as `experiment` and its `Name` is used in the output name
    :param matcher_query: The fm matcher corresponding to the focal IDs
    :return: None. Writes "Trajectories_<exp.Name>_<YYYYMMDD of start_time>.parquet", partitioned on AntID, with one row per position. The columns are AntID, Space, StartTime of each trajectory, Pos_time (time difference in seconds from the StartTime of the trajectory), Pos_X and Pos_Y (corresponding to the X and Y coordinates at that specific time/frame). Nothing is written if the query returns no trajectories. NOTE: the output name does not depend on the matcher, so exports of different ID sets for the same experiment and day share one name
    """
    start = datetime.now()
    t_begin = fm.Time(start_time)
    t_stop = fm.Time(end_time)
    trajectories = fm.Query.ComputeAntTrajectories(
        experiment=exp,
        start=t_begin,
        end=t_stop,
        matcher=matcher_query,
        maximumGap=fm.Duration.Parse("1000h"),
    )
    # Columns holding one array per trajectory; they are exploded to one row per position below
    position_cols = ["Pos_time", "Pos_X", "Pos_Y"]
    # Make a list of lists with trajectory values needed. Only the first three columns of Positions are used
    traj_list = [
        [
            traj.Ant,
            traj.Space,
            traj.Start.ToDateTime(),
            traj.Positions[:, 0],
            traj.Positions[:, 1],
            traj.Positions[:, 2],
        ]
        for traj in trajectories
    ]
    if not traj_list:  # If no trajectories are output
        print("No trajectories found. No parquet file written")
        return
    # Make the list into a dataframe
    traj_df = pd.DataFrame(
        traj_list, columns=["AntID", "Space", "StartTime"] + position_cols
    )
    # Explode the array columns (using the names the dataframe actually has) to get one row per position
    traj_df = traj_df.explode(column=position_cols, ignore_index=True)
    # Exploded columns have object dtype; coerce them to numeric so they are stored as numbers
    for col in position_cols:
        traj_df[col] = pd.to_numeric(traj_df[col], errors="coerce")
    # Create file name
    f_name = "Trajectories_{}_{}.parquet".format(
        exp.Name, start_time.strftime("%Y%m%d")
    )
    # Save as parquet. pandas names the partitioning argument partition_cols (partition_on is the dask spelling)
    traj_df.to_parquet(f_name, partition_cols=["AntID"])
    end = datetime.now()
    print("Parquet file for", start_time.strftime("%d-%m-%Y"), "output in", end - start)

### Colony Cfel42

In [2]:
# Path of the myrmidon experiment file to open (the commented line is an alternative file)
# f_myrmidon = "/media/egeorge/Elements/220727_Test3/RemovalTest2_Col42.myrmidon"
f_myrmidon = "/media/egeorge/Elements/Woundcare Experiment1/Cfell_wound_col42.myrmidon"
# Open the experiment; `exp` is the object passed to the trajectory functions
exp = fm.Experiment.Open(f_myrmidon)

In [3]:
# exp.Ants
# queenID = fm.Matcher.AntID(1)
# t_r = fm_time_range(t_start, t_end)

In [6]:
# exp.Ants
# Matcher selecting the single focal ant of day 1
day1_focal = fm.Matcher.AntID(106)
# Matcher selecting any of the day-1 caregiver ants
day1_caregivers = fm.Matcher.Or(
    *[fm.Matcher.AntID(ant_id) for ant_id in (103, 114, 2, 21, 24, 3, 53, 63)]
)
# Focal ant or any caregiver
day1_ids = fm.Matcher.Or(day1_focal, day1_caregivers)
# Six-hour window; astimezone() turns the naive values into aware datetimes in the system timezone
day1_start = datetime(2022, 5, 2, 16, 3).astimezone()
day1_end = datetime(2022, 5, 2, 22, 3).astimezone()

In [None]:
# Export the trajectories matched by day1_ids within the day-1 window to parquet
daily_trajectory_to_parquet_focal(day1_start, day1_end, exp, day1_ids)

## Output trajectories with mean coordinates

Instead of outputting the whole trajectory we can also compress it, e.g., by obtaining the mean X and Y coordinate for each ant per second instead of at the experimental frame rate. <br>
In this case we will use a pandas groupby that bins the values into 1-second intervals and takes the mean within each bin.

In [None]:
def trajectory_output_summarised(start_time, end_time, exp):
    """
    Function to extract the trajectories of all ants as a pandas dataframe with the coordinates averaged per second. While it is setup to extract daily trajectories, it can work for any arbitrary time duration
    :param start_time: The start datetime object. This will be converted to a fort-myrmidon Time object
    :param end_time: The end datetime object. This will be converted to a fort-myrmidon Time object
    :param exp: The opened fort-myrmidon experiment, passed to the query as `experiment`
    :return: A pandas dataframe with the columns Time, AntID, Space, X_mean and Y_mean, where X_mean and Y_mean are the X and Y coordinates averaged within each 1-second bin. Averaging is done to have a dataset which can be merged across IDs at the resolution of 1s. If no trajectories are found, an empty dataframe with the columns AntID, Space, Time, X_coor and Y_coor is returned instead
    """
    trajectories = fm.Query.ComputeAntTrajectories(
        experiment=exp,
        start=fm.Time(start_time),
        end=fm.Time(end_time),
        maximumGap=fm.Duration.Parse("1000h"),
        reportProgress=False,
    )
    # Make a list of lists with trajectory values needed. Only the first three columns of Positions are used
    traj_list = [
        [
            traj.Ant,
            traj.Space,
            traj.Start.ToDateTime(),
            traj.Positions[:, 0],
            traj.Positions[:, 1],
            traj.Positions[:, 2],
        ]
        for traj in trajectories
    ]
    if not traj_list:  # If no trajectories are output
        # Return early so none of the conversions below run on an empty frame
        print("No trajectories found. Created empty dataframe")
        return pd.DataFrame(columns=["AntID", "Space", "Time", "X_coor", "Y_coor"])
    # Make the list into a dataframe
    traj_df = pd.DataFrame(
        traj_list,
        columns=["AntID", "Space", "StartTime", "Pos_time", "X_coor", "Y_coor"],
    )
    # Explode columns which are in the form of arrays to get one row per position
    traj_df = traj_df.explode(column=["Pos_time", "X_coor", "Y_coor"])
    # Exploded columns have object dtype; coerce the coordinates to numeric
    traj_df["X_coor"] = pd.to_numeric(traj_df["X_coor"], errors="coerce")
    traj_df["Y_coor"] = pd.to_numeric(traj_df["Y_coor"], errors="coerce")
    # Convert Pos_time to timedelta and obtain actual datetime for each trajectory entry.
    # Lowercase "s" is the seconds unit; the uppercase "S" alias is deprecated in recent pandas
    traj_df["Pos_time"] = pd.to_timedelta(
        pd.to_numeric(traj_df["Pos_time"], errors="coerce"),
        unit="s",
        errors="coerce",
    )
    traj_df["Time"] = traj_df["StartTime"] + traj_df["Pos_time"]
    # Obtain average X and Y coordinates per second for each ant and space
    return (
        traj_df.groupby([pd.Grouper(key="Time", freq="1s"), "AntID", "Space"])
        .agg(X_mean=("X_coor", "mean"), Y_mean=("Y_coor", "mean"))
        .reset_index()
    )

In [None]:
def trajectory_output_summarised_focal(start_time, end_time, exp, matcher_query):
    """
    Function to extract the trajectories of selected ants as a pandas dataframe with the coordinates averaged per second. While it is setup to extract daily trajectories, it can work for any arbitrary time duration
    :param start_time: The start datetime object. This will be converted to a fort-myrmidon Time object
    :param end_time: The end datetime object. This will be converted to a fort-myrmidon Time object
    :param exp: The opened fort-myrmidon experiment, passed to the query as `experiment`
    :param matcher_query: The fm matcher corresponding to the focal IDs
    :return: A pandas dataframe with the columns Time, AntID, Space, X_mean and Y_mean, where X_mean and Y_mean are the X and Y coordinates averaged within each 1-second bin. Averaging is done to have a dataset which can be merged across IDs at the resolution of 1s. If no trajectories are found, an empty dataframe with the columns AntID, Space, Time, X_coor and Y_coor is returned instead
    """
    trajectories = fm.Query.ComputeAntTrajectories(
        experiment=exp,
        start=fm.Time(start_time),
        end=fm.Time(end_time),
        matcher=matcher_query,
        maximumGap=fm.Duration.Parse("1000h"),
        reportProgress=False,
    )
    # Make a list of lists with trajectory values needed. Only the first three columns of Positions are used
    traj_list = [
        [
            traj.Ant,
            traj.Space,
            traj.Start.ToDateTime(),
            traj.Positions[:, 0],
            traj.Positions[:, 1],
            traj.Positions[:, 2],
        ]
        for traj in trajectories
    ]
    if not traj_list:  # If no trajectories are output
        # Return early so none of the conversions below run on an empty frame
        print("No trajectories found. Created empty dataframe")
        return pd.DataFrame(columns=["AntID", "Space", "Time", "X_coor", "Y_coor"])
    # Make the list into a dataframe
    traj_df = pd.DataFrame(
        traj_list,
        columns=["AntID", "Space", "StartTime", "Pos_time", "X_coor", "Y_coor"],
    )
    # Explode columns which are in the form of arrays to get one row per position
    traj_df = traj_df.explode(column=["Pos_time", "X_coor", "Y_coor"])
    # Exploded columns have object dtype; coerce the coordinates to numeric
    traj_df["X_coor"] = pd.to_numeric(traj_df["X_coor"], errors="coerce")
    traj_df["Y_coor"] = pd.to_numeric(traj_df["Y_coor"], errors="coerce")
    # Convert Pos_time to timedelta and obtain actual datetime for each trajectory entry.
    # Lowercase "s" is the seconds unit; the uppercase "S" alias is deprecated in recent pandas
    traj_df["Pos_time"] = pd.to_timedelta(
        pd.to_numeric(traj_df["Pos_time"], errors="coerce"),
        unit="s",
        errors="coerce",
    )
    traj_df["Time"] = traj_df["StartTime"] + traj_df["Pos_time"]
    # Obtain average X and Y coordinates per second for each ant and space
    return (
        traj_df.groupby([pd.Grouper(key="Time", freq="1s"), "AntID", "Space"])
        .agg(X_mean=("X_coor", "mean"), Y_mean=("Y_coor", "mean"))
        .reset_index()
    )

## Output trajectories for all ants

In [None]:
def daily_trajectory_to_parquet_optimised(start_time, end_time, exp):
    """
    Function to extract daily trajectories as a parquet dataset, partitioned by AntID. While it is setup to extract daily trajectories, it can work for any arbitrary time duration
    :param start_time: The start datetime object. This will be converted to a fort-myrmidon Time object
    :param end_time: The end datetime object. This will be converted to a fort-myrmidon Time object
    :param exp: The opened fort-myrmidon experiment. It is passed to the query as `experiment` and its `Name` is used in the output name
    :return: None. Writes "Trajectories_<exp.Name>_<YYYYMMDD of start_time>.parquet", partitioned on AntID, with one row per position. The columns are AntID, Space, StartTime of each trajectory, Pos_time (time difference in seconds from the StartTime of the trajectory), Pos_X and Pos_Y (corresponding to the X and Y coordinates at that specific time/frame). Trajectories without any position are skipped, and nothing is written if no trajectory remains
    """
    start = datetime.now()
    t_begin = fm.Time(start_time)
    t_stop = fm.Time(end_time)
    trajectories = fm.Query.ComputeAntTrajectories(
        experiment=exp,
        start=t_begin,
        end=t_stop,
        maximumGap=fm.Duration.Parse("1000h"),
    )
    # Columns holding one array per trajectory; they are exploded to one row per position below
    position_cols = ["Pos_time", "Pos_X", "Pos_Y"]
    # Make a list of lists with trajectory values needed. Only the first three columns of Positions are used.
    # Trajectories with an empty Positions array are skipped so they do not become all-NaN rows
    traj_list = [
        [
            traj.Ant,
            traj.Space,
            traj.Start.ToDateTime(),
            traj.Positions[:, 0],
            traj.Positions[:, 1],
            traj.Positions[:, 2],
        ]
        for traj in trajectories
        if traj.Positions.shape[0] > 0
    ]
    if not traj_list:  # If no trajectories are output
        print("No trajectories found. No parquet file written")
        return
    # Make the list into a dataframe
    traj_df = pd.DataFrame(
        traj_list, columns=["AntID", "Space", "StartTime"] + position_cols
    )
    # Explode the array columns (using the names the dataframe actually has) to get one row per position
    traj_df = traj_df.explode(column=position_cols, ignore_index=True)
    # Exploded columns have object dtype; coerce them to numeric so they are stored as numbers
    for col in position_cols:
        traj_df[col] = pd.to_numeric(traj_df[col], errors="coerce")
    # Create file name
    f_name = "Trajectories_{}_{}.parquet".format(
        exp.Name, start_time.strftime("%Y%m%d")
    )
    # Save as parquet. pandas names the partitioning argument partition_cols (partition_on is the dask spelling)
    traj_df.to_parquet(f_name, partition_cols=["AntID"])
    end = datetime.now()
    print("Parquet file for", start_time.strftime("%d-%m-%Y"), "output in", end - start)

In [2]:
# Open a different experiment file; this rebinds f_myrmidon and exp for the cells below
f_myrmidon = "/media/ebiag/Ebi-3/InfectionExp_Cfel13/InfectionExp_Cfel13.myrmidon"
exp = fm.Experiment.Open(f_myrmidon)

In [4]:
# Arbirtarily choose 2nd baseline day
day_start = datetime(2023, 4, 20, 0, 0).astimezone(tz=None)
day_end = datetime(2023, 4, 21, 0, 0).astimezone(tz=None)

In [None]:
# Convert the chosen day boundaries to fort-myrmidon Time objects
t_begin = fm.Time(day_start)
t_stop = fm.Time(day_end)

# Compute trajectories over the chosen day (no matcher is passed)
trajectories = fm.Query.ComputeAntTrajectories(
    experiment=exp,
    start=t_begin,
    end=t_stop,
    maximumGap=fm.Duration.Parse("1000h"),  # Adjust gap as needed
    # matcher=queenID, # Uncomment if you need to filter by specific IDs
)

Computing ant trajectories: 100%|██| 1440/1440 [00:28<00:00, 51.27tracked min/s]


In [None]:
# Get a list of trajectories
traj_list = []
for trajectory in trajectories:
    # Ensure Positions is not empty
    if trajectory.Positions.shape[0] > 0:
        traj_list.append(
            [
                trajectory.Ant,  # Get Ant ID as integer
                trajectory.Space,  # Get Space ID as integer
                trajectory.Start.ToDateTime(),
                trajectory.Positions[:, 0],  # Time offset in seconds
                trajectory.Positions[:, 1],  # X coordinate
                trajectory.Positions[:, 2],  # Y coordinate
            ]
        )

In [None]:
# Convert to dataframe
traj_df = pd.DataFrame(
    traj_list,
    columns=["AntID", "Space", "StartTime", "Pos_time_offset", "Pos_X", "Pos_Y"],
)

In [None]:
# Explode trajectories to get one row per time point and ant
traj_df = traj_df.explode(
    column=["Pos_time_offset", "Pos_X", "Pos_Y"], ignore_index=True
)

In [11]:
# Coerce coordinates to integer
traj_df["X_coor"] = pd.to_numeric(traj_df["Pos_X"], errors="coerce")
traj_df["Y_coor"] = pd.to_numeric(traj_df["Pos_Y"], errors="coerce")

In [12]:
# Preview the first rows of the exploded dataframe
traj_df.head()

Unnamed: 0,AntID,Space,StartTime,Pos_time_offset,Pos_X,Pos_Y,X_coor,Y_coor
0,97,1,2023-04-20 00:00:00.442248,0.0,1546.975576,4375.745889,1546.975576,4375.745889
1,97,1,2023-04-20 00:00:00.442248,0.200003,1546.996396,4375.723201,1546.996396,4375.723201
2,97,1,2023-04-20 00:00:00.442248,0.400005,1546.972174,4375.789911,1546.972174,4375.789911
3,97,1,2023-04-20 00:00:00.442248,0.600008,1547.03356,4375.826449,1547.03356,4375.826449
4,97,1,2023-04-20 00:00:00.442248,1.200014,1547.207982,4376.154674,1547.207982,4376.154674


In [None]:
# Convert Pos_time to timedelta and obtain actual datetime for each trajectory entry
traj_df["Pos_time"] = pd.to_numeric(traj_df["Pos_time_offset"], errors="coerce")
traj_df["Pos_time"] = pd.to_timedelta(traj_df["Pos_time"], unit="S", errors="coerce")
traj_df["Time"] = traj_df["StartTime"] + traj_df["Pos_time"]

In [15]:
# Select specific columns
traj_df = traj_df[["AntID", "Space", "Time", "X_coor", "Y_coor"]]
traj_df.head()

Unnamed: 0,AntID,Space,Time,X_coor,Y_coor
0,97,1,2023-04-20 00:00:00.442248,1546.975576,4375.745889
1,97,1,2023-04-20 00:00:00.642251,1546.996396,4375.723201
2,97,1,2023-04-20 00:00:00.842253,1546.972174,4375.789911
3,97,1,2023-04-20 00:00:01.042256,1547.03356,4375.826449
4,97,1,2023-04-20 00:00:01.642262,1547.207982,4376.154674


In [17]:
# Filename and output
f_name = "Trajectories_Cfel13_20240420.parquet"
traj_df.to_parquet(f_name)