In [1]:
import os
import time

import dill
import matplotlib.pyplot as plt
import pandas as pd

from collab.foraging import central_park_birds as cpb
from collab.foraging import toolkit as ft
from collab.utils import find_repo_root

# Directory three levels above the current working directory; the data paths
# used when loading the .csv files are joined onto it.
root = os.path.abspath(os.path.join(os.curdir, os.pardir, os.pardir, os.pardir))

# Smoke-test mode exists only so that continuous integration can check that
# the notebook runs without error; readers can ignore it.
smoke_test = "CI" in os.environ
if smoke_test:
    frames, frames_sps, sampling_rate = 150, 150, 0.001
else:
    frames, frames_sps, sampling_rate = 300, 2300, 0.01

# Wall-clock timestamp taken when the notebook starts running.
notebook_starts = time.time()

  from .autonotebook import tqdm as notebook_tqdm


Import raw .csv files of positions and angles for each age

In [2]:
# Load one raw tracking file per age group (2, 4, 6 and 8 wpf), in that order.
# Each path is resolved relative to `root`.
fish_raw_2wpf, fish_raw_4wpf, fish_raw_6wpf, fish_raw_8wpf = (
    pd.read_csv(os.path.join(root, relative_path))
    for relative_path in (
        "data/fish_all_ages/2wpf_091522_6.csv",
        "data/fish_all_ages/4wpf_060222_1.csv",
        "data/fish_all_ages/6wpf_061522_1.csv",
        "data/fish_all_ages/8wpf_091622_2.csv",
    )
)

Remove all frames where *any* fish goes missing


PS: I am keeping the unprocessed data files in the repo for now, in case I want to use a different method of dealing with NaNs later on.

PPS: This could potentially be packaged into a function in toolkit if it is of general use?

In [3]:
def drop_frames_with_missing_fish(fish_raw: pd.DataFrame) -> pd.DataFrame:
    """Remove every time point at which any row contains a NaN.

    The dataframe is flattened in the fish dimension, so one time point is
    spread over several rows. A NaN in any of those rows therefore has to
    remove *all* rows sharing that "time" value, not just the offending row.

    Parameters
    ----------
    fish_raw : pd.DataFrame
        Raw tracking data; must contain a "time" column.

    Returns
    -------
    pd.DataFrame
        The rows of `fish_raw` whose "time" value never co-occurs with a NaN.
        The input dataframe is not modified.
    """
    # rows with at least one missing value
    has_nan = fish_raw.isna().any(axis=1)

    # time points those rows belong to
    bad_timepoints = fish_raw.loc[has_nan, "time"]

    # keep only the rows recorded at fully observed time points
    return fish_raw[~fish_raw["time"].isin(bad_timepoints)]


# apply the same filter to every age group
fish_filtered_2wpf = drop_frames_with_missing_fish(fish_raw_2wpf)
fish_filtered_4wpf = drop_frames_with_missing_fish(fish_raw_4wpf)
fish_filtered_6wpf = drop_frames_with_missing_fish(fish_raw_6wpf)
fish_filtered_8wpf = drop_frames_with_missing_fish(fish_raw_8wpf)

2-week-old fish are in a smaller arena (150 mm diameter) than older fish (300 mm diameter). Different ages also have different characteristic length scales (e.g., swimming speed, body length, interaction distance).

To allow for comparison across ages, we need the distance between grid points to be the same in absolute units for all ages.

Choose:

2wpf: 90 x 90 grid spanning 0 to 150mm in each direction

4-8wpf: 180 x 180 grid spanning 0 to 300mm in each direction

In [6]:
# Grid points per side for each age group. Combined with gridMaxes below, the
# spacing between grid points is the same for all ages (150 / 90 == 300 / 180).
gridSizes = {"2wpf": 90, "4wpf": 180, "6wpf": 180, "8wpf": 180}

# Upper end of the grid span for each age group (the lower end is 0).
gridMaxes = {"2wpf": 150, "4wpf": 300, "6wpf": 300, "8wpf": 300}

# Rename the "fish" column to "forager" in every filtered dataframe.
fish_filtered_2wpf, fish_filtered_4wpf, fish_filtered_6wpf, fish_filtered_8wpf = (
    filtered.rename(columns={"fish": "forager"})
    for filtered in (
        fish_filtered_2wpf,
        fish_filtered_4wpf,
        fish_filtered_6wpf,
        fish_filtered_8wpf,
    )
)

# No subsampling in time for now.

# NOTE(review): the saved output of this cell is
# "TypeError: rescale_to_grid() got an unexpected keyword argument 'gridMin'".
# Presumably the installed toolkit version does not accept gridMin/gridMax;
# confirm the expected signature of ft.rescale_to_grid before re-running.
fish_filtered_2wpf = ft.rescale_to_grid(
    fish_filtered_2wpf,
    size=gridSizes["2wpf"],
    gridMin=0,
    gridMax=gridMaxes["2wpf"],
)
fish_2wpf_obj = ft.object_from_data(
    fish_filtered_2wpf,
    grid_size=gridSizes["2wpf"],
    frames=fish_filtered_2wpf["time"].max(),
    calculate_step_size_max=True,
)

TypeError: rescale_to_grid() got an unexpected keyword argument 'gridMin'