This notebook will read in the corrected track files, match up the tracks with their species and individual IDs, and reshape the data to work with the momentuHMM package.

In [2]:
import pandas as pd
import numpy as np
import os

mount = '/Volumes'

In [3]:
# load data on observations
hhdata = np.load('../data/HH-data.npy', allow_pickle=True)

# load in csv giving the individual IDs for each track (exported from MySQL)
track_ids = pd.read_csv('../data/trackIDs.csv')

# load in csv giving the species IDs for each individual (exported from MySQL)
track_spps = pd.read_csv('../data/trackspps.csv')

In [20]:
# list of usable observations
focal_obs = ['015', '066', '074']
# focal_obs = ['074']

In [21]:
for t in focal_obs:
    for i in hhdata:
        if i['observation'] == t:
            file = mount + i['utm-tracks']
            tracks = np.load(file, allow_pickle = True)
            z,f,c = tracks.shape
            out_arr = np.column_stack((np.repeat(np.arange(z),f),tracks.reshape(z*f,-1)))
            out_df = pd.DataFrame(out_arr, columns = ['track_num', 'lon', 'lat'])
            out_df['frame'] = np.tile(np.arange(f),z)
            out_df['trackID'] = ['ob' + t + '-' + str(int(p)).zfill(4) for p in out_df.track_num]
            out_df['indID'] = [track_ids[track_ids['trackID'] == p]['indID'].values[0] for p in out_df['trackID']]
            out_df['spp'] = [track_spps[track_spps['indID'] == p]['species'].values[0] for p in out_df['indID']]
            clean_tracks = out_df[(out_df['spp'] == 'gz') | (out_df['spp'] == 'pz')]
            clean_tracks.dropna(inplace = True)
            name = 'ob' + t + '_segtracks.csv'
            clean_tracks.to_csv(os.path.join('tracks-for-segmentation', name), index = False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_tracks.dropna(inplace = True)


In [18]:
# t = '015'
# for i in hhdata:
#     if i['observation'] == t:
#         file = mount + i['utm-tracks']
#         tracks = np.load(file, allow_pickle = True)
#         z,f,c = tracks.shape
#         out_arr = np.column_stack((np.repeat(np.arange(z),f),tracks.reshape(z*f,-1)))
#         out_df = pd.DataFrame(out_arr, columns = ['track_num', 'lon', 'lat'])
#         out_df['frame'] = np.tile(np.arange(f),z)

Now each observation's tracks are in the appropriate format for segmentation. In the segmentation script I just have to read all the csvs in, apply whatever smoothing needs to happen, and then append them into a single file. I should only re-run this code on observations that don't already have clean tracks (that is, ones that aren't in the focal_obs list above), or on observations whose tracks have been redone.

Now to smooth the tracks. First import the clean tracks csvs and divide them by track IDs. Rename as required by traja, and then smooth.

In [1]:
import traja
import pandas as pd

In [None]:
df = pd.read_csv('/tracks-for-segmentation/ob015_segtracks.csv')
df = df.rename(columns={'lon':'x', 'lat':'y', 'frame':'time'})
trj = traja.TrajaDataFrame(df)

# get track numbers
# track_nums = np.unique(df['track_num'])
track_nums = 0
for i in track_nums:
    sub_trj = trj[trj['track_num'] == i]
    sub_trj.plot()
    sub_smooth = traja.smooth_sg(sub_trj, w = 30)
    sub_smooth.plot()

In [None]:
df = pd.read_csv('data.csv')

# If x and y columns are named different than "x" and "y", rename them, eg:
df = df.rename(columns={"x_col": "x", "y_col": "y"}) # original column names x_col, y_col

# If the time column doesn't include "time" in the name, similarly rename it to "time"

trj = traja.TrajaDataFrame(df)