HOMEWORK 4
download the spatial-temporal dataset, read into notebook and prepare the data frame, make sure to include columns of: timestamp, latitude, longitude, tortoise ID, and other information

In [97]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

import geopandas as gpd
import shapely
import skmob
from skmob.measures.individual import jump_lengths
from skmob.tessellation import tilers
from skmob.preprocessing import detection
from skmob.preprocessing import clustering
from skmob.preprocessing import filtering
from skmob.preprocessing import compression



pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 200)
import warnings
warnings.simplefilter("ignore")
import os
"""
    sorting the event data frame by user ID and then by time is the most critical step.
"""
os.chdir('C:/Users/christian fink/Math485/Math485_2')
events = pd.read_csv("Galapagos Tortoise Movement Ecology Programme_2009-2018.csv", sep=',', parse_dates=[2])
new_cols = pd.Series(events.columns.values).str.replace(pat = r"[-:]", repl="_", regex=True)
tortoise = events.set_axis(labels = new_cols, axis =1)
tortoise.drop(tortoise.index[tortoise["manually_marked_outlier"].notna()], inplace = True)
tortoise.drop(columns =["manually_marked_outlier"], inplace=True)
tortoise.drop(index = tortoise.index[tortoise.isna().any(axis=1)], inplace=True)
cols = ['event_id', 'timestamp', 'location_long', 'location_lat',
       'eobs_temperature', 'ground_speed', 'heading', 'height_above_ellipsoid',
       'individual_local_identifier', 'individual_taxon_canonical_name']
tortoise.drop(columns=tortoise.columns.difference(cols), inplace=True)

tortoise.insert(loc = 2, column = "timestamp_UTC",
                value = tortoise['timestamp'].apply(lambda x: x.tz_localize(tz='UTC'))
                )
tortoise.insert(loc = 3, column = "timestamp_local", 
                value = tortoise["timestamp_UTC"].apply(lambda x: x.tz_convert(tz='US/Pacific'))
                )
tortoise.sort_values(by = ["individual_local_identifier", "timestamp_local"], inplace=True)
tortoise.insert(loc = 4, column = "minute_diff", 
                value = tortoise.groupby(['individual_local_identifier'])["timestamp_local"].diff()\
                    .apply(lambda x: x/np.timedelta64(1, 'm')).fillna(0).astype('int64')
                    )
tortoise['year'] = tortoise["timestamp_local"].dt.year
tortoise['month'] = tortoise["timestamp_local"].dt.month
tortoise['date'] = tortoise["timestamp_local"].dt.date
tortoise["month_since2009"] = (tortoise.year - tortoise.year.min())*12 + tortoise.month
tortoise["day_since_min"] = (tortoise["timestamp_local"] - tortoise.timestamp_local.min()).apply(lambda x: x.days)
tortoise.reset_index(drop=True, inplace=True)
tortoise_gdf = gpd.GeoDataFrame(tortoise)
tortoise_gdf["geometry"] = gpd.points_from_xy(tortoise_gdf.location_long, tortoise_gdf.location_lat)
tortoise_trajectory = skmob.TrajDataFrame(tortoise, latitude='location_lat', longitude='location_long', \
    user_id='individual_local_identifier', datetime='timestamp_local', \
)
tortoise_jl = jump_lengths(tortoise_trajectory)
tortoise_jl["jump_lengths"] = \
    tortoise_jl["jump_lengths"].apply(
        lambda x: np.append(x,np.nan).tolist())
tortoise_jl = tortoise_jl.explode(["jump_lengths"],
                ignore_index=True).reset_index(drop=True)
tortoise["jump_length"] = tortoise_jl.jump_lengths
tortoise_trajectory["jump_length"] = tortoise_jl["jump_lengths"]

100%|██████████| 96/96 [00:21<00:00,  4.55it/s]


one simple method to detect migration might be through distance of movement: suppose you select day as the unit of time, if you can compute movement scale for each unit, the days with larger scale of movement are the days of migration
you can use radius of gyration, or total trajectory length, to measure scale of movement
you can implement haversine function, or use existing library functions
you can design your other ways of migration detection and implement them, for instance, you may want to investigate using the speed and heading in the dataset


In [98]:
tortoise.head()

Unnamed: 0,event_id,timestamp,timestamp_UTC,timestamp_local,minute_diff,location_long,location_lat,eobs_temperature,ground_speed,heading,height_above_ellipsoid,individual_taxon_canonical_name,individual_local_identifier,year,month,date,month_since2009,day_since_min,jump_length
0,46119277,2010-09-17 17:01:25.998,2010-09-17 17:01:25.998000+00:00,2010-09-17 10:01:25.998000-07:00,0,-90.241889,-0.635366,18,0.0,334.23,181.1,Chelonoidis donfaustoi,Alison,2010,9,2010-09-17,21,506,0.043753
1,46119278,2010-09-17 18:00:58.001,2010-09-17 18:00:58.001000+00:00,2010-09-17 11:00:58.001000-07:00,59,-90.24223,-0.635563,19,0.21,19.01,143.6,Chelonoidis donfaustoi,Alison,2010,9,2010-09-17,21,506,0.005815
2,46119279,2010-09-17 19:00:56.001,2010-09-17 19:00:56.001000+00:00,2010-09-17 12:00:56.001000-07:00,59,-90.24221,-0.635515,22,0.15,359.14,171.4,Chelonoidis donfaustoi,Alison,2010,9,2010-09-17,21,506,0.017794
3,46119280,2010-09-17 20:00:29.000,2010-09-17 20:00:29+00:00,2010-09-17 13:00:29-07:00,59,-90.242227,-0.635356,19,0.75,19.66,156.2,Chelonoidis donfaustoi,Alison,2010,9,2010-09-17,21,506,0.016623
4,46119281,2010-09-17 21:00:56.000,2010-09-17 21:00:56+00:00,2010-09-17 14:00:56-07:00,60,-90.242101,-0.635276,17,0.08,342.75,160.4,Chelonoidis donfaustoi,Alison,2010,9,2010-09-17,21,506,0.003791


In [99]:
import math
def my_haversine(lat1, lon1, lat2, lon2):
    R = 6371e3 # in meters
    phi1 = lat1 * math.pi/180 # φ, λ in radians
    phi2 = lat2 * math.pi/180
    delta_phi = (lat2-lat1) * math.pi/180
    delta_lambda = (lon2-lon1) * math.pi/180
    a = math.sin(delta_phi/2) * math.sin(delta_phi/2) + \
        math.cos(phi1) * math.cos(phi2) * \
        math.sin(delta_lambda/2) * math.sin(delta_lambda/2)
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a)) # atan2(x,y) = arctan(x/y) if y > 0
    d = R * c # unit is in meters
    return d
from skmob.measures.individual import radius_of_gyration
tortoise_radius_gyration = radius_of_gyration(tortoise_trajectory)
tortoise_radius_gyration = tortoise_radius_gyration.sort_values(by="radius_of_gyration", ascending=False)

100%|██████████| 96/96 [00:20<00:00,  4.76it/s]


In [100]:
tortoise_stay_location = detection.stay_locations(tortoise_trajectory, 
    minutes_for_a_stop=400,
    spatial_radius_km=0.05, leaving_time=True)

In [101]:
turtle_name = 'Alison'

def get_turtle_stay_locations(turtle_name, tortoise_stay_location):
    return tortoise_stay_location.query(f"uid == '{turtle_name}'")

def analyze_turtle_migration(turtle_name, tortoise, speed_threshold=4, heading_threshold=20):
    tortoise = tortoise[tortoise['individual_local_identifier'] == turtle_name].copy()
    tortoise['is_migrating_speed'] = tortoise['ground_speed'] > speed_threshold
    tortoise['heading_diff'] = tortoise['heading'].diff().abs()
    tortoise['is_migrating_heading'] = tortoise['heading_diff'] < heading_threshold
    tortoise['is_migrating'] = tortoise['is_migrating_speed'] & tortoise['is_migrating_heading']
    return tortoise[['event_id', 'timestamp', 'ground_speed', 'heading', 'is_migrating', 'individual_local_identifier']]

def get_migration_timestamps(turtle_name, tortoise):
    result = analyze_turtle_migration(turtle_name, tortoise)
    migrating_times = result[result['is_migrating'] == True]
    return migrating_times[['timestamp']]

def combine_stay_and_migration(turtle_name, tortoise_stay_location, tortoise):
    stay_locations = get_turtle_stay_locations(turtle_name, tortoise_stay_location)
    migration_times = get_migration_timestamps(turtle_name, tortoise)
    combined_data = []
    for i in range(len(stay_locations) - 1):
        stay_end = stay_locations.iloc[i]['timestamp'] 
        next_stay_start = stay_locations.iloc[i + 1]['timestamp']
        migration_between = migration_times[
            (migration_times['timestamp'] > stay_end) &
            (migration_times['timestamp'] < next_stay_start)]
        if not migration_between.empty:
            combined_data.append({
                'migration_start': migration_between.iloc[0]['timestamp'],
                'migration_end': next_stay_start})
    return combined_data

combine_stay_and_migration(turtle_name, tortoise_stay_location, tortoise)


[{'migration_start': Timestamp('2011-09-08 14:00:39'),
  'migration_end': Timestamp('2011-09-08 22:00:56')},
 {'migration_start': Timestamp('2016-04-20 17:01:16'),
  'migration_end': Timestamp('2016-04-20 20:00:57.998000')},
 {'migration_start': Timestamp('2018-03-05 15:01:02'),
  'migration_end': Timestamp('2018-03-05 23:00:43.998000')}]

run your migration detection on following tortoises:
Alison, Marilyn, Kitty, Connor, Helber, Christian, Steve Devine
for each tortoise conclude on which dates did the tortoise start the migration, and on which dates did they end the migration


In [102]:
Alison = combine_stay_and_migration('Alison', tortoise_stay_location, tortoise)
Marilyn = combine_stay_and_migration('Marilyn', tortoise_stay_location, tortoise)
Kitty = combine_stay_and_migration('Kitty', tortoise_stay_location, tortoise)
Connor = combine_stay_and_migration('Connor', tortoise_stay_location, tortoise)
Helber = combine_stay_and_migration('Helber', tortoise_stay_location, tortoise)
Christian = combine_stay_and_migration('Christian', tortoise_stay_location, tortoise)
Steve = combine_stay_and_migration('Steve', tortoise_stay_location, tortoise)
Devine = combine_stay_and_migration('Devine', tortoise_stay_location, tortoise)
Christian


[{'migration_start': Timestamp('2011-07-22 12:00:37.999000'),
  'migration_end': Timestamp('2011-07-22 16:00:31.998000')},
 {'migration_start': Timestamp('2014-02-08 14:01:36'),
  'migration_end': Timestamp('2014-02-09 13:00:13.999000')},
 {'migration_start': Timestamp('2014-07-10 13:00:27'),
  'migration_end': Timestamp('2014-07-10 16:00:56.998000')},
 {'migration_start': Timestamp('2014-12-18 22:01:49.999000'),
  'migration_end': Timestamp('2014-12-19 15:00:18.998000')},
 {'migration_start': Timestamp('2017-10-28 17:00:35.998000'),
  'migration_end': Timestamp('2017-10-29 13:00:45.999000')},
 {'migration_start': Timestamp('2018-01-19 17:00:38.001000'),
  'migration_end': Timestamp('2018-01-25 12:00:16.001000')},
 {'migration_start': Timestamp('2018-05-21 20:01:11'),
  'migration_end': Timestamp('2018-05-22 12:00:31.998000')}]