In [57]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

import geopandas as gpd
import shapely
import skmob
from skmob.measures.individual import jump_lengths
from skmob.tessellation import tilers
from skmob.preprocessing import detection, clustering
from skmob.preprocessing import filtering


pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 200)
import warnings
warnings.simplefilter("ignore")

In [58]:
import os
os.chdir("C:/Users/luke/OneDrive/Data Science Course")
events = pd.read_csv("Galapagos Tortoise Movement Ecology Programme_2009-2018.csv", sep=',', parse_dates=[2])
new_cols = pd.Series(events.columns.values).str.replace(pat = r"[-:]", repl="_", regex=True)
tortoise = events.set_axis(labels = new_cols, axis =1)
tortoise.drop(tortoise.index[tortoise["manually_marked_outlier"].notna()], inplace = True)
tortoise.drop(columns =["manually_marked_outlier"], inplace=True)
tortoise.drop(index = tortoise.index[tortoise.isna().any(axis=1)], inplace=True)
cols = ['event_id', 'timestamp', 'location_long', 'location_lat',
       'eobs_temperature', 'ground_speed', 'heading', 'height_above_ellipsoid',
       'individual_local_identifier', 'individual_taxon_canonical_name']
tortoise.drop(columns=tortoise.columns.difference(cols), inplace=True)

tortoise.insert(loc = 2, column = "timestamp_UTC",
                value = tortoise['timestamp'].apply(lambda x: x.tz_localize(tz='UTC'))
                )
tortoise.insert(loc = 3, column = "timestamp_local", 
                value = tortoise["timestamp_UTC"].apply(lambda x: x.tz_convert(tz='US/Pacific'))
                )
tortoise.sort_values(by = ["individual_local_identifier", "timestamp_local"], inplace=True)
tortoise.insert(loc = 4, column = "minute_diff", 
                value = tortoise.groupby(['individual_local_identifier'])["timestamp_local"].diff()\
                    .apply(lambda x: x/np.timedelta64(1, 'm')).fillna(0).astype('int64')
                    )
tortoise['year'] = tortoise["timestamp_local"].dt.year
tortoise['month'] = tortoise["timestamp_local"].dt.month
tortoise['date'] = tortoise["timestamp_local"].dt.date

In [59]:
import movingpandas as mpd
import geopandas as gpd

gdf_tortoise = gpd.GeoDataFrame(tortoise)
gdf_tortoise["geometry"] = gpd.points_from_xy(gdf_tortoise.location_long, gdf_tortoise.location_lat)

conor = tortoise.query("individual_local_identifier == 'Connor'")
alison = tortoise.query("individual_local_identifier == 'Alison'")
marilyn = tortoise.query("individual_local_identifier == 'Marilyn'")
christian = tortoise.query("individual_local_identifier == 'Christian'")
steve_devine = tortoise.query("individual_local_identifier == 'Steve Devine'")

gdf_alison = gpd.GeoDataFrame(alison)
gdf_alison["geometry"] = gpd.points_from_xy(gdf_alison.location_long, gdf_alison.location_lat)
gdf_alison_temp = gdf_tortoise.query("individual_local_identifier == 'Alison'")

traj_alison = mpd.Trajectory(gdf_alison, 
            traj_id = "Individual_local_identifier", 
            t='timestamp_local')
traj_alison.to_traj_gdf()
final_alison = traj_alison.df


In [60]:
# marilyn
gdf_marilyn= gpd.GeoDataFrame(marilyn)
gdf_marilyn["geometry"] = gpd.points_from_xy(gdf_marilyn.location_long, gdf_marilyn.location_lat)
gdf_marilyn
traj_marilyn = mpd.Trajectory(gdf_marilyn, 
            traj_id = "Individual_local_identifier", 
            t='timestamp_local')

final_marilyn = traj_marilyn.df

In [61]:
# conor
gdf_conor= gpd.GeoDataFrame(conor)
gdf_conor["geometry"] = gpd.points_from_xy(gdf_conor.location_long, gdf_conor.location_lat)
traj_conor = mpd.Trajectory(gdf_conor, 
            traj_id = "Individual_local_identifier", 
            t='timestamp_local')
final_conor = traj_conor.df

In [62]:
# christian
gdf_christian= gpd.GeoDataFrame(christian)
gdf_christian["geometry"] = gpd.points_from_xy(gdf_christian.location_long, gdf_christian.location_lat)
traj_christian = mpd.Trajectory(gdf_christian, 
            traj_id = "Individual_local_identifier", 
            t='timestamp_local')
final_christian = traj_christian.df

In [63]:
# steve_devine
#gdf_steve_devine= gpd.GeoDataFrame(steve_devine)
#gdf_steve_devine["geometry"] = gpd.points_from_xy(gdf_steve_devine.location_long, gdf_steve_devine.location_lat)
#traj_steve_devine = mpd.Trajectory(gdf_steve_devine, 
#            traj_id = "Individual_local_identifier", 
#            t='timestamp_local')
#final_steve_devine = traj_steve_devine.df

In [64]:
import math
import pandas as pd

def my_haversine(lat1, lon1, lat2, lon2):
    R = 6371e3
    phi1=lat1*math.pi/180
    phi2=lat2*math.pi/180
    delta_phi=(lat2 - lat1)*math.pi / 180
    delta_lambda=(lon2 - lon1)*math.pi/180

    a=(math.sin(delta_phi / 2)*math.sin(delta_phi / 2)+math.cos(phi1) * math.cos(phi2) *math.sin(delta_lambda / 2) * math.sin(delta_lambda / 2))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    distance = R * c  # in meters
    return distance

# Calculate the distance for each trajectory
def calculate_trajectory_distance(df):
    distances = []
    for i in range(1, len(df)):
        lat1, lon1 = df.iloc[i - 1]['location_lat'], df.iloc[i - 1]['location_long']
        lat2, lon2 = df.iloc[i]['location_lat'], df.iloc[i]['location_long']
        distance = my_haversine(lat1, lon1, lat2, lon2)
        distances.append(distance)
    return distances
# Add 0 distance for the first entry
final_alison['distance'] = [0] + calculate_trajectory_distance(final_alison)  
final_conor['distance'] = [0] + calculate_trajectory_distance(final_conor)
final_marilyn['distance'] = [0] + calculate_trajectory_distance(final_marilyn)
final_christian['distance'] = [0] + calculate_trajectory_distance(final_christian)
#final_steve_devine['distance'] = [0] + calculate_trajectory_distance(final_steve_devine)

In [65]:
f_alison = final_alison.drop(['traj_id',  'timestamp', 'minute_diff', 'eobs_temperature', 'ground_speed', 'heading', 'height_above_ellipsoid', 'individual_taxon_canonical_name', 'individual_local_identifier'], axis = 1)
f_alison['date'] = pd.to_datetime(f_alison['date'])
f_alison['monthly_cohort'] = f_alison['date'].dt.to_period('M')
monthly_distance = f_alison.groupby('monthly_cohort')['distance'].sum()
Atop_months = monthly_distance.nlargest(10)
print(Atop_months)
#These are the top 

monthly_cohort
2016-05    11589.889729
2014-04    10952.728432
2013-05    10293.530016
2016-04     9941.790138
2016-01     9833.611398
2015-11     9445.390384
2016-03     9240.490131
2012-12     8846.319688
2013-06     8810.402716
2016-02     8754.238611
Freq: M, Name: distance, dtype: float64


In [66]:
f_conor = final_conor.drop(['traj_id',  'timestamp', 'minute_diff', 'eobs_temperature', 'ground_speed', 'heading', 'height_above_ellipsoid', 'individual_taxon_canonical_name', 'individual_local_identifier'], axis = 1)
f_conor['date'] = pd.to_datetime(f_conor['date'])
f_conor['monthly_cohort'] = f_conor['date'].dt.to_period('M')
monthly_distance = f_conor.groupby('monthly_cohort')['distance'].sum()
Ctop_months = monthly_distance.nlargest(10)
print(Ctop_months)
#These are the top

monthly_cohort
2014-05    13826.096676
2015-05    13001.414401
2012-05    12341.682921
2014-06    12029.277058
2014-07    12016.713804
2015-04    11957.082299
2015-06    11738.341257
2013-08    11657.250229
2012-02    11648.531480
2012-06    11629.406923
Freq: M, Name: distance, dtype: float64


In [67]:
f_christian = final_christian.drop(['traj_id',  'timestamp', 'minute_diff', 'eobs_temperature', 'ground_speed', 'heading', 'height_above_ellipsoid', 'individual_taxon_canonical_name', 'individual_local_identifier'], axis = 1)
f_christian['date'] = pd.to_datetime(f_christian['date'])
f_christian['monthly_cohort'] = f_christian['date'].dt.to_period('M')
monthly_distance = f_christian.groupby('monthly_cohort')['distance'].sum()
Chtop_months = monthly_distance.nlargest(10)
print(Chtop_months)

monthly_cohort
2017-05    21474.955264
2016-04    19735.899017
2014-06    19622.276109
2015-12    19471.238830
2012-01    19327.918998
2011-05    19327.210384
2015-06    19247.302659
2018-06    17711.791801
2012-06    17363.806493
2011-01    16975.427126
Freq: M, Name: distance, dtype: float64


In [68]:
f_marilyn = final_marilyn.drop(['traj_id',  'timestamp', 'minute_diff', 'eobs_temperature', 'ground_speed', 'heading', 'height_above_ellipsoid', 'individual_taxon_canonical_name', 'individual_local_identifier'], axis = 1)
f_marilyn['date'] = pd.to_datetime(f_marilyn['date'])
f_marilyn['monthly_cohort'] = f_marilyn['date'].dt.to_period('M')
monthly_distance = f_marilyn.groupby('monthly_cohort')['distance'].sum()
Mtop_months = monthly_distance.nlargest(10)
Mtop_months

monthly_cohort
2015-12    11931.649231
2013-10    11558.533041
2013-11    11025.342274
2018-07    10888.348494
2017-08    10751.736050
2010-10    10726.712627
2011-01    10634.985477
2015-11    10617.004352
2016-08    10547.724146
2011-02    10414.515627
Freq: M, Name: distance, dtype: float64

In [69]:
#f_steve_devine = final_steve_devine.drop(['traj_id',  'timestamp', 'minute_diff', 'eobs_temperature', 'ground_speed', 'heading', 'height_above_ellipsoid', 'individual_taxon_canonical_name', 'individual_local_identifier'], axis = 1)
#f_steve_devine['date'] = pd.to_datetime(f_steve_devine['date'])
#f_steve_devine['monthly_cohort'] = f_steve_devine['date'].dt.to_period('M')
#monthly_distance = f_steve_devine.groupby('monthly_cohort')['distance'].sum()
#top_3_months = monthly_distance.nlargest(3)
#print(top_3_months)

In [None]:
Overall, the months that the tortoses move seem to be inconsistent