In [225]:
def greatCircle(lat1,lon1,lat2,lon2,r=None,verbose=False):
    '''Compute the great circle distance on a sphere
    <lat1>, <lat2>: scalar float or nd-array, latitudes in degree for
                    location 1 and 2.
    <lon1>, <lon2>: scalar float or nd-array, longitudes in degree for
                    location 1 and 2.
    <r>: scalar float, spherical radius.
    Return <arc>: great circle distance on sphere.
    '''
    from numpy import sin, cos
    if r is None:
        r=6371000. #m
    d2r=lambda x:x*np.pi/180
    lat1,lon1,lat2,lon2=map(d2r,[lat1,lon1,lat2,lon2])
    dlon=abs(lon1-lon2)
    numerator=(cos(lat2)*sin(dlon))**2 + \
            (cos(lat1)*sin(lat2) - sin(lat1)*cos(lat2)*cos(dlon))**2
    numerator=np.sqrt(numerator)
    denominator=sin(lat1)*sin(lat2)+cos(lat1)*cos(lat2)*cos(dlon)
    dsigma=np.arctan2(numerator,denominator)
    arc=r*dsigma
    return arc
def getBearing(lat1,lon1,lat2,lon2):
    '''Compute bearing from point 1 to point2
    Args:
        lat1,lat2 (float or ndarray): scalar float or nd-array, latitudes in
            degree for location 1 and 2.
        lon1,lon2 (float or ndarray): scalar float or nd-array, longitudes in
            degree for location 1 and 2.
    Returns:
        theta (float or ndarray): (forward) bearing in degree.
    NOTE that the bearing from P1 to P2 is in general not the same as that
    from P2 to P1.
    '''
    from numpy import sin, cos
    d2r=lambda x:x*np.pi/180
    lat1,lon1,lat2,lon2=map(d2r,[lat1,lon1,lat2,lon2])
    dlon=lon2-lon1
    theta=np.arctan2(sin(dlon)*cos(lat2),
            cos(lat1)*sin(lat2)-sin(lat1)*cos(lat2)*cos(dlon))
    theta=theta/np.pi*180
    theta=(theta+360)%360
    return theta
def getCrossTrackDistance(lat1,lon1,lat2,lon2,lat3,lon3,r=None):
    '''Compute cross-track distance
    Args:
        lat1, lon1 (float): scalar float or nd-array, latitudes and longitudes in
                        degree, start point of the great circle.
        lat2, lon2 (float): scalar float or nd-array, latitudes and longitudes in
                        degree, end point of the great circle.
        lat3, lon3 (float): scalar float or nd-array, latitudes and longitudes in
                        degree, a point away from the great circle.
    Returns:
        dxt (float): great cicle distance between point P3 to the closest point
                  on great circle that connects P1 and P2.
                  NOTE that the sign of dxt tells which side of the 3rd point
                  P3 is on.
    See also getCrossTrackPoint(), getAlongTrackDistance().
    '''
    from numpy import sin
    if r is None:
        r=6371000.  #m
    # get angular distance between P1 and P3
    delta13=greatCircle(lat1,lon1,lat3,lon3,r=1.)
    # bearing between P1, P3
    theta13=getBearing(lat1,lon1,lat3,lon3)*np.pi/180
    # bearing between P1, P2
    theta12=getBearing(lat1,lon1,lat2,lon2)*np.pi/180
    dtheta=np.arcsin(sin(delta13)*sin(theta13-theta12))
    dxt=r*dtheta
    return dxt
def distanceGC(a,b):
    '''Great circle distance
    Args:
        a (tuple): (lat, lon) coordinates of point A.
        b (tuple): (lat, lon) coordinates of point B.
    Returns:
        result (float): great circle distance from A to B, on unit sphere.
    '''
    return greatCircle(a[0],a[1],b[0],b[1],r=1)
def point_line_distanceGC(point,start,end):
    '''Shortest distance between a point and a great circle curve on unit sphere
    Args:
        point (tuple): (lonQ, lat) coordinates of a point on unit sphere.
        start (tuple): (lon, lat) coordinates of the starting point of a curve
            on unit sphere.
        end (tuple): (lon, lat) coordinates of the end point of a curve
            on unit sphere.
    Returns:
        result (float): shortest distance from point to line.
    '''
    if (start == end):
        return distanceGC(point, start)/np.pi*180.
    else:
        dxt=getCrossTrackDistance(start[0],start[1],
                end[0],end[1],
                point[0],point[1],
                r=1)
        dxt=abs(dxt/np.pi*180)
        return dxt
def rdpGC(points, epsilon):
    '''Geodesic version of rdp.
    Args:
        points (list): list of (lon, lat) coordinates on unit sphere.
        epsilon (float): error threshold.
    Returns:
        results (list): a list of (lon, lat) coordinates of simplified curve.
    '''
    dmax = 0.0
    index = 0
    for i in range(1, len(points) - 1):
       
        d = point_line_distanceGC((points[i][2],points[i][3]), (points[0][2],points[0][3]), (points[-1][2],points[-1][3]))
        if d > dmax:
            index = i
            dmax = d
    if dmax >= epsilon:
        results = rdpGC(points[:index+1], epsilon)[:-1] + rdpGC(points[index:], epsilon)
    else:
        results=[points[0],points[-1]]
    return results

In [228]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#Long-tailed ducks GLS 2018.csv  Epsilon:20
#3D flights of European free-tailed bats.csv Epsilon:0.015
#Andean Condor Vultur gryphus Bariloche, Argentina, 2013-2018.csv Epsilon:0.02
#Bald Eagle (Haliaeetus leucocephalus) in the Pacific Northwest.csv Epsilon:0.21
#Black-backed jackal, Etosha National Park, Namibia.csv Epsilon:0.05
#Blue and fin whales Southern California 2014-2015 - Fastloc GPS data Epsilon:0.05
#Caspian Gulls - Poland Epsilon: NA (should be <0.03 => data size  2200  )
#Common Crane Lithuania GPS, 2016 Epsilon: 0.5
#Fin whales Gulf of California 2001 - Argos data Epsilon: 0.1
#Galapagos Albatrosses Epsilon: 0.07
#Long-tailed ducks GLS 2018 Epsilon: 15
df = pd.read_csv('../../data/Blue and fin whales Southern California 2014-2015 - Fastloc GPS data.csv',parse_dates=True)
df['timestamp']= pd.to_datetime(df['timestamp'])


In [229]:
list(df.columns)

['id',
 'timestamp',
 'long',
 'lat',
 'taxonomical_name',
 'tag_local_identifier',
 'individual_local_identifier',
 'study_name']

In [254]:
import json

res=df.dropna(subset=['lat', 'long','timestamp']).sort_values('timestamp',ascending=False).groupby(['study_name', 'individual_local_identifier']).tail(1)

#tupleapp=list(tuple(a.tolist()) for a in arr )
df2=df[["long","lat"]]
arr =df.to_numpy()
#res=rdpGC(tupleapp,0.005)
mydict={}
i=1
epsilon=0.005 
res2=df.dropna(subset=['lat', 'long','timestamp']).sort_values('timestamp',ascending=False).groupby(['study_name', 'individual_local_identifier'])
print("Epsilon: ",epsilon)
for name, group in res2:
    tuple_group=list(tuple(a.tolist()) for a in group.to_numpy())
    simplified_for_each_individual=rdpGC(tuple_group,epsilon)
     
    #simplified_dictionary = dict.fromkeys(fruits, "In stock")
    
    print(type(simplified_for_each_individual[0]))
    simplified_df = pd.DataFrame(simplified_for_each_individual, columns =list(df.columns))
    simplified_array=simplified_df[["long","lat"]].to_numpy()
    #simplified_df["location"]=[simplified_df["long"],simplified_df["lat"]]
    if i==1 :
        i=2
        print(simplified_df)
        break
    mydict[name[0]+"#!#"+name[1] ]=simplified_for_each_individual;
#print(mydict)

Epsilon:  0.005
<class 'tuple'>
             id           timestamp      long      lat       taxonomical_name  \
0   12217824651 2014-08-23 17:10:59 -118.6439  33.1642  Balaenoptera musculus   
1   12217824650 2014-08-23 11:47:32 -119.0028  33.2353  Balaenoptera musculus   
2   12217824649 2014-08-23 10:14:01 -119.0550  33.2785  Balaenoptera musculus   
3   12217824647 2014-08-23 04:33:18 -119.1527  33.4749  Balaenoptera musculus   
4   12217824646 2014-08-23 01:35:00 -119.1628  33.5863  Balaenoptera musculus   
..          ...                 ...       ...      ...                    ...   
89  12217824468 2014-08-05 02:22:57 -119.0046  33.9828  Balaenoptera musculus   
90  12217824467 2014-08-05 01:26:13 -119.0356  33.9930  Balaenoptera musculus   
91  12217824466 2014-08-05 00:47:06 -119.0312  34.0005  Balaenoptera musculus   
92  12217824462 2014-08-04 23:13:06 -119.0492  34.0167  Balaenoptera musculus   
93  12217824461 2014-08-04 21:33:14 -119.0606  33.9908  Balaenoptera musculus

In [253]:
simplified_df[["long","lat"]].to_numpy()

array([[-118.6439,   33.1642],
       [-119.0028,   33.2353],
       [-119.055 ,   33.2785],
       [-119.1527,   33.4749],
       [-119.1628,   33.5863],
       [-119.2325,   33.7644],
       [-119.3794,   33.914 ],
       [-119.4407,   33.9119],
       [-119.6041,   34.0568],
       [-119.4596,   34.2266],
       [-119.4428,   34.1488],
       [-119.1302,   34.0651],
       [-118.6332,   33.8046],
       [-118.2945,   33.6432],
       [-118.3004,   33.632 ],
       [-118.2952,   33.6384],
       [-118.2804,   33.5257],
       [-118.2231,   33.4109],
       [-117.8665,   32.9039],
       [-117.7101,   32.7447],
       [-117.5041,   32.7013],
       [-117.3384,   32.6021],
       [-117.2961,   32.4249],
       [-117.2227,   32.2006],
       [-116.8014,   31.8604],
       [-116.761 ,   31.7564],
       [-116.8511,   31.868 ],
       [-116.8341,   31.8865],
       [-116.7814,   31.8657],
       [-116.7251,   31.8067],
       [-116.763 ,   31.7504],
       [-116.7057,   31.6734],
       [