In [1]:
import sys
import pandas as pd
import xml.etree.ElementTree as ET
from math import radians, cos, sin, asin, sqrt
from pykalman import KalmanFilter

filename1 = sys.argv[1]
filename1 = "walk1.gpx"

In [2]:
def output_gpx(points, output_filename):
    """
    Output a GPX file with latitude and longitude from the points DataFrame.
    """
    from xml.dom.minidom import getDOMImplementation
    def append_trkpt(pt, trkseg, doc):
        trkpt = doc.createElement('trkpt')
        trkpt.setAttribute('lat', '%.8f' % (pt['lat']))
        trkpt.setAttribute('lon', '%.8f' % (pt['lon']))
        trkseg.appendChild(trkpt)
    
    doc = getDOMImplementation().createDocument(None, 'gpx', None)
    trk = doc.createElement('trk')
    doc.documentElement.appendChild(trk)
    trkseg = doc.createElement('trkseg')
    trk.appendChild(trkseg)
    
    points.apply(append_trkpt, axis=1, trkseg=trkseg, doc=doc)
    
    with open(output_filename, 'w') as fh:
        doc.writexml(fh, indent=' ')
        
def read_gpx(filename):
    tree = ET.parse(filename)
    ns = tree.getroot().tag.split('}')[0]+'}'
    pt_nodes = tree.findall("./{0}trk/{0}trkseg/{0}trkpt".format(ns))
    pts = [l.attrib for l in pt_nodes]
    df = pd.DataFrame(pts)
    df['lat'] =df['lat'].apply(float)
    df['lon'] =df['lon'].apply(float)
    return df

def haversine(lat1, lon1, lat2, lon2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    r = 6371 # Radius of earth in kilometers. Use 3956 for miles
    r = r * 1000 # convert to meters
    return c * r


In [3]:
def distance(points_df):
    points_df.columns = ['lat1','lon1']
    points_shift = points_df.shift(1)
    points_shift.columns = ['lat2','lon2']
    points_comb = points_df.join(points_shift)[1:]
    points_comb['dist'] = points_comb.apply(lambda x: haversine(x.lat1,x.lon1,x.lat2,x.lon2),axis=1)
    return points_comb['dist'].sum()

points = read_gpx(filename1)
print('Unfiltered distance: %0.2f' % (distance(points)))

Unfiltered distance: 3186.44


In [10]:
def smooth(points_df):
    initial_state = points_df.iloc[0]
    observation_stddev = 10 / (10 ** 5)
    transition_stddev = 0.2 / (10 ** 5)
    observation_covariance = [[observation_stddev ** 2, 0], [0, observation_stddev ** 2]]
    transition_covariance = [[transition_stddev ** 2, 0], [0, transition_stddev ** 2]]


    kf = KalmanFilter(
                    initial_state_mean=initial_state,
                    transition_matrices = [[1, 0], [0, 1]], 
                    observation_matrices = [[1, 0], [0, 1]],
                    observation_covariance = observation_covariance,
                    transition_covariance = transition_covariance)
    kalman_smoothed, _ = kf.smooth(points)
    df = pd.DataFrame(columns= ['lat','lon'])
    df['lat'] = smoothed_points[:,0]
    df['lon'] = smoothed_points[:,0]
    return kalman_smoothed


smoothed_points=smooth(points)
smoothed_points[:,0]
#print('Filtered distance: %0.2f' % (distance(smoothed_points)))

array([ 49.27911473,  49.27911431,  49.27911345,  49.27911215,
        49.27911033,  49.27910807,  49.27910529,  49.2791021 ,
        49.27909838,  49.27909423,  49.27908962,  49.27908456,
        49.27907902,  49.27907304,  49.27906666,  49.27905985,
        49.27905262,  49.279045  ,  49.27903697,  49.2790285 ,
        49.27901961,  49.27901023,  49.27900052,  49.27899035,
        49.27897971,  49.27896858,  49.27895687,  49.27894468,
        49.27893201,  49.27891885,  49.27890521,  49.27889103,
        49.2788763 ,  49.27886103,  49.2788452 ,  49.27882883,
        49.2788119 ,  49.27879443,  49.27877642,  49.27875787,
        49.27873877,  49.27871915,  49.27869907,  49.27867855,
        49.27865766,  49.27863643,  49.27861503,  49.2785934 ,
        49.27857158,  49.27854957,  49.27852733,  49.27850496,
        49.27848254,  49.27846007,  49.27843765,  49.27841522,
        49.27839279,  49.27837039,  49.27834802,  49.27832566,
        49.27830332,  49.27828096,  49.27825864,  49.27

In [None]:
points2 = pd.DataFrame({
    'lat': [49.28, 49.26, 49.26],
    'lon': [123.00, 123.10, 123.05]})
points2
distance(points2)

In [None]:
haversine(49.27572142,123.01833098,49.2758587,123.0183036)


In [None]:
def main():
    points = get_data(sys.argv[1])
    print('Unfiltered distance: %0.2f' % (distance(points),))
    
    smoothed_points = smooth(points)
    print('Filtered distance: %0.2f' % (distance(smoothed_points),))
    output_gpx(smoothed_points, 'out.gpx')


if __name__ == '__main__':
    main()