In [1]:
import os
import gpxpy
import gpxpy.gpx
import sklearn
import numpy as np
from scipy import stats
from glob import glob

In [2]:
def parse_data():
    gpx_files = []
    vectors = []
    
    for file in glob("gpx_data/*.gpx"):
        gpx_file = gpxpy.parse(open(file))
        gpx_files.append(os.path.basename(file))
        
        for track in gpx_file.tracks:
            vector = [(point.latitude, point.longitude) for segment in track.segments for point in segment.points]
            vectors.append(vector)
    return vectors, gpx_files

In [3]:
paths, file_names = parse_data()

In [4]:
class BoundingBox(object):
    """
    A 2D bounding box
    """
    def __init__(self, points):
        if len(points) == 0:
            raise ValueError("Can't compute bounding box of empty list")
        self.minx, self.miny = float("inf"), float("inf")
        self.maxx, self.maxy = float("-inf"), float("-inf")
        for x, y in points:
            # Set min coords
            if x < self.minx:
                self.minx = x
            if y < self.miny:
                self.miny = y
            # Set max coords
            if x > self.maxx:
                self.maxx = x
            elif y > self.maxy:
                self.maxy = y
        self.midx = (self.minx + self.maxx) / 2
        self.midy = (self.miny + self.maxy) / 2
    @property
    def width(self):
        return self.maxx - self.minx
    @property
    def height(self):
        return self.maxy - self.miny

    def mid(self):
        return [self.midx, self.midy]
    def __repr__(self):
        return "BoundingBox({}, {}, {}, {})".format(
            self.minx, self.miny, self.maxx,self.maxy)

In [5]:
score_matrix = np.array([np.array([np.inf for _ in range(len(paths))], dtype="float64") for _ in range(len(paths))])
for i in range (len(paths)):
    for j in range (i+1, len(paths)):
            b1 = BoundingBox(paths[i])
            b2 = BoundingBox(paths[j])
            m1 = b1.mid()
            m2 = b2.mid()
            score = ((m2[0] - m1[0]) **2 + (m2[1] - m1[0]) **2) ** 1/2
            score_matrix[i][j] = score
            score_matrix[j][i] = score

score_matrix=np.array(score_matrix).reshape(len(paths), len(paths))
score_matrix


            
            

array([[           inf,  8235.07535657,  8235.08049001, ...,
         8235.51042164,  8235.41898009,  8235.2114001 ],
       [ 8235.07535657,            inf,  8235.07169899, ...,
         8235.50163039,  8235.41018887,  8235.20260876],
       [ 8235.08049001,  8235.07169899,            inf, ...,
         8235.5096516 ,  8235.41821006,  8235.21063005],
       ..., 
       [ 8235.51042164,  8235.50163039,  8235.5096516 , ...,
                   inf,  8235.41865925,  8235.21107925],
       [ 8235.41898009,  8235.41018887,  8235.41821006, ...,
         8235.41865925,            inf,  8235.16038478],
       [ 8235.2114001 ,  8235.20260876,  8235.21063005, ...,
         8235.21107925,  8235.16038478,            inf]])

In [7]:
# Top recommendations for User 0
tops = score_matrix[0].argsort()[:5]
results = []
for top in tops:
    results.append(file_names[top]) 
    
results

['20170826_123054.gpx',
 '20170904_110515.gpx',
 '20161102_104004.gpx',
 '20160901_162644.gpx',
 '20150910_185750.gpx']