In [3]:
import pandas as pd
import numpy as np
import os
import glob

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.cluster import DBSCAN
import sklearn.utils
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.metrics.pairwise import haversine_distances

from math import radians


In [4]:
# Candidate stay durations, expressed as a number of consecutive samples.
# Durations are spread evenly between `lower_bound` (3 hours) and
# `upper_bound` (48 hours), rounded to whole hours, and then divided by
# `freq_median` so that each value is "how many samples fit in that duration".
# The next cell sweeps these values as the DBSCAN `min_samples` parameter.

# Other balloon identifiers (presumably the full set that can be processed):
#['N139LB', 'N166LB', 'N191LB', 'N211LB', 'N221LB', 'N225LB', 'N226LB', 'N228LB', 'N234LB',\
#                   'N235LB', 'N238LB', 'N252LB', 'N253LB', 'N271LB', 'N329LB', 'N789LB']

target_loons = ['N235LB']

# Sampling interval used to convert durations into sample counts
# (presumably the median interval between consecutive fixes -- TODO confirm).
freq_median = pd.to_timedelta(61, unit='s')

upper_bound = 48   # hours
lower_bound = 3    # hours

num_values = 20

# Spacing between consecutive candidate durations, in hours. No longer needed
# to build the grid below, but kept because other cells may reference it.
step_size = (upper_bound - lower_bound) / (num_values-1)

# np.linspace guarantees exactly `num_values` points with both endpoints
# included. The previous np.arange(lower, upper + step, step) call relied on a
# floating-point step, where rounding error can add an extra element.
stay_point_sizes = np.linspace(lower_bound, upper_bound, num_values)

# Round to whole hours before converting to sample counts.
stay_point_sizes = np.around(stay_point_sizes)

stay_point_sizes = pd.to_timedelta(stay_point_sizes, unit='h')
stay_point_sizes = stay_point_sizes / freq_median

stay_point_sizes

Float64Index([177.04918032786884, 295.08196721311475,  472.1311475409836,
               590.1639344262295,  708.1967213114754,  885.2459016393443,
              1003.2786885245902,  1180.327868852459,  1298.360655737705,
              1416.3934426229507, 1593.4426229508197, 1711.4754098360656,
              1829.5081967213114, 2006.5573770491803,  2124.590163934426,
              2301.6393442622953, 2419.6721311475408, 2537.7049180327867,
               2714.754098360656, 2832.7868852459014],
             dtype='float64')

In [5]:
# Sweep DBSCAN over every candidate `min_samples` value for each target
# balloon, using the eps stored in that balloon's "epsChosen" file, and write
# one labelled CSV per (balloon, min_samples) combination.

# Not used in this cell; kept because other cells may reference it.
kms_per_radian = 6371.0088

for loon in target_loons:

    bl = pd.read_csv('../../../../og_data/DBSCAN/epsChosen/CL-' + loon + '-eps.csv', parse_dates=['ts'], \
                     low_memory=False)

    # The file carries the chosen eps in an 'eps' column; take its first value.
    eps = bl['eps'].unique()[0]
    print(eps)

    # The clustering input depends only on the lat/lon columns, which do not
    # change across the sweep, so compute it once per balloon and not once per
    # `minpts` value.
    # NOTE(review): the haversine metric expects [lat, lon] in radians, but the
    # coordinates are standardised before the radian conversion, so distances
    # are not true great-circle distances. Left unchanged because eps was
    # chosen upstream -- confirm it was derived in this same transformed space.
    dbscan_bl = StandardScaler().fit_transform(bl[['lat', 'lon']])
    coords_rad = np.radians(dbscan_bl)

    # Make sure the per-balloon output directory exists before writing.
    out_dir = '../../../../og_data/DBSCAN/minptsRange/' + str(loon)
    os.makedirs(out_dir, exist_ok=True)

    for minpts in stay_point_sizes:

        # DBSCAN requires an integer `min_samples`; recent scikit-learn
        # releases reject floats. Neighbour counts are integers, so
        # "count >= minpts" is equivalent to "count >= ceil(minpts)" and the
        # clustering is unchanged with respect to the fractional value.
        min_samples = int(np.ceil(minpts))

        db = DBSCAN(eps=eps, min_samples=min_samples, algorithm='ball_tree', metric='haversine') \
                .fit(coords_rad)

        print(str(loon) + ': ' + str(np.unique(db.labels_)) + '; eps=' + str(eps) \
                + '; min_pts= ' + str(minpts))

        bl['cluster'] = db.labels_
        # Keep the original (fractional) value in the column and in the file
        # name so existing outputs and downstream readers stay compatible.
        bl['minpts'] = minpts

        # Save to file
        bl.to_csv( out_dir \
                                      + '/CL-' + loon \
                                      + '-' + 'mp_' + str(minpts) + '-eps_' + str(eps) \
                                      + '.csv', index=False, encoding='utf-8-sig')

0.001330864174669018
N235LB: [-1  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]; eps=0.001330864174669018; min_pts= 177.04918032786884
N235LB: [-1  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]; eps=0.001330864174669018; min_pts= 295.08196721311475
N235LB: [-1  0  1  2  3  4  5  6  7  8  9 10 11 12 13]; eps=0.001330864174669018; min_pts= 472.1311475409836
N235LB: [-1  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]; eps=0.001330864174669018; min_pts= 590.1639344262295
N235LB: [-1  0  1  2  3  4  5  6  7  8  9 10 11 12]; eps=0.001330864174669018; min_pts= 708.1967213114754
N235LB: [-1  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]; eps=0.001330864174669018; min_pts= 885.2459016393443
N235LB: [-1  0  1  2  3  4  5  6  7  8  9]; eps=0.001330864174669018; min_pts= 1003.2786885245902
N235LB: [-1  0  1  2  3  4  5]; eps=0.001330864174669018; min_pts= 1180.327868852459
N235LB: [-1  0  1  2  3  4  5]; eps=0.001330864174669018; min_pts= 1298.360655737705
N235LB: [-1  0  1  2  3  4]; eps=0.00133