In [1]:
import pandas as pd
import numpy as np

import os

from sklearn.cluster import DBSCAN
import sklearn.utils
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.metrics.pairwise import haversine_distances

import matplotlib.pyplot as plt
import seaborn as sns

import math
from math import radians
import datetime

from keplergl import KeplerGl
import geopandas as gpd

In [2]:
# ['N252LB', 'N253LB', 'N271LB', 'N329LB', 'N789LB', 'N235LB']

# Loons to compute
target_loons = ['N225LB', 'N253LB']


#-------------------USER INPUT!!!-------------------

# Compute the range of distances to be used in DBSCAN
num_values = 20
start_distance = 0.1
end_distance = 20

step_size = (end_distance - start_distance) / (num_values-1)
max_distances = np.arange(start_distance, end_distance + step_size, step_size)

print(max_distances)

#---------------------------------------------------


# Iterate over the batch

for loon in target_loons:

    # First the loon data is read
    bl = pd.read_csv('../../../../og_data/' + loon + '.csv', parse_dates=['ts'], low_memory=False) 
    
    kms_per_radian = 6371.0088

    # Compute the DBSCANs with the range of distances
    for distance in max_distances:

        dbscan_bl = StandardScaler().fit_transform(bl[['lat', 'lon']])

        # eps parameter must be in radians
        eps = distance / kms_per_radian

        # For now we set MinPoints = 4
        db = DBSCAN(eps=eps, min_samples=4, algorithm='ball_tree', metric='haversine') \
                .fit(np.radians(dbscan_bl))

        print(str(loon) + ': ' + str(np.unique(db.labels_)) + '; eps=' + str(eps) \
                + '; max_distance= ' + str(distance))

        bl['cluster'] = db.labels_
        bl['eps'] = eps
        bl['max_distance'] = distance


        # Save clustered baseline to file   
        bl.to_csv( '../../../../og_data/DBSCAN/epsRange/' + str(loon) \
                                      + '/CL-' + loon \
                                      + '-' + 'mp_4' + '-eps_' + str(eps) \
                                      + '.csv', index=False, encoding='utf-8-sig')
    
    


[ 0.1         1.14736842  2.19473684  3.24210526  4.28947368  5.33684211
  6.38421053  7.43157895  8.47894737  9.52631579 10.57368421 11.62105263
 12.66842105 13.71578947 14.76315789 15.81052632 16.85789474 17.90526316
 18.95263158 20.        ]
N225LB: [  -1    0    1 ... 1289 1290 1291]; eps=1.5696101377226163e-05; max_distance= 0.1
N225LB: [-1  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22
 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68]; eps=0.0001800921105387002; max_distance= 1.1473684210526316
N225LB: [-1  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22
 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40]; eps=0.0003444881197001742; max_distance= 2.194736842105263


KeyboardInterrupt: 