In [58]:
import numpy as np
import pandas as pd
import geopandas as gp

import plotly.express as px
import plotly.graph_objects as go
from sklearn.cluster import DBSCAN
from meteostat import Point, Daily
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import haversine_distances



In [19]:
# Station table used by every later cell; the latitude/longitude columns are
# read from it below.
STATIONS_CSV = "standard_stations.csv"
standard_stations = pd.read_csv(STATIONS_CSV)
# Trip-level data is not needed in this notebook, so it is left disabled:
#df = pd.read_csv("baywheels_cleaned.csv")

In [20]:
# haversine_distances expects (latitude, longitude) pairs in radians and
# returns great-circle distances on the unit sphere, so the result is scaled
# by the Earth's mean radius to obtain kilometres.
EARTH_RADIUS_KM = 6371
station_coord_cols = ['station_latitude', 'station_longitude']

points_in_radians = np.radians(standard_stations[station_coord_cols]).to_numpy()
distances_in_km = haversine_distances(points_in_radians) * EARTH_RADIUS_KM

In [21]:
# Alias for the pairwise haversine matrix (km) built in the previous cell.
# NOTE(review): this matrix is not passed to DBSCAN below; the clustering runs
# on min-max-scaled coordinates with the default metric, so `eps` is expressed
# in scaled units rather than kilometres. Confirm this is intended.
distance_matrix = distances_in_km

coord_cols = ['station_latitude', 'station_longitude']

# Scale a copy so the raw coordinates in `standard_stations` stay untouched.
df = standard_stations.copy()
scaler = MinMaxScaler()
df[coord_cols] = scaler.fit_transform(df[coord_cols])

# One label per station.
clustering = DBSCAN(eps=0.1, min_samples=20)
clusters = clustering.fit_predict(df[coord_cols])

In [55]:
# Attach the DBSCAN label to every station (this adds a column in place).
standard_stations['clusters'] = clusters

# Mean coordinate per cluster label, with the coordinate columns suffixed
# "_centroid" and the label restored as a regular `clusters` column.
centroids = (
    standard_stations
    .groupby("clusters")[['station_latitude', 'station_longitude']]
    .mean()
    .add_suffix("_centroid")
    .reset_index()
)

In [56]:
# Display the per-cluster centroid table built in the previous cell.
centroids

Unnamed: 0,clusters,station_latitude_centroid,station_longitude_centroid
0,0,37.333472,-121.894176
1,1,37.754637,-122.425608
2,2,37.830497,-122.266752


In [49]:
# Plot every station on a tile map, coloured by its DBSCAN cluster label.
# px.scatter_mapbox centres the view on the plotted points when no explicit
# `center` is given, so no extra bounds fitting is done here
# (`fig.update_geos` only affects `layout.geo`, not mapbox subplots).
# NOTE(review): Stamen tiles are now served through Stadia Maps and may need
# an API key with recent plotly releases - confirm the basemap still renders.
fig = px.scatter_mapbox(standard_stations, lat='station_latitude', lon='station_longitude',
                        color="clusters",
                        mapbox_style="stamen-terrain")

# Overlay one marker per cluster centroid. The centroid frame stores its
# coordinates under the "*_centroid" column names, so those are the keys to
# read here. `hoverinfo='text'` shows only `text`, so a label is supplied.
fig.add_trace(go.Scattermapbox(
        lat=centroids['station_latitude_centroid'],
        lon=centroids['station_longitude_centroid'],
        mode='markers',
        text=centroids['clusters'].map("Cluster {} centroid".format),
        hoverinfo='text',
        name='centroids',
    ))

fig.show()