In [1]:
import datetime as datetime
from pathlib import Path

import geopandas as gp
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from meteostat import Point, Hourly
from sklearn.cluster import DBSCAN
from sklearn.metrics.pairwise import haversine_distances
from sklearn.preprocessing import MinMaxScaler



ModuleNotFoundError: No module named 'meteostat'

In [None]:
# Load the station table; later cells read its 'station_latitude' and
# 'station_longitude' columns and add a 'clusters' column to it.
standard_stations = pd.read_csv("standard_stations.csv")
# Disabled load of baywheels_cleaned.csv. The name `df` is instead bound later
# in the notebook to a copy of standard_stations.
#df = pd.read_csv("baywheels_cleaned.csv")

In [None]:
# Pairwise great-circle distances between all stations.
# The coordinates are converted from degrees to radians before being handed to
# haversine_distances; the result is then scaled by 6371 (presumably Earth's
# mean radius in kilometres, matching the variable name) to express it in km.
station_coords_deg = standard_stations[['station_latitude', 'station_longitude']]
points_in_radians = np.radians(station_coords_deg.to_numpy())
distances_in_km = haversine_distances(points_in_radians) * 6371

In [None]:
# Alias for the haversine matrix computed earlier.
# NOTE(review): nothing in this cell passes distance_matrix to DBSCAN -- the
# clustering below runs on min-max scaled latitude/longitude instead. Confirm
# that is intended before relying on the distances.
distance_matrix = distances_in_km

coordinate_columns = ['station_latitude', 'station_longitude']

# Scale a copy so the raw coordinates in standard_stations stay untouched.
df = standard_stations.copy()
scaler = MinMaxScaler()
df[coordinate_columns] = scaler.fit_transform(df[coordinate_columns])

# eps applies to the scaled coordinates, not to kilometres.
clustering = DBSCAN(eps=0.1, min_samples=20)
clusters = clustering.fit_predict(df[coordinate_columns])

In [None]:
# Attach each station's cluster label, then average the coordinates per label
# to obtain one centroid row per cluster.
# NOTE(review): if DBSCAN produced a noise label it is averaged like any other
# group here -- confirm whether a "centroid" for noise points is wanted.
standard_stations['clusters'] = clusters

coordinate_means = standard_stations.groupby("clusters")[['station_latitude', 'station_longitude']].mean()
centroid_column_names = {
    "station_latitude": "station_latitude_centroid",
    "station_longitude": "station_longitude_centroid",
}
# reset_index turns the cluster label back into an ordinary 'clusters' column.
centroids = coordinate_means.rename(columns=centroid_column_names).reset_index()

In [None]:
# Display the centroid table (one row per cluster label) for inspection.
centroids

In [None]:
# Persist the centroid table. With to_csv's default settings the DataFrame
# index is written as well, as an unnamed first column.
centroids.to_csv("centroids.csv")

In [None]:
# Maps a cluster label to the weather DataFrame fetched for that cluster's
# centroid. Re-running this cell discards everything fetched so far.
cluster_weather_dict = {}

In [None]:
# Time window requested from meteostat for every cluster centroid.
start = datetime.datetime(2020, 12, 31)
end = datetime.datetime(2023, 1, 1)

# Weather variables drawn in the per-cluster overview chart.
weather_plot_columns = ['temp', 'dwpt', 'rhum', 'prcp', 'snow',
                        'wdir', 'wspd', 'wpgt', 'pres', 'tsun', 'coco']

# itertuples keeps each column's own dtype. Reading a whole row with
# centroids.loc[i] would upcast it to float, turning cluster label 0 into 0.0
# in the dict keys and in anything derived from them.
# NOTE(review): a centroid row for a DBSCAN noise label (if present) is fetched
# like any other cluster -- confirm that is wanted.
for centroid in centroids.itertuples(index=False):
    # Weather is looked up at the centroid's coordinates.
    location = Point(centroid.station_latitude_centroid,
                     centroid.station_longitude_centroid)

    # Hourly observations for the [start, end] window.
    hourly_weather = Hourly(location, start, end).fetch()
    cluster_weather_dict[centroid.clusters] = hourly_weather

    # One chart per cluster; the title tells the otherwise identical charts apart.
    hourly_weather.plot(y=weather_plot_columns, title=f"Cluster {centroid.clusters}")


In [None]:
# Stack the per-cluster weather tables into one long table, tagging every row
# with the cluster label it was fetched for.
tagged_frames = []
for cluster_label, cluster_weather in cluster_weather_dict.items():
    # The tag is written onto the stored frame too, so the dict entries carry
    # the same 'centroid' column after this cell runs.
    cluster_weather['centroid'] = cluster_label
    tagged_frames.append(cluster_weather)

# DataFrame.append was removed in pandas 2.0. A single concat replaces it and
# avoids re-copying the accumulated frame on every iteration.
# pd.concat raises on an empty list, so fall back to an empty frame.
weather_data = pd.concat(tagged_frames) if tagged_frames else pd.DataFrame()
    

In [None]:
# Save the combined weather table (all clusters, tagged by the 'centroid'
# column) to disk.
weather_data.to_csv("weather_data.csv")

In [None]:
# Cast the cluster labels to strings before plotting.
# NOTE(review): presumably so plotly express treats color="clusters" as
# discrete categories rather than a continuous scale -- confirm.
# The column is overwritten in place, so the labels are no longer numeric
# after this cell has run.
standard_stations['clusters'] = standard_stations['clusters'].astype(str)

In [None]:
# Interactive map: one marker per station coloured by its cluster label,
# drawn on OpenStreetMap tiles.
fig = px.scatter_mapbox(
    standard_stations,
    lat='station_latitude',
    lon='station_longitude',
    color="clusters",
    mapbox_style="open-street-map",
)

# Overlay the cluster centroids as larger, semi-transparent black markers.
# NOTE(review): hoverinfo='text' is set but no `text` is supplied, so the
# centroid markers presumably show no hover label -- confirm that is intended.
fig.add_trace(go.Scattermapbox(
    lat=centroids['station_latitude_centroid'],
    lon=centroids['station_longitude_centroid'],
    mode='markers',
    hoverinfo='text',
    name="Cluster Centroids",
    marker=go.scattermapbox.Marker(
        size=15,
        color='rgb(0, 0, 0)',
        opacity=0.7,
    ),
))

# NOTE(review): update_geos configures `geo` subplots, while every trace in
# this figure is a mapbox trace, so this call likely has no visible effect --
# confirm and drop it, or set the mapbox center/zoom instead.
fig.update_geos(fitbounds="locations")
fig.show()

# write_html does not create missing directories; make sure the output folder
# exists so the export also works on a fresh checkout.
Path("Plots").mkdir(parents=True, exist_ok=True)
fig.write_html("Plots/clusters.html")