In [None]:
from sklearn.cluster import DBSCAN
from shapely.geometry import MultiPoint,Point,Polygon
import pandas as pd
import geopandas as gp
from shapely import wkt
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the US county boundary polygons and reproject them to EPSG:4326
# so they share a coordinate system with the point data plotted later.
county_shapefile = r'\Users\mhardika\Documents\AMO\GeoToolAll_Methods\GeoData\US_County_Boundaries\US_CountyBndrys.shp'
us_counties = gp.read_file(county_shapefile).to_crs("EPSG:4326")

Try DBSCAN for Colorado
1. Read the Colorado data
2. Split the point data into latitude and longitude, county by county

In [None]:
# Read the location table; its `location` column holds WKT geometry text.
co_location_csv = r'\Users\mhardika\Documents\AMO\GeoToolAll_Methods\Water Source Data\Industrial\co_location_data.csv'
df = pd.read_csv(co_location_csv)

# Parse the WKT strings into geometries, declare their source CRS (EPSG:3857)
# and reproject to EPSG:4326 so that x/y can be read as lon/lat.
# (The original mid-cell `df.head()` was dropped: only the last expression
# of a cell is displayed, so it had no effect.)
location = (
    gp.GeoSeries(df.location.apply(wkt.loads))
    .set_crs("EPSG:3857")
    .to_crs("EPSG:4326")
)

# `location` shares df's index, so the coordinates align row by row.
df['lon'] = location.x
df['lat'] = location.y

df.head()

Find clusters of points within each county and calculate the centroid of each cluster

In [None]:
# Number of kilometers in one radian (Earth radius in km). The haversine
# metric works on angular distances, so the search radius must be in radians.
kms_per_radian = 6371.0088
# Neighbourhood radius: 10 km expressed in radians.
epsilon = 10 / kms_per_radian

centroids = []

# groupby(sort=False) visits counties in first-appearance order, like
# `unique()`, but skips rows whose county is missing. Those rows previously
# produced an empty selection (NaN never equals NaN) and made DBSCAN.fit raise.
for county, df1 in df.groupby('county', sort=False):
    # The haversine metric expects (lat, lon) pairs in radians.
    coords = df1[['lat', 'lon']].to_numpy()

    db = DBSCAN(eps=epsilon, min_samples=1, algorithm='ball_tree', metric='haversine').fit(np.radians(coords))
    cluster_labels = db.labels_

    # Iterate over the labels that actually occur rather than
    # range(len(set(labels))), which is only right when no noise label exists.
    for label in np.unique(cluster_labels):
        if label == -1:
            # Noise points (only possible if min_samples is raised above 1)
            # do not form a cluster and therefore have no centroid.
            continue
        # Build the geometry once and reuse its centroid.
        centroid = MultiPoint(coords[cluster_labels == label]).centroid
        # Points were built from (lat, lon), so x is latitude and y is longitude.
        centroids.append((centroid.x, centroid.y))

# Always a (n_clusters, 2) array of (lat, lon), even when no cluster was found,
# so column indexing such as centroids[:, 1] keeps working.
centroids = np.array(centroids).reshape(-1, 2)



In [None]:
# Select the county polygons of one state by its STATEFP code.
state_code = '08'
state = us_counties.loc[us_counties['STATEFP']==state_code]

fig, (ax0, ax) = plt.subplots(1, 2, figsize=(10, 5))

# Reuse the EPSG:4326 point series built when the CSV was loaded instead of
# parsing and reprojecting the WKT column a second time.
industry_loc = location

# Left panel: all locations. Right panel: the same locations plus centroids.
# No `figsize` is passed to .plot(): it is ignored when `ax` is supplied, and
# the figure size is already set by plt.subplots above.
for axis, title in ((ax0, 'All locations'), (ax, 'Locations and cluster centroids')):
    state.plot(ax=axis, facecolor='none', edgecolor='black')
    industry_loc.plot(ax=axis)
    axis.set_title(title)
    axis.set_xlabel('Longitude')
    axis.set_ylabel('Latitude')

# `centroids` columns are (lat, lon): column 1 is the x axis, column 0 the y axis.
ax.scatter(centroids[:, 1], centroids[:, 0], s=5, color='black')

fig.tight_layout()
plt.show()