# First guesses for hub locations 
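For each number of hubs n from 1 to 50, find first guesses for the hub locations using k-means clustering: the grid points are clustered into n groups, and each cluster centroid is matched to its nearest candidate location.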

In [7]:
# import modules
import geopandas as gpd 
import pandas as pd 
import numpy as np
from sklearn.cluster import KMeans 

In [32]:
# read the two input layers with geopandas
# infoGrid: the layer whose point coordinates are clustered by findCentroids
# candiInfo: the layer searched for the geometry nearest to each cluster centroid
#            (presumably the candidate hub locations -- confirm against the data source)
infoGridFile = gpd.read_file('data/infoGrid_ams.shp')
candiInfoFile = gpd.read_file('data/candiInfo_ams.shp')

In [92]:
# k-means clustering of infoGrid; the number of clusters (= number of hubs) is the input
def findCentroids(n_clusters):
    """Return, per k-means cluster of infoGrid, the nearest candiInfo position.

    The x/y coordinates of the geometries in the module-level ``infoGridFile``
    are clustered into ``n_clusters`` groups (``random_state=0``). For every
    cluster the centroid of its dissolved members is computed and looked up
    in the spatial index of the module-level ``candiInfoFile``.

    Parameters
    ----------
    n_clusters : int
        Number of clusters passed to ``KMeans``.

    Returns
    -------
    list
        One entry per cluster, in the order in which the cluster labels first
        appear in infoGrid: the position (as reported by the spatial index)
        of the candiInfo geometry nearest to that cluster's centroid.
    """
    # work on a copy: the columns added below must not end up in infoGridFile
    grid = infoGridFile.copy()
    grid['x'] = grid.geometry.x
    grid['y'] = grid.geometry.y
    coords = np.array(grid[['x', 'y']])

    # cluster the coordinates and tag every grid row with its cluster label
    model = KMeans(n_clusters=n_clusters, random_state=0)
    model.fit(coords)
    grid['cluster'] = model.labels_

    candidates = candiInfoFile.copy()

    def nearestCandidate(label):
        # centroid of all grid geometries that carry this cluster label
        members = grid[grid.cluster == label]
        centre = members.dissolve().centroid
        # nearest() yields a 2-row array (input positions, tree positions);
        # [1, 0] picks the tree position of the first match
        return candidates.geometry.sindex.nearest(centre)[1, 0]

    return [nearestCandidate(label) for label in grid.cluster.unique()]

In [99]:
%%time
# run function for n hubs ranging from 1 - 50 (both ends inclusive)
maxHubs = 50
# range() excludes its stop value, so stop at maxHubs + 1 to include the 50-hub case
candiDict = {nHubs: findCentroids(nHubs) for nHubs in range(1, maxHubs + 1)}
# one row per hub count; candiIndexes holds the list returned by findCentroids
firstGuessesDf = pd.DataFrame({
    'nHubs': list(candiDict.keys()),
    'candiIndexes': list(candiDict.values()),
})

CPU times: total: 2min 1s
Wall time: 21.6 s


In [126]:
# save file
# work on a copy so the index lists in firstGuessesDf stay as computed;
# the copy is shifted by one and turned into strings before it is written
# (the _r suffix presumably refers to a 1-based consumer such as R -- confirm)
firstGuessesDf_r = firstGuessesDf.copy()
def plusOne(l):
    """Return a new list holding every element of ``l`` increased by one."""
    # the input is left untouched; a fresh list is built
    return [value + 1 for value in l]
# shift every index list by one, then flatten it to a comma-separated string
firstGuessesDf_r['candiIndexes'] = firstGuessesDf_r['candiIndexes'].map(
    lambda idxList: ','.join(map(str, plusOne(idxList)))
)
# write the result (the DataFrame index is written as the first column)
firstGuessesDf_r.to_csv('data/firstGuesses_ams.csv')