In [1]:
import pandas as pd
import plotly.graph_objs as go
import plotly.offline as offline
from plotly.graph_objs import Scatter, Layout
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.datasets.samples_generator import make_blobs
from sklearn.preprocessing import StandardScaler
offline.init_notebook_mode(connected=True)

**Clustering Buildings**
Functions to cluster buildings using DBScan, KMeans, and Hierarchical Ward
Always define parameters and read CSV file. Select one or more of the algorithms by running their individual cells.

Define Parameters


In [6]:
csvFileName = "CSVCoordinates/ArroyoGrande_Buildings_Coordinates_original.csv"
numClusters = 20

Read CSV File

In [7]:
field_names = ['X', 'Y']
buildings = pd.read_csv(csvFileName, header=None, names=field_names)
buildings['X'].head()

0   -120.610196
1   -120.612904
2   -120.613623
3   -120.613902
4   -120.612228
Name: X, dtype: float64

In [8]:
# Create a trace for building points
trace = go.Scatter(
    x = buildings['X'].as_matrix(),
    y = buildings['Y'].as_matrix(),
    name = 'Buildings',
    mode = 'markers',
    marker = dict(
        size = 6,
        color = 'rgba(42, 147, 227, .50)',
    )
)

KMeans Clustering

In [9]:
kmeans = KMeans(n_clusters=numClusters, init = 'k-means++').fit(buildings.as_matrix())
kxs = []
kys = []
for k in kmeans.cluster_centers_:
    kxs.append(k[0])
    kys.append(k[1])


clusterLabels = kmeans.labels_
citiesInCluster = [0] * len(kmeans.cluster_centers_)
for x in range(0, len(clusterLabels)):
    citiesInCluster[clusterLabels[x]] += 1

totalBuildings = len(buildings)

kTrace = [0] * len(kmeans.cluster_centers_)

#Create a trace for the kmeans values with slightly larger dots than the buildings
kTrace = go.Scatter(
    x = kxs,
    y = kys,
    name = 'KMeans Clusters',
    mode = 'markers',
    marker = dict(
        size = 12,
        color = 'rgba(56, 153, 0, 0.75)'
        ,
        
    )
)

#Plot result
points = [trace]
points.append(kTrace)
#Gets name of city from the name of the csv file
cityName = csvFileName.split("/")[1].split("_")[0]
#Plot
offline.plot({
        "data": points,
        'layout': {'title': cityName + ' Building Cluster Coordinates with ' + str(numClusters) + ' Clusters'}
        },
        filename = cityName + 'Kmeans Plot' + str(numClusters) + '.html',
        image_filename = cityName + 'Kmeans Plot' + str(numClusters),
        image = 'png'
        )

'file:///Users/sanchez.hmsc/Documents/GitHub/MoNeT/Dev/SarafinaSmith/ArroyoGrandeKmeans Plot20.html'

DB Scan

In [29]:
db = DBSCAN(0.0004, 10).fit(buildings.as_matrix())

dbPointsLen = max(db.labels_)
numClusters = dbPointsLen + 1

dbPoints = []

clusterLabels = db.labels_
citiesInCluster = [0] * len(clusterLabels)
for x in range(0, len(clusterLabels)):
    citiesInCluster[clusterLabels[x]] += 1

for x in range(0, dbPointsLen + 1):
    dbPoints.append([])

for x in range(0, len(buildings)):
    if(db.labels_[x] >= 0):
        dbPoints[db.labels_[x]].append(buildings.as_matrix()[x])

dbCentersX = []
dbCentersY = []

for x in range(0, dbPointsLen + 1):
    sumX = 0
    sumY = 0
    for y in range(0, len(dbPoints[x])):
        sumX += dbPoints[x][y][0]
        sumY += dbPoints[x][y][1]
    dbCentersX.append(sumX / len(dbPoints[x]))
    dbCentersY.append(sumY / len(dbPoints[x]))

#Create a trace for the kmeans values with slightly larger dots than the buildings
dbCentersTrace = go.Scatter(
    x = dbCentersX,
    y = dbCentersY,
    name = 'DB Clusters',
    mode = 'markers',
    marker = dict(
        size = 12,
        color = 'rgba(255, 0, 0, 0.75)'
    )
)

#Plot result
points = [trace]
points.append(dbCentersTrace)
#Gets name of city from the name of the csv file
cityName = csvFileName.split("/")[1].split("_")[0]
#Plot
offline.plot({
        "data": points,
        'layout': {'title': cityName + ' Building Cluster Coordinates with ' + str(numClusters) + ' Clusters'}
        },
        filename = cityName + 'DBScan Plot' + str(numClusters) + '.html',
        image_filename = cityName + 'DBScan Plot' + str(numClusters),
        image = 'png'
        )

'file:///Users/kevinsmith/marshallLab/MGDrivE/BRInE/PyhtonClustering/ArroyoGrandeDBScan Plot158.html'

Agglomerative Clustering

In [34]:
agg = AgglomerativeClustering(n_clusters = numClusters).fit(buildings.as_matrix())
aggPoints = []

for x in range(0, numClusters):
    aggPoints.append([])

for x in range(0, len(buildings)):
    aggPoints[agg.labels_[x]].append(buildings.as_matrix()[x])

centersX = []
centersY = []

clusterLabels = agg.labels_
citiesInCluster = [0] * len(clusterLabels)
for x in range(0, len(clusterLabels)):
    citiesInCluster[clusterLabels[x]] += 1

for x in range(0, numClusters):
    sumX = 0
    sumY = 0
    for y in range(0, len(aggPoints[x])):
        sumX += aggPoints[x][y][0]
        sumY += aggPoints[x][y][1]
    centersX.append(sumX / len(aggPoints[x]))
    centersY.append(sumY / len(aggPoints[x]))

aggCentersTrace = go.Scatter(
    x = centersX,
    y = centersY,
    name = 'Agg Clusters',
    mode = 'markers',
    marker = dict(
        size = 12,
        color = 'rgba(153, 0, 153, 0.75)'
    )
)

#Plot result
points = [trace]
points.append(aggCentersTrace)
#Gets name of city from the name of the csv file
cityName = csvFileName.split("/")[1].split("_")[0]
#Plot
offline.plot({
        "data": points,
        'layout': {'title': cityName + ' Building Cluster Coordinates with ' + str(numClusters) + ' Clusters'}
        },
        filename = cityName + 'Agglomerative Clustering Plot' + str(numClusters) + '.html',
        image_filename = cityName + 'Agglomerative Clustering Plot' + str(numClusters),
        image = 'png'
        )

'file:///Users/kevinsmith/marshallLab/MGDrivE/BRInE/PyhtonClustering/ArroyoGrandeAgglomerative Clustering Plot158.html'