In [1]:
import pandas as pd
import numpy as np
from utils import distance
from visualize import cluster_map

In [2]:
# Read the places table from disk; the bare `df` on the last line renders it as the cell output.
PLACES_CSV = "places.csv"
df = pd.read_csv(PLACES_CSV)
df

Unnamed: 0.1,Unnamed: 0,name,latitude,longitude
0,0,Teaching + Learning Commons,32.881141,-117.237613
1,1,Eucalyptus Point,32.881492,-117.239411
2,2,Thurgood Marshall College Lower Apartments,32.882616,-117.239135
3,3,Center for Research in Language,32.880529,-117.239433
4,4,UC San Diego Jacobs School of Engineering,32.881468,-117.235483
5,5,Institute for Nonlinear Science,32.880493,-117.235506
6,6,California Reading and Literature Project,32.880704,-117.240077
7,7,Social Science Research Building,32.880704,-117.240077
8,8,Campus Web Office,32.880704,-117.240077
9,9,Department of Electrical and Computer Engineering,32.881699,-117.235231


In [3]:
# Map each place name to its (latitude, longitude) pair.
# Columns are selected by label rather than by tuple position: positional
# indexing into `itertuples()` rows shifts whenever the CSV carries extra
# leftover index columns (the frame displayed by the previous cell has both
# "Unnamed: 0.1" and "Unnamed: 0"), which would silently pair the wrong fields.
# If a name occurs more than once, the last row wins, as with the dict
# comprehension this replaces.
places = dict(zip(df["name"], zip(df["latitude"], df["longitude"])))
places

{'Teaching + Learning Commons': (32.8811414, -117.23761259999999),
 'Eucalyptus Point': (32.8815091, -117.2397034),
 'Thurgood Marshall College Lower Apartments': (32.88261579999999,
  -117.239135),
 'Center for Research in Language': (32.880529100000004, -117.23943349999999),
 'UC San Diego Jacobs School of Engineering': (32.8814678, -117.2354827),
 'Institute for Nonlinear Science': (32.880493300000005, -117.23550619999999),
 'California Reading and Literature Project': (32.8807041,
  -117.24007659999998),
 'Social Science Research Building': (32.8807041, -117.24007659999998),
 'Campus Web Office': (32.8807041, -117.24007659999998),
 'Department of Electrical and Computer Engineering': (32.88169879999999,
  -117.23523059999998),
 'Center for Wireless Communications': (32.88169879999999,
  -117.23523059999998),
 'Media Center and Communications Building': (32.8814907, -117.24028329999999),
 'Laboratory of Comparative Human Cognition': (32.8806143, -117.24015),
 'UC San Diego School of

In [4]:
def location(place_name):
    """
    Looks up the coordinates stored for a place in the module-level `places` mapping
    :param place_name: name of place on map; must be a key of `places`
    :return: a 2-tuple holding the first two entries stored for that place; for the
        mapping built in this notebook that is (latitude, longitude)
    """
    coords = places[place_name]
    return (coords[0], coords[1])

In [5]:
def group_by_centroid(places, centroids):
    """
    Partitions place locations by their nearest centroid
    :param places: a mapping whose values are locations; the keys are not used
    :param centroids: a sequence of centroids, each comparable to a location via `distance`
    :return: a list with one inner list per centroid, in the same order as `centroids`; each
        inner list holds the locations (not the names) closest to that centroid, with ties
        going to the earliest centroid
    """
    cluster_count = len(centroids)
    buckets = [[] for _ in range(cluster_count)]
    for _, spot in places.items():
        gaps = [distance(center, spot) for center in centroids]
        # min() keeps the first of equal keys, matching list.index(min(...)).
        nearest = min(range(len(gaps)), key=gaps.__getitem__)
        buckets[nearest].append(spot)
    return buckets

In [6]:
def find_centroid(cluster):
    """
    Computes the centroid of a cluster as the mean of each coordinate
    :param cluster: a sequence of places, each indexable at positions 0 and 1
    :return: tuple of (mean of the position-0 values, mean of the position-1 values), i.e.
        latitude and longitude when the places are stored in that order
    """
    firsts, seconds = [], []
    for point in cluster:
        firsts.append(point[0])
        seconds.append(point[1])
    return (np.mean(firsts), np.mean(seconds))

In [7]:
def k_means(places, k, max_updates=100):
    """
    Uses the k-means algorithm to group places into k clusters.

    The initial centroids are k distinct entries drawn at random (via NumPy's global random
    state) from the locations in `places`. Each update regroups the places around the current
    centroids and replaces every centroid by the centroid of its cluster; updating stops once
    an update leaves the centroids unchanged or `max_updates` updates have been made.

    :param places: a mapping of place name to location tuple
    :param k: amount of clusters to group places into
    :param max_updates: maximum number of centroid updates allowed
    :return: a list of k centroids, each a tuple in the same coordinate order as the
        locations stored in `places`
    :raises AssertionError: if there are fewer than k places
    """
    assert len(places) >= k, 'Not enough places to cluster'

    # Materialize the locations once instead of rebuilding the list for every chosen index.
    locations = list(places.values())
    indexes = np.random.choice(len(locations), size=k, replace=False)
    centroids = [locations[i] for i in indexes]

    old_centroids, n = [], 0
    while old_centroids != centroids and n < max_updates:
        old_centroids = centroids
        clusters = group_by_centroid(places, centroids)
        # A centroid can end up with no places, e.g. when two initial centroids come from
        # places sharing the same coordinates and every tie goes to the earlier one.
        # Averaging an empty cluster yields NaN, which never compares equal to itself and
        # would keep the loop running until max_updates, so such a centroid is carried over.
        centroids = [
            find_centroid(cluster) if cluster else previous
            for cluster, previous in zip(clusters, old_centroids)
        ]
        n += 1
    return centroids

In [8]:
# k_means picks its initial centroids with np.random.choice, which draws from NumPy's global
# random state; seeding it here makes the centroids below reproducible across kernel restarts.
np.random.seed(0)
centroids = k_means(places, 9)
centroids

[(32.87564956971831, -117.23282811690143),
 (32.88020005970149, -117.22484307761192),
 (32.88144393658537, -117.23369553414634),
 (32.881295200000004, -117.24047557894737),
 (32.87976104202899, -117.2364095115942),
 (32.87633623513514, -117.23597777837838),
 (32.88623628928571, -117.24113197142856),
 (32.87381117333333, -117.24130937999999),
 (32.87788396153846, -117.24085857307692)]

In [None]:
# Draw the places together with the computed centroids using the project's cluster_map helper.
# NOTE(review): the captured output shows an HTTP server on port 8000 waiting for Ctrl-C, so
# this call presumably blocks the kernel until interrupted — confirm against visualize.cluster_map.
cluster_map(places, centroids)

Serving HTTP on 0.0.0.0 port 8000 ...
Type Ctrl-C to exit.
