In [None]:
import pandas as pd
import numpy as np
import googlemaps
import time
from utils import distance, get_key
from visualize import cluster_map

In [None]:
# Build the Google Maps API client used by the search cells below; the API key
# comes from get_key (imported from the project's utils module).
gmaps = googlemaps.Client(key=get_key())
# Echo the client object as the cell output.
gmaps

In [None]:
# Place categories that are searched one at a time; each entry is passed as the
# `type` argument of a nearby-places request.
types = [
    "art_gallery",
    "atm",
    "bakery",
    "bank",
    "bar",
    "beauty_salon",
    "bicycle_store",
    "book_store",
    "bus_station",
    "cafe",
    "campground",
    "clothing_store",
    "convenience_store",
    "doctor",
    "gym",
    "hospital",
    "laundry",
    "library",
    "liquor_store",
    "local_government_office",
    "lodging",
    "museum",
    "park",
    "parking",
    "pharmacy",
    "physiotherapist",
    "police",
    "post_office",
    "restaurant",
    "school",
    "stadium",
    "storage",
    "store",
    "supermarket",
    "transit_station",
]

def page_search(loc=(32.881439,-117.237729), rad=500, category=''):
    """
    Collects every result page of a nearby-places search into a single list
    :param loc: search origin, forwarded as the `location` argument of each request
    :param rad: forwarded as `radius`, but only on the follow-up page requests
    :param category: place type to search for, forwarded as the `type` argument
    :return: a list with the "results" entries of all pages, in the order received
    """
    # The first request ranks by distance and does not pass a radius.
    # NOTE(review): `rad` therefore never reaches the initial query — confirm
    # whether a radius-limited search was intended.
    response = gmaps.places_nearby(location=loc, rank_by="distance", type=category)
    collected = list(response["results"])

    while True:
        if "next_page_token" not in response:
            return collected
        # Pause two seconds before asking for the next page.
        # NOTE(review): presumably the token needs a moment to become valid — confirm.
        time.sleep(2)
        response = gmaps.places_nearby(
            location=loc,
            page_token=response["next_page_token"],
            radius=rad,
            type=category,
        )
        collected.extend(response["results"])


# Run one paged search per category and pool every raw result into one list.
places = []
for place_type in types:
    places.extend(page_search(category=place_type))
places

In [None]:
# Flatten the raw search results into three parallel columns: the place name
# and the lat/lng found under geometry -> location.
dct = {
    "name": [entry["name"] for entry in places],
    "latitude": [entry["geometry"]["location"]["lat"] for entry in places],
    "longitude": [entry["geometry"]["location"]["lng"] for entry in places],
}
dct

In [None]:
# Turn the column dictionary into a table and preview its first five rows.
df = pd.DataFrame(data=dct)
df.head(n=5)

In [None]:
# Save the places table to places.csv, a path relative to the working directory.
# NOTE(review): to_csv is called with its defaults, so the DataFrame index should
# be written as an extra leading column — confirm readers of this file expect it.
df.to_csv("places.csv")

In [None]:
# Map each place name to its (latitude, longitude) pair.
# Columns are selected by name rather than by tuple position, so the mapping is
# correct whether or not `df` carries extra columns (for example an index column
# picked up when the table is re-read from places.csv).
# Places that share a name collapse to one entry; the last occurrence wins.
places = {
    name: (latitude, longitude)
    for name, latitude, longitude in zip(df["name"], df["latitude"], df["longitude"])
}
# A dict cannot be sliced, so preview the first five (name, coordinates) pairs.
list(places.items())[:5]

In [None]:
def location(place_name):
    """
    Looks up the coordinates stored for a place in the global `places` mapping
    :param place_name: name of place on map, used as the key into `places`
    :return: a tuple of the first two values stored for that place
             (presumably latitude then longitude, matching how `places` is
             populated — verify against the cell that builds it)
    """
    coordinates = places[place_name]
    return coordinates[0], coordinates[1]

In [None]:
def group_by_centroid(places, centroids):
    """
    Buckets every place location under the centroid it lies closest to
    :param places: a mapping of place name to location (only the locations are used)
    :param centroids: a sequence of centroids
    :return: a list with one list per centroid, in centroid order, holding the
             locations nearest to that centroid; a centroid that attracts no
             place gets an empty list
    """
    clusters = []
    for _ in range(len(centroids)):
        clusters.append([])

    for _name, spot in places.items():
        dists = [distance(center, spot) for center in centroids]
        nearest = min(dists)
        # list.index returns the first match, so ties go to the earliest centroid
        clusters[dists.index(nearest)].append(spot)
    return clusters

In [None]:
def find_centroid(cluster):
    """
    Returns the centroid of a single cluster
    :param cluster: a sequence of locations, each indexable at positions 0 and 1
    :return: a tuple holding the mean of the first coordinates and the mean of
             the second coordinates
    """
    first_coords = list(map(lambda point: point[0], cluster))
    second_coords = list(map(lambda point: point[1], cluster))
    return np.mean(first_coords), np.mean(second_coords)

In [None]:
def k_means(places, k, max_updates=100):
    """
    Uses the k-means algorithm to group places into k clusters

    The starting centroids are k distinct entries drawn at random from `places`.
    Each update reassigns every place to its nearest centroid and moves each
    centroid to the mean of its cluster; a centroid whose cluster is empty stays
    where it was. Updating stops once the centroids no longer change or after
    `max_updates` rounds.
    :param places: a mapping of place name to location tuple
    :param k: amount of clusters to group places into
    :param max_updates: maximum number of centroid updates allowed
    :return: a list of k centroids, each a tuple in the same coordinate order as
             the values of `places`
    """
    assert len(places) >= k, 'Not enough restaurants to cluster'

    locations = list(places.values())
    # Draw indices without replacement: picking the same place twice would start
    # two clusters on one point and leave one of them empty.
    seed_indices = np.random.choice(len(locations), size=k, replace=False)
    centroids = [locations[i] for i in seed_indices]

    old_centroids, n = [], 0
    while old_centroids != centroids and n < max_updates:
        old_centroids = centroids
        clusters = group_by_centroid(places, centroids)
        # An empty cluster has no mean (it would come out as NaN, which never
        # compares equal and so blocks convergence); keep its previous centroid.
        centroids = [
            find_centroid(cluster) if cluster else previous
            for cluster, previous in zip(clusters, old_centroids)
        ]
        n += 1
    return centroids

In [None]:
# Cluster the places into ten groups and show the resulting centroids.
centroids = k_means(places, k=10)
centroids

In [None]:
# Draw the places and the computed centroids with cluster_map, the helper
# imported from the project's visualize module.
# NOTE(review): cluster_map's expected argument shapes are not visible here —
# presumably the name -> coordinates dict and the list returned by k_means; confirm.
cluster_map(places, centroids)