In [None]:
import pandas as pd
import numpy as np
import googlemaps
import time
from config import get_key
from utils import distance
from visualize import cluster_map

In [None]:
gmaps = googlemaps.Client(key=get_key())
gmaps

In [None]:
types = ["art_gallery", "atm", "bakery", "bank", "bar", "beauty_salon", "bicycle_store", "book_store", "bus_station",
         "cafe", "campground", "clothing_store", "convenience_store", "doctor", "gym",
         "hospital", "laundry", "library", "liquor_store", "local_government_office", "lodging",
         "museum", "park", "parking", "pharmacy", "physiotherapist", "police", "post_office", "restaurant",
         "school", "stadium", "storage", "store", "supermarket", "transit_station"]

def page_search(loc=(32.881439,-117.237729), rad=500, category=''):
    results = []
    search = gmaps.places_nearby(location=loc, rank_by="distance", type=category)
    results += search["results"]
    while "next_page_token" in search:
        time.sleep(2)
        search = gmaps.places_nearby(location=loc, page_token=search["next_page_token"], radius=rad, type=category)
        results += search["results"]       
    return results


places = []
for t in types:
    places += page_search(category=t)
places

In [None]:
dct = {"name": [], "latitude": [], "longitude": []}
for place in places:
    dct["name"].append(place["name"])
    dct["latitude"].append(place["geometry"]["location"]["lat"])
    dct["longitude"].append(place["geometry"]["location"]["lng"])
dct

In [None]:
df = pd.DataFrame(dct)
df.head()

In [None]:
df.to_csv("places.csv")

In [3]:
places = {row[2]: (row[3], row[4]) for row in df.itertuples()}
places[:5]

{'Mandeville Annex Gallery': (32.8781369, -117.2403332),
 'University Art Gallery (UAG)': (32.87780120000001, -117.2407337),
 'Crafts Center Grove Gallery': (32.8756918, -117.2356496),
 'San Diego Center-Jewish Comm': (32.875716499999996, -117.21535039999999),
 'Gotthelf Art Gallery': (32.87561000000001, -117.21500400000001),
 'Waveriders Gallery': (32.89330359999999, -117.2175452),
 'Copia Art': (32.8697147, -117.21228290000002),
 'Symbolic Collection': (32.910368, -117.2292578),
 'La Jolla Art Association': (32.8540417, -117.2549193),
 'La Playa Gallery': (32.846457, -117.27413929999999),
 'Upwelling Fine Art Gallery': (32.85421700000001, -117.256759),
 'San Diego Art Loft': (32.91452999999999, -117.23387199999999),
 'Simic Galleries': (32.88247339999999, -117.19488899999999),
 'Oriental Heritage Inc': (32.9014363, -117.20040949999999),
 'little bench art center': (32.8491159, -117.27066570000001),
 'The La Jolla Gallery': (32.849284999999995, -117.27104150000001),
 'Don Morris Art':

In [4]:
def location(place_name):
    """
    Returns location of a place
    :param place_name: name of place on map
    :return: the location given as a tuple of longitude and latitude
    """
    return tuple((places[place_name][0], places[place_name][1]))

In [5]:
def group_by_centroid(places, centroids):
    """
    Assigns places to their respective closest centroids and returns a cluster of places for each centroid
    :param places: a sequence of places
    :param centroids: a sequence of centroids
    :return: a nested sequence containing sequences of places all closest to the same centroid
    """
    clusters = [[] for i in range(len(centroids))]
    for place_name, location in places.items():
        dists = [distance(centroid, location) for centroid in centroids]
        clusters[dists.index(min(dists))].append(location)
    return clusters

In [None]:
def find_centroid(cluster):
    """
    Returns centroids of given clusters
    :param cluster: 
    :return: 
    """
    return tuple((np.mean([i[0] for i in cluster]), np.mean([i[1] for i in cluster])))

In [7]:
def k_means(places, k, max_updates=100):
    """
    Uses the k-means algorithm to group places into k clusters
    :param places: a sequence of places
    :param k: amount of clusters to group places into
    :param max_updates: maximum number of centroid updates allowed
    :return: k number of centroids represented as a tuple of longitude and latitude
    """
    assert len(places) >= k, 'Not enough restaurants to cluster'
    
    old_centroids, n = [], 0
    centroids = [list(places.values())[i] for i in np.random.random_integers(0, len(places)-1, size=k)]

    while old_centroids != centroids and n < max_updates:
        old_centroids = centroids
        clusters = group_by_centroid(places, centroids)
        centroids = list(map(find_centroid, clusters))
        n += 1
    return centroids

In [8]:
centroids = k_means(places, 10)
centroids

  
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


[(32.846640265, -117.267924505625),
 (32.871520821686744, -117.21525358032127),
 (32.858495490909085, -117.20443320454547),
 (32.883971808510644, -117.22346975851067),
 (32.900021818840585, -117.19033939130432),
 (32.83039946585366, -117.2107857292683),
 (32.908157848181816, -117.23281771272727),
 (32.87558200434783, -117.23662421449274),
 (nan, nan),
 (32.881940756410245, -117.17290841794872)]

In [None]:
cluster_map(places, centroids)

Serving HTTP on 0.0.0.0 port 8000 ...
Type Ctrl-C to exit.
