In [1]:
import pandas as pd
import numpy as np
import googlemaps
import time
from utils import distance, get_key
from visualize import cluster_map

In [2]:
# Build the Google Maps API client; the API key is obtained from utils.get_key()
# rather than being written into the notebook.
gmaps = googlemaps.Client(key=get_key())
# Bare expression: the notebook displays the client's repr as the cell output.
gmaps

<googlemaps.client.Client at 0x14398ecac50>

In [3]:
# Place categories to query; each entry is passed as the `type` argument of
# gmaps.places_nearby via page_search(category=...).
types = ["art_gallery", "atm", "bakery", "bank", "bar", "beauty_salon", "bicycle_store", "book_store", "bus_station",
         "cafe", "campground", "clothing_store", "convenience_store", "doctor", "gym",
         "hospital", "laundry", "library", "liquor_store", "local_government_office", "lodging",
         "museum", "park", "parking", "pharmacy", "physiotherapist", "police", "post_office", "restaurant",
         "school", "stadium", "storage", "store", "supermarket", "transit_station", "university"]

def page_search(loc=(32.881439,-117.237729), rad=500, category=''):
    """
    Collects every page of nearby-search results for one place category
    :param loc: (latitude, longitude) tuple the search is centred on
    :param rad: radius forwarded on follow-up page requests only; the first
        request ranks by distance and does not send a radius
    :param category: place type forwarded as the `type` argument of the search
    :return: a list with the "results" entries of all pages, in the order received

    NOTE(review): the Places API documentation says other parameters are ignored
    when a page token is supplied, so `rad` probably has no effect on the
    results - confirm before relying on it.
    """
    response = gmaps.places_nearby(location=loc, rank_by="distance", type=category)
    collected = list(response["results"])
    # Keep following the token for as long as the service reports another page.
    while "next_page_token" in response:
        # Pause before using the token (presumably because a freshly issued
        # token is not valid immediately - see the Places API docs).
        time.sleep(2)
        token = response["next_page_token"]
        response = gmaps.places_nearby(location=loc, page_token=token, radius=rad, type=category)
        collected.extend(response["results"])
    return collected

# Run one paged search per category (default location) and pool every result
# into a single flat list of raw place records.
# A place tagged with several of the queried categories may appear more than once.
places = []
for t in types:
    places += page_search(category=t)
    
# Show the first raw record to inspect the structure of a result.
places[0]

{'geometry': {'location': {'lat': 32.8781369, 'lng': -117.2403332},
  'viewport': {'northeast': {'lat': 32.8794694802915,
    'lng': -117.2387766197085},
   'southwest': {'lat': 32.8767715197085, 'lng': -117.2414745802915}}},
 'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/generic_business-71.png',
 'id': 'eb175531d46075e176a0c51290da00a24108de63',
 'name': 'Mandeville Annex Gallery',
 'place_id': 'ChIJweM8xMYG3IAR6GgXGxeFbVQ',
 'plus_code': {'compound_code': 'VQH5+7V San Diego, California, United States',
  'global_code': '8544VQH5+7V'},
 'reference': 'ChIJweM8xMYG3IAR6GgXGxeFbVQ',
 'scope': 'GOOGLE',
 'types': ['art_gallery', 'university', 'point_of_interest', 'establishment'],
 'vicinity': 'San Diego'}

In [4]:
# Column-oriented view of the raw records: one list per field, all in the
# same order as `places`.
dct = {
    "name": [record["name"] for record in places],
    "latitude": [record["geometry"]["location"]["lat"] for record in places],
    "longitude": [record["geometry"]["location"]["lng"] for record in places],
}

# Preview the first three entries of every column.
{column: values[:3] for column, values in dct.items()}

{'name': ['Mandeville Annex Gallery',
  'University Art Gallery (UAG)',
  'Crafts Center Grove Gallery'],
 'latitude': [32.8781369, 32.87780120000001, 32.8756918],
 'longitude': [-117.2403332, -117.2407337, -117.2356496]}

In [5]:
# Turn the column lists into a table with one row per place
# (columns: name, latitude, longitude).
df = pd.DataFrame(dct)
df.head()

Unnamed: 0,name,latitude,longitude
0,Mandeville Annex Gallery,32.878137,-117.240333
1,University Art Gallery (UAG),32.877801,-117.240734
2,Crafts Center Grove Gallery,32.875692,-117.23565
3,San Diego Center-Jewish Comm,32.875717,-117.21535
4,Gotthelf Art Gallery,32.87561,-117.215004


In [6]:
# Save the table as places.csv in the working directory. With the to_csv
# defaults the row index is written too, as an unnamed leading column.
df.to_csv("places.csv")

In [7]:
# Rebind `places` from the raw record list to a lookup of
# name -> (latitude, longitude). Rows sharing a name collapse into a single
# entry; the last such row wins.
places = {
    name: (lat, lng)
    for name, lat, lng in zip(df["name"], df["latitude"], df["longitude"])
}

# Preview the first five entries.
dict(list(places.items())[:5])

{'Mandeville Annex Gallery': (32.8781369, -117.2403332),
 'University Art Gallery (UAG)': (32.87780120000001, -117.2407337),
 'Crafts Center Grove Gallery': (32.8756918, -117.2356496),
 'San Diego Center-Jewish Comm': (32.8757165, -117.2153504),
 'Gotthelf Art Gallery': (32.87561000000001, -117.215004)}

In [8]:
def location(place_name):
    """
    Returns location of a place
    :param place_name: name of place on map; must be a key of the global `places` dict
    :return: the location given as a tuple of latitude and longitude
    :raises KeyError: if place_name is not present in `places`
    """
    # Look the entry up once; values are stored latitude first, longitude second.
    coords = places[place_name]
    return (coords[0], coords[1])

In [9]:
def group_by_centroid(places, centroids):
    """
    Buckets every place location under the centroid it lies closest to
    :param places: a dict mapping place names to location tuples; only the locations are used
    :param centroids: a sequence of centroid locations
    :return: a list with one list per centroid (same order as centroids), holding the
        locations whose nearest centroid it is; a tie goes to the earliest centroid
    """
    cluster_count = len(centroids)
    clusters = [[] for _ in range(cluster_count)]
    for coords in places.values():
        # Distance from this location to every centroid, in centroid order.
        dists = [distance(centroid, coords) for centroid in centroids]
        nearest = dists.index(min(dists))
        clusters[nearest].append(coords)
    return clusters

In [10]:
def find_centroid(cluster):
    """
    Returns the centroid of a single cluster
    :param cluster: a sequence of locations, each indexable at positions 0 and 1
    :return: a tuple holding the mean of the first coordinates and the mean of the
        second coordinates
    """
    first_coords = [point[0] for point in cluster]
    second_coords = [point[1] for point in cluster]
    return (np.mean(first_coords), np.mean(second_coords))

In [11]:
def k_means(places, k, max_updates=100):
    """
    Uses the k-means algorithm to group places into k clusters
    :param places: a dict mapping place names to (latitude, longitude) tuples
    :param k: amount of clusters to group places into
    :param max_updates: maximum number of centroid updates allowed
    :return: a list of k centroids, each represented as a tuple of latitude and longitude
    :raises AssertionError: if there are fewer places than requested clusters

    The starting centroids are picked at random (global numpy random state), so
    results differ between runs unless np.random.seed is set beforehand.
    """
    assert len(places) >= k, 'Not enough places to cluster'

    # Materialise the locations once instead of once per starting centroid.
    locations = list(places.values())
    # Draw k distinct entries: sampling with replacement could pick the same
    # place twice, which leaves a cluster empty from the very first update.
    start_indices = np.random.choice(len(locations), size=k, replace=False)
    centroids = [locations[i] for i in start_indices]
    old_centroids, n = [], 0

    while old_centroids != centroids and n < max_updates:
        old_centroids = centroids
        clusters = group_by_centroid(places, centroids)
        # An empty cluster has no mean (it would become NaN and never compare
        # equal, defeating the convergence test), so keep its previous centroid.
        centroids = [
            find_centroid(cluster) if cluster else previous
            for cluster, previous in zip(clusters, old_centroids)
        ]
        n += 1
    return centroids

In [12]:
# Cluster the collected places into 10 groups. k_means picks its starting
# centroids at random and no seed is set here, so the values vary between runs.
centroids = k_means(places, 10)
centroids

  if sys.path[0] == '':


[(32.87260329361702, -117.23244322462006),
 (32.90002181884058, -117.19033939130433),
 (32.856135267164184, -117.25215054029852),
 (32.88284068087649, -117.23993077370518),
 (32.88350348061225, -117.22318163877554),
 (32.83127318604651, -117.21116917906978),
 (32.8700348699187, -117.21227528414632),
 (32.87937693953488, -117.1737037046512),
 (32.84494836837607, -117.27318162991453),
 (32.91224888674698, -117.22985600120482)]

In [None]:
# Hand the places and their centroids to the project's visualize.cluster_map helper.
# NOTE(review): judging by the recorded output this starts a local HTTP server and
# blocks until interrupted - confirm in visualize.py.
cluster_map(places, centroids)

Serving HTTP on 0.0.0.0 port 8000 ...
Type Ctrl-C to exit.
