In [6]:
import math
import os

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans


In [25]:
# Download the Spanish-Wikipedia list of Chilean communes and parse its HTML.
page = "https://es.wikipedia.org/wiki/Anexo:Comunas_de_Chile"
# requests never times out by default, so give it an explicit limit; and fail
# loudly on an HTTP error instead of parsing an error page as if it were data.
response = requests.get(page, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "lxml")


In [94]:
# Extract the communes table from the parsed page into a DataFrame.
table = soup.find("table", {"class": "wikitable sortable"})
if table is None:
    # Without this check the loop below fails with an opaque AttributeError.
    raise ValueError("No 'wikitable sortable' table found in the downloaded page")
column_names = ['CUT', 'Comuna', 'Emblema', "Provincia", 'Region', 'Superficie',
                'Poblacion', "Densidad", 'idh1', 'idh2', 'Latitud', "Longitud"]

# Collect the rows first and build the frame once; appending with
# df.loc[len(df)] re-allocates the frame on every row.
rows = []
for tr in table.find_all('tr'):
    row = [td.text.strip() for td in tr.find_all('td')]
    # Header rows contain only <th> cells and other malformed rows have a
    # different cell count; keep only rows that match the expected columns.
    if len(row) == len(column_names):
        rows.append(row)
df = pd.DataFrame(rows, columns=column_names)

In [95]:
# Drop the emblem, HDI, population, province, density and CUT-code columns,
# then preview the remaining ones.
unused_columns = ["Emblema", "idh1", "idh2", "Poblacion", "Provincia", "Densidad", "CUT"]
df = df.drop(labels=unused_columns, axis=1)
df.head()

Unnamed: 0,Comuna,Region,Superficie,Latitud,Longitud
0,Arica,Arica y Parinacota,4799.4,"-18°27'18""","-70°17'24"""
1,Camarones,Arica y Parinacota,3927.0,"-19°1'1.2""","-69°52'1.2"""
2,Putre,Arica y Parinacota,5902.5,"-18°12'0""","-69°34'58.8"""
3,General Lagos,Arica y Parinacota,2244.4,"-17°39'10.8""","-69°38'6"""
4,Iquique,Tarapacá,2242.1,"-20°14'38.4""","-70°8'20.4"""


In [96]:
# Keep only the communes of the Santiago Metropolitan Region.
# .copy() makes the result an independent frame, so the column assignments
# performed later on `df` do not raise SettingWithCopyWarning.
df = df[df["Region"] == "Metropolitana de Santiago"].copy()
df.head(15)

Unnamed: 0,Comuna,Region,Superficie,Latitud,Longitud
294,Santiago,Metropolitana de Santiago,23.2,"-33°26'14""","-70°39'26"""
295,Cerrillos,Metropolitana de Santiago,21.0,"-33°30'0""","-70°43'0"""
296,Cerro Navia,Metropolitana de Santiago,11.0,"-33°25'19.2""","-70°44'6"""
297,Conchalí,Metropolitana de Santiago,10.7,"-33°22'48""","-70°40'30"""
298,El Bosque,Metropolitana de Santiago,14.2,"-33°34'1.2""","-70°40'30"""
299,Estación Central,Metropolitana de Santiago,15.0,"-33°27'32.4""","-70°41'56.4"""
300,Huechuraba,Metropolitana de Santiago,44.8,"-33°22'4.8""","-70°38'2.4"""
301,Independencia,Metropolitana de Santiago,7.0,"-33°24'46.8""","-70°39'57.6"""
302,La Cisterna,Metropolitana de Santiago,10.0,"-33°31'44.4""","-70°39'46.8"""
303,La Florida,Metropolitana de Santiago,70.2,"-33°31'30""","-70°32'16.8"""


In [107]:
def dms2dd(degrees, minutes, seconds):
    """Convert a degrees/minutes/seconds triple to signed decimal degrees.

    Parameters
    ----------
    degrees, minutes, seconds : str or number
        Anything accepted by ``float()``. The sign of the whole coordinate is
        carried by ``degrees``; ``minutes`` and ``seconds`` are magnitudes.

    Returns
    -------
    float
        The coordinate in decimal degrees, negative when ``degrees`` is
        negative (including negative zero, e.g. ``"-0"``).

    Raises
    ------
    ValueError
        If any component cannot be converted by ``float()``.
    """
    deg = float(degrees)
    magnitude = abs(deg) + float(minutes) / 60 + float(seconds) / (60 * 60)
    # `deg < 0` is False for -0.0, which would flip coordinates such as
    # -0°30' to the wrong hemisphere; copysign inspects the sign bit instead.
    if math.copysign(1.0, deg) < 0:
        return -magnitude
    return magnitude

def parse_dms(dms):
    """Parse a DMS string such as ``-33°26'14"`` into decimal degrees.

    The degree sign and the minute mark are both treated as field
    separators and the double-quote seconds mark is discarded; the first
    three resulting fields are passed to ``dms2dd``.
    """
    without_seconds_mark = dms.replace('"', "")
    fields = without_seconds_mark.replace("°", "'").split("'")
    degrees, minutes, seconds = fields[0], fields[1], fields[2]
    return dms2dd(degrees, minutes, seconds)

In [111]:
# Replace the DMS strings in both coordinate columns with decimal degrees.
for coord_column in ("Latitud", "Longitud"):
    df[coord_column] = df[coord_column].apply(parse_dms)

In [112]:
# Show the first 30 rows of `df` to inspect the Latitud/Longitud columns.
df.head(30)

Unnamed: 0,Comuna,Region,Superficie,Latitud,Longitud
294,Santiago,Metropolitana de Santiago,23.2,-33.437222,-70.657222
295,Cerrillos,Metropolitana de Santiago,21.0,-33.5,-70.716667
296,Cerro Navia,Metropolitana de Santiago,11.0,-33.422,-70.735
297,Conchalí,Metropolitana de Santiago,10.7,-33.38,-70.675
298,El Bosque,Metropolitana de Santiago,14.2,-33.567,-70.675
299,Estación Central,Metropolitana de Santiago,15.0,-33.459,-70.699
300,Huechuraba,Metropolitana de Santiago,44.8,-33.368,-70.634
301,Independencia,Metropolitana de Santiago,7.0,-33.413,-70.666
302,La Cisterna,Metropolitana de Santiago,10.0,-33.529,-70.663
303,La Florida,Metropolitana de Santiago,70.2,-33.525,-70.538


In [113]:
# Foursquare API credentials are read from the environment so that secrets
# are never stored in (or shared with) the notebook. Any key that was
# previously committed here should be considered leaked and be rotated.
CLIENT_ID = os.environ.get("FOURSQUARE_CLIENT_ID", "")
CLIENT_SECRET = os.environ.get("FOURSQUARE_CLIENT_SECRET", "")
# API version date sent as the `v` parameter of every Foursquare request.
VERSION = '20180604'

if not (CLIENT_ID and CLIENT_SECRET):
    print("WARNING: set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET "
          "in the environment before calling the Foursquare API")

In [116]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=100, timeout=30):
    """Query the Foursquare ``venues/explore`` endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterable
        Parallel iterables; one request is made per (name, lat, lng) triple.
    radius : int, optional
        Value sent as the ``radius`` query parameter (default 500).
    limit : int, optional
        Value sent as the ``limit`` query parameter (default 100).
    timeout : float, optional
        Seconds to wait for each HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        has these columns even when no venue was returned. 'Venue Category'
        is None for a venue that has no categories.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # Progress indicator: one line per location being queried.
        print(name)

        # Let requests build and escape the query string instead of
        # formatting the URL by hand.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': limit,
            },
            timeout=timeout,
        )
        # Surface API failures here rather than as a KeyError further down.
        response.raise_for_status()

        groups = response.json().get('response', {}).get('groups') or []
        items = groups[0].get('items', []) if groups else []

        # Keep only the relevant information for each nearby venue.
        for item in items:
            venue = item['venue']
            # Indexing [0] on an empty category list would raise IndexError.
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None,
            ))

    # Passing `columns` to the constructor keeps the schema when `rows` is
    # empty; assigning 7 names to a 0-column frame raises ValueError.
    return pd.DataFrame(rows, columns=columns)

In [117]:
# Fetch the venues around every commune currently in `df`
# (one API request per row).
santiago_venues = getNearbyVenues(df['Comuna'], df['Latitud'], df['Longitud'])

Santiago
Cerrillos
Cerro Navia
Conchalí
El Bosque
Estación Central
Huechuraba
Independencia
La Cisterna
La Florida
La Granja
La Pintana
La Reina
Las Condes
Lo Barnechea
Lo Espejo
Lo Prado
Macul
Maipú
Ñuñoa
Pedro Aguirre Cerda
Peñalolén
Providencia
Pudahuel
Quilicura
Quinta Normal
Recoleta
Renca
San Joaquín
San Miguel
San Ramón
Vitacura
Puente Alto
Pirque
San José de Maipo
Colina
Lampa
Til Til
San Bernardo
Buin
Calera de Tango
Paine
Melipilla
Alhué
Curacaví
María Pinto
San Pedro
Talagante
El Monte
Isla de Maipo
Padre Hurtado
Peñaflor


In [121]:
# List the distinct values found in the 'Venue Category' column.
santiago_venues["Venue Category"].unique()

array(['Plaza', 'Coffee Shop', 'Pool', 'Asian Restaurant',
       'Arepa Restaurant', 'Sandwich Place', 'Peruvian Restaurant',
       'Deli / Bodega', 'Museum', 'Market', 'Bed & Breakfast',
       'Bookstore', 'Sushi Restaurant', 'Burger Joint', 'Juice Bar',
       'History Museum', 'Gourmet Shop', 'Falafel Restaurant', 'Bakery',
       'Yoga Studio', 'Ice Cream Shop', 'Gym / Fitness Center',
       'Shopping Mall', 'Pizza Place', 'Mediterranean Restaurant',
       'Fried Chicken Joint', 'Donut Shop', 'Pie Shop', 'Diner', 'Café',
       'Concert Hall', 'South American Restaurant', 'Boutique',
       'Japanese Restaurant', 'Dance Studio', 'Bar', 'Gastropub',
       'Dive Bar', 'Italian Restaurant', 'Cafeteria', 'Food Service',
       'Restaurant', 'Stadium', 'Breakfast Spot', 'Miscellaneous Shop',
       'Fast Food Restaurant', 'Chinese Restaurant', 'Park',
       'Liquor Store', 'Cupcake Shop', 'Gym', 'Pharmacy', 'Dessert Shop',
       'Snack Place', 'Greek Restaurant', 'Food', 'Hot Do