In [2]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

import numpy as np # library to handle data in a vectorized manner

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans
from sklearn.datasets.samples_generator import make_blobs

!conda install -c conda-forge folium=0.5.0 --yes
#=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2019.6.16  |       hecc5488_0         145 KB  conda-forge
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    certifi-2019.6.16          |           py36_0         148 KB  conda-forge
    openssl-1.1.1c             |       h516909a_0         2.1 MB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.49-py_0         conda-forge
    geopy:           1.20.0-py_0       conda-forge

The following packages will be UPDATED:

    ca-

### Importing from Wikipedia

In [3]:
url = requests.get("https://es.wikipedia.org/wiki/Anexo:Localidades_de_Bogot%C3%A1").text

In [4]:
soup = BeautifulSoup(url,'lxml')
print(soup.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="es">
 <head>
  <meta charset="utf-8"/>
  <title>
   Anexo:Localidades de Bogotá - Wikipedia, la enciclopedia libre
  </title>
  <script>
   document.documentElement.className=document.documentElement.className.replace(/(^|\s)client-nojs(\s|$)/,"$1client-js$2");RLCONF={"wgCanonicalNamespace":"Anexo","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":104,"wgPageName":"Anexo:Localidades_de_Bogotá","wgTitle":"Localidades de Bogotá","wgCurRevisionId":114573108,"wgRevisionId":114573108,"wgArticleId":6319494,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Localidades de Bogotá","Anexos:Bogotá"],"wgBreakFrames":!1,"wgPageContentLanguage":"es","wgPageContentModel":"wikitext","wgSeparatorTransformTable":[",\t."," \t,"],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","enero","febrero","marzo","abril","mayo","junio","julio","agosto","septiembre","oc

In [5]:
table1 = soup.find('table',{'class':'sortable wikitable'})
table1

<table border="1" class="sortable wikitable">
<tbody><tr bgcolor="silver">
<th width="23"><center>Nº
</center></th>
<th width="110"><center> Localidad </center>
</th>
<th width="90"><center> Códigos Postales </center>
</th>
<th><center> Superficie km²<sup class="reference separada" id="cite_ref-2"><a href="#cite_note-2"><span class="corchete-llamada">[</span>2<span class="corchete-llamada">]</span></a></sup>​</center>
</th>
<th><center> Población<sup class="reference separada" id="cite_ref-3"><a href="#cite_note-3"><span class="corchete-llamada">[</span>3<span class="corchete-llamada">]</span></a></sup>​</center>
</th>
<th><center> Densidad hab/km²</center>
</th></tr>
<tr>
<td><b>01</b>
</td>
<td><a href="/wiki/Usaqu%C3%A9n" title="Usaquén">Usaquén</a>
</td>
<td>11<b>01</b>11-11<b>01</b>51
</td>
<td>65.31
</td>
<td>501 999
</td>
<td>7 686.4
</td></tr>
<tr>
<td><b>02</b>
</td>
<td><a href="/wiki/Chapinero" title="Chapinero">Chapinero</a>
</td>
<td>11<b>02</b>11-11<b>02</b>31
</td>
<td>3

In [7]:
links = table1.findAll('a')
links

[<a href="#cite_note-2"><span class="corchete-llamada">[</span>2<span class="corchete-llamada">]</span></a>,
 <a href="#cite_note-3"><span class="corchete-llamada">[</span>3<span class="corchete-llamada">]</span></a>,
 <a href="/wiki/Usaqu%C3%A9n" title="Usaquén">Usaquén</a>,
 <a href="/wiki/Chapinero" title="Chapinero">Chapinero</a>,
 <a href="/wiki/Santa_Fe_(Bogot%C3%A1)" title="Santa Fe (Bogotá)">Santa Fe</a>,
 <a href="/wiki/San_Crist%C3%B3bal_(Bogot%C3%A1)" title="San Cristóbal (Bogotá)">San Cristóbal</a>,
 <a href="/wiki/Usme" title="Usme">Usme</a>,
 <a href="/wiki/Tunjuelito" title="Tunjuelito">Tunjuelito</a>,
 <a href="/wiki/Bosa_(Bogot%C3%A1)" title="Bosa (Bogotá)">Bosa</a>,
 <a href="/wiki/Kennedy_(Bogot%C3%A1)" title="Kennedy (Bogotá)">Kennedy</a>,
 <a href="/wiki/Fontib%C3%B3n" title="Fontibón">Fontibón</a>,
 <a href="/wiki/Engativ%C3%A1" title="Engativá">Engativá</a>,
 <a href="/wiki/Suba" title="Suba">Suba</a>,
 <a href="/wiki/Barrios_Unidos_(Bogot%C3%A1)" title="Barrios 

In [9]:
BogotaDistricts = []
for link in links:
    BogotaDistricts.append(link.get('title'))
    
print(BogotaDistricts)

del BogotaDistricts[0:2]

BogotaDistricts

[None, None, 'Usaquén', 'Chapinero', 'Santa Fe (Bogotá)', 'San Cristóbal (Bogotá)', 'Usme', 'Tunjuelito', 'Bosa (Bogotá)', 'Kennedy (Bogotá)', 'Fontibón', 'Engativá', 'Suba', 'Barrios Unidos (Bogotá)', 'Teusaquillo', 'Los Mártires', 'Antonio Nariño (Bogotá)', 'Puente Aranda', 'La Candelaria', 'Rafael Uribe Uribe (Bogotá)', 'Ciudad Bolívar (Bogotá)', 'Sumapaz (Bogotá)']


['Usaquén',
 'Chapinero',
 'Santa Fe (Bogotá)',
 'San Cristóbal (Bogotá)',
 'Usme',
 'Tunjuelito',
 'Bosa (Bogotá)',
 'Kennedy (Bogotá)',
 'Fontibón',
 'Engativá',
 'Suba',
 'Barrios Unidos (Bogotá)',
 'Teusaquillo',
 'Los Mártires',
 'Antonio Nariño (Bogotá)',
 'Puente Aranda',
 'La Candelaria',
 'Rafael Uribe Uribe (Bogotá)',
 'Ciudad Bolívar (Bogotá)',
 'Sumapaz (Bogotá)']

### Generating our dataframe of the districts in Bogota

In [10]:
df = pd.DataFrame()
df['BogotaDistricts'] = BogotaDistricts
df

Unnamed: 0,BogotaDistricts
0,Usaquén
1,Chapinero
2,Santa Fe (Bogotá)
3,San Cristóbal (Bogotá)
4,Usme
5,Tunjuelito
6,Bosa (Bogotá)
7,Kennedy (Bogotá)
8,Fontibón
9,Engativá


In [11]:
def get_coords_local(localidad, output_as='center'):
    """
    get the bounding box of a locality in WGS84 given its name

    Parameters
    ----------
    localidad : str
        name of the country in english and lowercase
    output_as : 'str
        chose from 'boundingbox' or 'center'. 
         - 'boundingbox' for [latmin, latmax, lonmin, lonmax]
         - 'center' for [latcenter, loncenter]

    Returns
    -------
    output : list
        list with coordinates as str
    """
    # create url
    url = '{0}{1}{2}'.format('http://nominatim.openstreetmap.org/search.php?q=',
                             localidad+', Bogota, Bogota Capital District',
                             '&format=json&polygon=0')
    response = requests.get(url).json()[0]

    # parse response to list
    if output_as == 'boundingbox':
        lst = response[output_as]
        output = [float(i) for i in lst]
    if output_as == 'center':
        lst = [response.get(key) for key in ['lat','lon']]
        output = [float(i) for i in lst]
    return output

In [12]:
dfbogota = df.copy()

latitudeCln = []
longitudeCln = []
for index, row in dfbogota.iterrows():
    print(row[0])
    lat, long = get_coords_local(localidad=row[0], output_as='center')
    latitudeCln.append(lat)
    longitudeCln.append(long)

dfbogota['Latitude'] = latitudeCln
dfbogota['Longitude'] = longitudeCln

dfbogota.shape

Usaquén
Chapinero
Santa Fe (Bogotá)
San Cristóbal (Bogotá)
Usme
Tunjuelito
Bosa (Bogotá)
Kennedy (Bogotá)
Fontibón
Engativá
Suba
Barrios Unidos (Bogotá)
Teusaquillo
Los Mártires
Antonio Nariño (Bogotá)
Puente Aranda
La Candelaria
Rafael Uribe Uribe (Bogotá)
Ciudad Bolívar (Bogotá)
Sumapaz (Bogotá)


(20, 3)

In [13]:
dfbogota.head()

Unnamed: 0,BogotaDistricts,Latitude,Longitude
0,Usaquén,4.694969,-74.031093
1,Chapinero,4.645377,-74.061943
2,Santa Fe (Bogotá),4.602204,-74.078837
3,San Cristóbal (Bogotá),4.548658,-74.047473
4,Usme,4.411136,-74.129108


### Creating the map of Bogota

In [14]:
address = 'Bogotá, Colombia'

geolocator = Nominatim(user_agent="myProject")
location = geolocator.geocode(address, timeout=60, exactly_one=True)
latitude = location.latitude
longitude = location.longitude
print('Coordinates of Bogotá are {}, {}.'.format(latitude, longitude))

Coordinates of Bogotá are 4.59808, -74.0760439.


In [15]:
# create map of Bogotá using latitude and longitude values
map_bog = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, local in zip(dfbogota['Latitude'], dfbogota['Longitude'], dfbogota['BogotaDistricts']):
    label = '{}'.format(local)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_bog)  
    
map_bog

### Building the dataframe of the districts and venues

In [4]:
CLIENT_ID = 'N1LDZXH4NFDXTW4AATBMT3URRJGJ3JLYHHE34MKAUYRMRVNJ' # your Foursquare ID
CLIENT_SECRET = 'QF5Z1BWV2T5ZMYLKPCP5OSP4WNAOAQLG420S3PWKE1VB2T1R' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
limit = 500 # limit of number of venues returned by Foursquare API
radius = 5000

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)


Your credentails:
CLIENT_ID: N1LDZXH4NFDXTW4AATBMT3URRJGJ3JLYHHE34MKAUYRMRVNJ
CLIENT_SECRET:QF5Z1BWV2T5ZMYLKPCP5OSP4WNAOAQLG420S3PWKE1VB2T1R


In [27]:
import urllib
def getNearbyVenues(names, latitudes, longitudes, radius=5000, categoryIds=''):
    try:
        venues_list=[]
        for name, lat, lng in zip(names, latitudes, longitudes):
            #print(name)

            # create the API request URL
            url = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, VERSION, lat, lng, radius, LIMIT)

            if (categoryIds != ''):
                url = url + '&categoryId={}'
                url = url.format(categoryIds)

            # make the GET request
            response = requests.get(url).json()
            results = response["response"]['venues']

            # return only relevant information for each nearby venue
            for v in results:
                success = False
                try:
                    category = v['categories'][0]['name']
                    success = True
                except:
                    pass

                if success:
                    venues_list.append([(
                        name, 
                        lat, 
                        lng, 
                        v['name'], 
                        v['location']['lat'], 
                        v['location']['lng'],
                        v['categories'][0]['name']
                    )])

        nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
        nearby_venues.columns = ['District', 
                  'District Latitude', 
                  'District Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    except:
        print(url)
        print(response)
        print(results)
        print(nearby_venues)

    return(nearby_venues)
     
        

In [28]:
CLIENT_ID = 'N1LDZXH4NFDXTW4AATBMT3URRJGJ3JLYHHE34MKAUYRMRVNJ' # your Foursquare ID
CLIENT_SECRET = 'QF5Z1BWV2T5ZMYLKPCP5OSP4WNAOAQLG420S3PWKE1VB2T1R' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 500 # limit of number of venues returned by Foursquare API
radius = 5000

In [30]:
burgerbog_venues = getNearbyVenues(names=dfbogota['BogotaDistricts'], latitudes=dfbogota['Latitude'], longitudes=dfbogota['Longitude'], radius=1000, categoryIds='4bf58dd8d48988d16c941735')
burgerbog_venues.head()

Unnamed: 0,District,District Latitude,District Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Usaquén,4.694969,-74.031093,Madison,4.695443,-74.030262,Burger Joint
1,Usaquén,4.694969,-74.031093,La Hamburguesería,4.69531,-74.031716,Burger Joint
2,Usaquén,4.694969,-74.031093,McDonald's,4.695702,-74.031881,Burger Joint
3,Usaquén,4.694969,-74.031093,Home Burgers,4.693883,-74.032853,Burger Joint
4,Usaquén,4.694969,-74.031093,El Taller 'Handmade Burgers & Beers',4.695879,-74.029299,Burger Joint


In [31]:
burgerbog_venues.shape

(178, 7)

### Mapping every burger in every district

In [32]:
def addToMap(df, color, existingMap):
    for lat, lng, local, venue, venueCat in zip(df['Venue Latitude'], df['Venue Longitude'], df['District'], df['Venue'], df['Venue Category']):
        label = '{} ({}) - {}'.format(venue, venueCat, local)
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7).add_to(existingMap)

In [34]:
burgerbog_map = folium.Map(location=[latitude, longitude], zoom_start=12)
addToMap(burgerbog_venues, 'blue', burgerbog_map)
burgerbog_map

### Adding the highschool factor

In [37]:
bog_venues_highschools = getNearbyVenues(names=dfbogota['BogotaDistricts'], latitudes=dfbogota['Latitude'], longitudes=dfbogota['Longitude'], radius=1000, categoryIds='4bf58dd8d48988d13d941735')
bog_venues_highschools.head()

Unnamed: 0,District,District Latitude,District Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Usaquén,4.694969,-74.031093,Gimnasio Los Cerros,4.694464,-74.026556,School
1,Chapinero,4.645377,-74.061943,Colegio Nuestra Señora de Chiquinquira,4.6498,-74.065643,High School
2,Chapinero,4.645377,-74.061943,Colegio del Sagrado Corazón de Jesus Bethlemitas,4.649182,-74.058036,High School
3,Chapinero,4.645377,-74.061943,Colegio Distrital Simón Rodriguez,4.653948,-74.060732,High School
4,Chapinero,4.645377,-74.061943,Colegio Ervit,4.642634,-74.066469,High School


In [38]:
highschoolbog_map = folium.Map(location=[latitude, longitude], zoom_start=12)
addToMap(bog_venues_highschools, 'green', highschoolbog_map)
highschoolbog_map

### Adding the university's factor

In [39]:
bog_venues_uni = getNearbyVenues(names=dfbogota['BogotaDistricts'], latitudes=dfbogota['Latitude'], longitudes=dfbogota['Longitude'], radius=1000, categoryIds='4bf58dd8d48988d1ae941735')
bog_venues_uni.head()

Unnamed: 0,District,District Latitude,District Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Usaquén,4.694969,-74.031093,Tecnologico de Monterry,4.6921,-74.034916,University
1,Usaquén,4.694969,-74.031093,Centrum Católica,4.690426,-74.038657,University
2,Usaquén,4.694969,-74.031093,UTadeo M7a,4.70539,-74.032827,University
3,Chapinero,4.645377,-74.061943,Universidad de La Salle,4.644576,-74.059379,University
4,Chapinero,4.645377,-74.061943,Instituto de Lenguas Universidad Distrital ILUD,4.641669,-74.066366,University


In [41]:
unibog_map = folium.Map(location=[latitude, longitude], zoom_start=12)
addToMap(bog_venues_uni, 'gold', unibog_map)
unibog_map

### Adding the office's factor

In [42]:
officebog_venues = getNearbyVenues(names=dfbogota['BogotaDistricts'], latitudes=dfbogota['Latitude'], longitudes=dfbogota['Longitude'], radius=1000, categoryIds='4d4b7105d754a06375d81259')
officebog_venues.head()

Unnamed: 0,District,District Latitude,District Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Usaquén,4.694969,-74.031093,Centro Médico de la Sabana,4.696026,-74.031596,Doctor's Office
1,Usaquén,4.694969,-74.031093,Bistronomy,4.695679,-74.030267,French Restaurant
2,Usaquén,4.694969,-74.031093,WeWork Usaquén,4.694039,-74.032702,Coworking Space
3,Usaquén,4.694969,-74.031093,Fundacion Santa Fe de Bogota,4.695136,-74.033211,Hospital
4,Usaquén,4.694969,-74.031093,Santa Ana Medical Center,4.696085,-74.032065,Doctor's Office


In [43]:
officebog_map = folium.Map(location=[latitude, longitude], zoom_start=12)
addToMap(officebog_venues, 'fuchsia', officebog_map)
officebog_map

### Grouping all the factors with dfbogota (burger venues and districts)

In [52]:
def addColumn(startDf, columnTitle, dataDf):
    grouped = dataDf.groupby('District').count()
    
    for n in startDf['District']:
        try:
            startDf.loc[startDf['District'] == n,columnTitle] = grouped.loc[n, 'Venue']
        except:
            startDf.loc[startDf['District'] == n,columnTitle] = 0

In [56]:
dfbogota1 = dfbogota.copy()
dfbogota1.rename(columns={'BogotaDistricts':'District'}, inplace=True)
addColumn(dfbogota1, 'Burger', burgerbog_venues)
addColumn(dfbogota1, 'High Schools', bog_venues_highschools)
addColumn(dfbogota1, 'Universities', bog_venues_uni)
addColumn(dfbogota1, 'Offices', officebog_venues)
dfbogota1

Unnamed: 0,District,Latitude,Longitude,Burger,High Schools,Universities,Offices
0,Usaquén,4.694969,-74.031093,21.0,1.0,3.0,50.0
1,Chapinero,4.645377,-74.061943,43.0,4.0,37.0,50.0
2,Santa Fe (Bogotá),4.602204,-74.078837,23.0,2.0,22.0,50.0
3,San Cristóbal (Bogotá),4.548658,-74.047473,0.0,0.0,0.0,0.0
4,Usme,4.411136,-74.129108,0.0,0.0,0.0,0.0
5,Tunjuelito,4.561049,-74.127523,3.0,2.0,0.0,43.0
6,Bosa (Bogotá),4.625492,-74.20028,0.0,1.0,0.0,22.0
7,Kennedy (Bogotá),4.629682,-74.149935,12.0,1.0,1.0,49.0
8,Fontibón,4.673327,-74.144732,4.0,3.0,2.0,49.0
9,Engativá,4.708695,-74.109643,12.0,2.0,1.0,46.0


### The final INDEX

In [64]:
# negative effect (high competitivenes high supply less oportunities)
burger_weight = -1

# positive effect 
school_weight = 1

# positive effect
uni_weight = 1

# positive effect
office_weight = 1.5



In [65]:
dfweighted = dfbogota1[['District']].copy()

In [66]:
dfweighted['Score'] = dfbogota1['Burger'] * burger_weight + dfbogota1['High Schools'] * school_weight + dfbogota1['Universities'] * uni_weight + dfbogota1['Offices'] * office_weight
dfweighted = dfweighted.sort_values(by=['Score'], ascending=False)
dfweighted


Unnamed: 0,District,Score
16,La Candelaria,87.0
2,Santa Fe (Bogotá),76.0
8,Fontibón,74.5
14,Antonio Nariño (Bogotá),74.0
1,Chapinero,73.0
13,Los Mártires,70.5
17,Rafael Uribe Uribe (Bogotá),70.0
12,Teusaquillo,70.0
11,Barrios Unidos (Bogotá),69.5
15,Puente Aranda,68.5


## FINAL RESULT: It seems that La Candelaria is the best option as a District to open the new burger restaurant.