## Selecting a place to build a new shopping mall in Quito, Ecuador
### Capstone Project – Final Report - Applied Data Science Capstone

This notebook is where all the calculations/code for this project where made. For the complete report of this study, go to the following link:
https://github.com/And24Esp/Coursera_Capstone/blob/master/CapstoneProject-FinalReport%20(wk5).pdf

In [1]:
#Install requiered packages 
!conda install -c conda-forge geocoder --yes
!conda install -c conda-forge geopy --yes
!conda install -c conda-forge folium=0.5.0 --yes

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geocoder


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geocoder-1.38.1            |             py_1          53 KB  conda-forge
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    certifi-2019.11.28         |           py36_0         149 KB  conda-forge
    ratelim-0.1.6              |             py_2           6 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.4 MB

The following NEW packages will be INSTALLED:

    geocoder:        1.38.1-py_1       conda-forge
    ratelim:         0.1.6-py_2        conda-forge

The following packages will be UPDATED:

    

In [37]:
# Import libraries
import numpy as np # library to handle data in a vectorized manner

import geocoder # to get coordinates

import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

In [3]:
# send the GET request
data = requests.get("https://en.wikipedia.org/wiki/Category:Parishes_of_Quito_Canton").text

In [4]:
# parse data from the html into a beautifulsoup object
soup = BeautifulSoup(data, 'html.parser')

In [5]:
# create list to store data
neighborhoodList = []

In [6]:
# append the data into the list
for row in soup.find_all("div", class_="mw-category")[0].findAll("li"):
    neighborhoodList.append(row.text)

In [8]:
# create new DataFrame from list
uio_df = pd.DataFrame({"Neighborhood": neighborhoodList})
uio_df.head()

Unnamed: 0,Neighborhood
0,"Alfaro, Quito"
1,Benalcázar
2,Calacalí
3,"Calderón, Quito"
4,Chaupicruz


In [9]:
# print number of rows of the dataframe
uio_df.shape

(31, 1)

In [10]:
# define a function to get coordinates
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Quito, Ecuador'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [11]:
# call the function to get the coordinates, store in a new list using list comprehension
coords = [ get_latlng(neighborhood) for neighborhood in uio_df["Neighborhood"].tolist() ]

In [12]:
# create temporary dataframe to populate the coordinates into Latitude and Longitude
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])

In [13]:
# merge coordinates into the original dataframe
uio_df['Latitude'] = df_coords['Latitude']
uio_df['Longitude'] = df_coords['Longitude']

In [14]:
# check neighborhoods and the coordinates
print(uio_df.shape)
uio_df

(31, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,"Alfaro, Quito",-0.280763,-78.556043
1,Benalcázar,-0.082714,-78.430165
2,Calacalí,0.04116,-78.53254
3,"Calderón, Quito",-0.2056,-78.5088
4,Chaupicruz,-0.15816,-78.48632
5,Chillogallo,-0.075415,-78.432173
6,"Cotocollao, Quito",-0.11839,-78.49405
7,Cumbayá,-0.20306,-78.43804
8,El Quinche,-0.10996,-78.29858
9,"El Salvador, Quito",-0.193713,-78.486892


In [15]:
# get coordinates of Quito
address = 'Quito, Ecuador'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Quito, Ecuador {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Quito, Ecuador -0.2201641, -78.5123274.


In [16]:
# create map of Quito using latitude and longitude values
map_uio = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(uio_df['Latitude'], uio_df['Longitude'], uio_df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_uio)  
    
map_uio

In [17]:
# store Foursquare Credentials and Version
CLIENT_ID = 'QBG3N4O3WHZ210S5XVIEHSWSXPEK5AFKPM5EA1ZDUK2NE1RI' # your Foursquare ID
CLIENT_SECRET = 'JR34XGVF3SQK050CEQGRNGNKU2GWQUXDXJVUIIM0KVMR0CEI' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: QBG3N4O3WHZ210S5XVIEHSWSXPEK5AFKPM5EA1ZDUK2NE1RI
CLIENT_SECRET:JR34XGVF3SQK050CEQGRNGNKU2GWQUXDXJVUIIM0KVMR0CEI


In [20]:
# Get the top 100 venues that are within a radius of 2000 meters.

radius = 2000
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(uio_df['Latitude'], uio_df['Longitude'], uio_df['Neighborhood']):
    
    # create API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information of each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [21]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(1619, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,"Alfaro, Quito",-0.280763,-78.556043,Supercines,-0.285233,-78.543765,Multiplex
1,"Alfaro, Quito",-0.280763,-78.556043,Vaco y Vaca,-0.286154,-78.542474,Steakhouse
2,"Alfaro, Quito",-0.280763,-78.556043,Sweet & Coffee,-0.285454,-78.542968,Café
3,"Alfaro, Quito",-0.280763,-78.556043,El Español,-0.28518,-78.5443,Sandwich Place
4,"Alfaro, Quito",-0.280763,-78.556043,Juan Valdez Café,-0.285174,-78.543769,Coffee Shop


In [22]:
# check how many venues were returned per neighorhood
venues_df.groupby(["Neighborhood"]).count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Alfaro, Quito",22,22,22,22,22,22
Benalcázar,4,4,4,4,4,4
"Calderón, Quito",100,100,100,100,100,100
Chaupicruz,100,100,100,100,100,100
Chillogallo,4,4,4,4,4,4
"Cotocollao, Quito",71,71,71,71,71,71
Cumbayá,93,93,93,93,93,93
El Quinche,6,6,6,6,6,6
"El Salvador, Quito",100,100,100,100,100,100
González Suárez,100,100,100,100,100,100


In [23]:
# check number of unique categories can be curated from all the returned venues
print('{} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

146 uniques categories.


In [24]:
# check if results contain "Shopping Mall"
"Shopping Mall" in venues_df['VenueCategory'].unique()

True

In [25]:
# one hot encoding
uio_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
uio_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [uio_onehot.columns[-1]] + list(uio_onehot.columns[:-1])
uio_onehot = uio_onehot[fixed_columns]

print(uio_onehot.shape)
uio_onehot.head()

(1619, 147)


Unnamed: 0,Neighborhoods,Airport Terminal,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bagel Shop,Bakery,Bar,Bed & Breakfast,Beer Bar,Beer Garden,Big Box Store,Bike Shop,Breakfast Spot,Brewery,Burger Joint,Bus Station,Butcher,Cafeteria,Café,Cajun / Creole Restaurant,Chinese Restaurant,Chocolate Shop,Church,Clothing Store,Coffee Shop,Comedy Club,Concert Hall,Construction & Landscaping,Convenience Store,Creperie,Deli / Bodega,Department Store,Dessert Shop,Diner,Dog Run,Donut Shop,Electronics Store,Empanada Restaurant,Event Space,Factory,Farmers Market,Fast Food Restaurant,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Garden,Gastropub,General Entertainment,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Historic Site,History Museum,Home Service,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Light Rail Station,Lounge,Market,Mexican Restaurant,Middle Eastern Restaurant,Monument / Landmark,Motel,Mountain,Movie Theater,Multiplex,Museum,Music Venue,Nightclub,Noodle House,Other Great Outdoors,Paella Restaurant,Paintball Field,Park,Peruvian Restaurant,Pharmacy,Pie Shop,Pizza Place,Planetarium,Plaza,Pool,Pool Hall,Pub,Rental Car Location,Restaurant,Salad Place,Salsa Club,Sandwich Place,Scenic Lookout,Science Museum,Sculpture Garden,Seafood Restaurant,Shop & Service,Shopping Mall,Snack Place,Soccer Field,Soccer Stadium,South American Restaurant,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Bar,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Swiss Restaurant,Taco Place,Tea Room,Tennis Court,Tex-Mex Restaurant,Theater,Theme Park,Theme Park Ride / Attraction,Tourist Information Center,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Veterinarian,Wine Bar,Wings Joint,Women's Store,Zoo
0,"Alfaro, Quito",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Alfaro, Quito",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Alfaro, Quito",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Alfaro, Quito",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Alfaro, Quito",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [26]:
# group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
uio_grouped = uio_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(uio_grouped.shape)
uio_grouped

(28, 147)


Unnamed: 0,Neighborhoods,Airport Terminal,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bagel Shop,Bakery,Bar,Bed & Breakfast,Beer Bar,Beer Garden,Big Box Store,Bike Shop,Breakfast Spot,Brewery,Burger Joint,Bus Station,Butcher,Cafeteria,Café,Cajun / Creole Restaurant,Chinese Restaurant,Chocolate Shop,Church,Clothing Store,Coffee Shop,Comedy Club,Concert Hall,Construction & Landscaping,Convenience Store,Creperie,Deli / Bodega,Department Store,Dessert Shop,Diner,Dog Run,Donut Shop,Electronics Store,Empanada Restaurant,Event Space,Factory,Farmers Market,Fast Food Restaurant,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Garden,Gastropub,General Entertainment,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Historic Site,History Museum,Home Service,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Light Rail Station,Lounge,Market,Mexican Restaurant,Middle Eastern Restaurant,Monument / Landmark,Motel,Mountain,Movie Theater,Multiplex,Museum,Music Venue,Nightclub,Noodle House,Other Great Outdoors,Paella Restaurant,Paintball Field,Park,Peruvian Restaurant,Pharmacy,Pie Shop,Pizza Place,Planetarium,Plaza,Pool,Pool Hall,Pub,Rental Car Location,Restaurant,Salad Place,Salsa Club,Sandwich Place,Scenic Lookout,Science Museum,Sculpture Garden,Seafood Restaurant,Shop & Service,Shopping Mall,Snack Place,Soccer Field,Soccer Stadium,South American Restaurant,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Bar,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Swiss Restaurant,Taco Place,Tea Room,Tennis Court,Tex-Mex Restaurant,Theater,Theme Park,Theme Park Ride / Attraction,Tourist Information Center,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Veterinarian,Wine Bar,Wings Joint,Women's Store,Zoo
0,"Alfaro, Quito",0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.136364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.045455,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.045455,0.0,0.0,0.0,0.045455,0.0,0.045455,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Benalcázar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Calderón, Quito",0.0,0.0,0.01,0.0,0.01,0.01,0.01,0.02,0.0,0.03,0.0,0.02,0.03,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.01,0.0,0.0,0.01,0.04,0.01,0.01,0.01,0.03,0.0,0.07,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.03,0.02,0.0,0.01,0.0,0.07,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.02,0.01,0.01,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.01,0.05,0.01,0.0,0.0,0.0,0.08,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.01,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0
3,Chaupicruz,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.07,0.01,0.0,0.0,0.03,0.0,0.01,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.02,0.01,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.0,0.0,0.01,0.01,0.01,0.02,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.05,0.0,0.03,0.0,0.0,0.0,0.01,0.0,0.01,0.05,0.0,0.01,0.0,0.0,0.0,0.0,0.07,0.0,0.03,0.01,0.02,0.0,0.03,0.01,0.01,0.0,0.0,0.01,0.0,0.01,0.01,0.01,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0
4,Chillogallo,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Cotocollao, Quito",0.014085,0.014085,0.0,0.028169,0.0,0.0,0.0,0.0,0.0,0.042254,0.0,0.028169,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014085,0.084507,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,0.014085,0.028169,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,0.014085,0.014085,0.014085,0.0,0.014085,0.014085,0.0,0.056338,0.0,0.0,0.0,0.014085,0.0,0.028169,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028169,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.014085,0.014085,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028169,0.0,0.014085,0.0,0.056338,0.0,0.0,0.0,0.0,0.0,0.0,0.028169,0.0,0.0,0.028169,0.0,0.0,0.0,0.084507,0.0,0.028169,0.0,0.028169,0.014085,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,0.014085,0.056338,0.014085,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,0.0
6,Cumbayá,0.0,0.021505,0.0,0.021505,0.0,0.0,0.0,0.0,0.0,0.010753,0.0,0.021505,0.021505,0.0,0.0,0.010753,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.010753,0.0,0.010753,0.0,0.0,0.0,0.021505,0.0,0.0,0.0,0.010753,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010753,0.032258,0.0,0.010753,0.0,0.010753,0.0,0.010753,0.010753,0.0,0.010753,0.021505,0.010753,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.043011,0.010753,0.0,0.010753,0.0,0.0,0.0,0.021505,0.021505,0.0,0.0,0.0,0.043011,0.0,0.0,0.0,0.021505,0.0,0.0,0.0,0.0,0.010753,0.0,0.032258,0.0,0.043011,0.0,0.0,0.0,0.0,0.0,0.0,0.053763,0.010753,0.0,0.021505,0.0,0.0,0.0,0.021505,0.0,0.043011,0.010753,0.010753,0.0,0.010753,0.010753,0.0,0.010753,0.0,0.0,0.0,0.010753,0.010753,0.010753,0.0,0.010753,0.010753,0.010753,0.0,0.010753,0.0,0.010753,0.0,0.010753,0.021505,0.010753,0.0,0.0,0.021505,0.0,0.0
7,El Quinche,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"El Salvador, Quito",0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.03,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.05,0.0,0.01,0.01,0.03,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.01,0.02,0.01,0.0,0.06,0.01,0.0,0.02,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.01,0.02,0.02,0.01,0.05,0.0,0.01,0.0,0.0,0.02,0.0,0.03,0.01,0.0,0.02,0.01,0.0,0.0,0.03,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0
9,González Suárez,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.02,0.0,0.03,0.0,0.02,0.04,0.0,0.01,0.0,0.0,0.0,0.01,0.03,0.06,0.0,0.0,0.01,0.06,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.04,0.01,0.0,0.01,0.01,0.04,0.01,0.01,0.03,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.07,0.0,0.01,0.0,0.0,0.03,0.0,0.04,0.01,0.0,0.01,0.01,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0


In [28]:
# Create a new DataFrame for Shopping Mall data only
uio_mall = uio_grouped[["Neighborhoods","Shopping Mall"]]
uio_mall.head()

Unnamed: 0,Neighborhoods,Shopping Mall
0,"Alfaro, Quito",0.045455
1,Benalcázar,0.0
2,"Calderón, Quito",0.01
3,Chaupicruz,0.03
4,Chillogallo,0.0


In [29]:
# set number of clusters
kclusters = 3

uio_clustering = uio_mall.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(uio_clustering)

# check cluster labels generated for each row in dataframe
kmeans.labels_[0:10]

array([1, 0, 0, 2, 0, 2, 1, 0, 0, 0], dtype=int32)

In [32]:
# create new dataframe that includes the cluster as well as the top 10 venues for each parish.
uio_merged = uio_mall.copy()

# add clustering labels
uio_merged["Cluster Labels"] = kmeans.labels_

uio_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
uio_merged.head()

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels
0,"Alfaro, Quito",0.045455,1
1,Benalcázar,0.0,0
2,"Calderón, Quito",0.01,0
3,Chaupicruz,0.03,2
4,Chillogallo,0.0,0


In [33]:
# merge Quito_grouped with Quito_data to add latitude/longitude for each neighborhood
uio_merged = uio_merged.join(uio_df.set_index("Neighborhood"), on="Neighborhood")

print(uio_merged.shape)
uio_merged.head()

(28, 5)


Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
0,"Alfaro, Quito",0.045455,1,-0.280763,-78.556043
1,Benalcázar,0.0,0,-0.082714,-78.430165
2,"Calderón, Quito",0.01,0,-0.2056,-78.5088
3,Chaupicruz,0.03,2,-0.15816,-78.48632
4,Chillogallo,0.0,0,-0.075415,-78.432173


In [34]:
# sort results by Cluster Labels
print(uio_merged.shape)
uio_merged.sort_values(["Cluster Labels"], inplace=True)
uio_merged

(28, 5)


Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
27,"Santa Prisca, Quito",0.01,0,-0.214632,-78.50456
17,La Vicentina,0.0,0,-0.2165,-78.48793
16,"La Magdalena, Quito",0.0,0,-0.246774,-78.495721
15,"La Libertad, Quito",0.0,0,-0.28523,-78.58038
14,"La Floresta, Quito",0.0,0,-0.20953,-78.48097
26,"Santa Bárbara, Quito",0.0,0,-0.20321,-78.48236
11,Guápulo,0.01,0,-0.2,-78.48333
10,Guayllabamba,0.0,0,-0.05299,-78.34964
20,Puengasí,0.0,0,-0.25,-78.5
9,González Suárez,0.0,0,-0.202104,-78.480342


In [38]:
# Finally, let's visualize the resulting clusters

# create map
uio_clustersmap = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(uio_merged['Latitude'], uio_merged['Longitude'], uio_merged['Neighborhood'], uio_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(uio_clustersmap)
       
uio_clustersmap

In [40]:
#Review cluster 0
uio_merged.loc[uio_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
27,"Santa Prisca, Quito",0.01,0,-0.214632,-78.50456
17,La Vicentina,0.0,0,-0.2165,-78.48793
16,"La Magdalena, Quito",0.0,0,-0.246774,-78.495721
15,"La Libertad, Quito",0.0,0,-0.28523,-78.58038
14,"La Floresta, Quito",0.0,0,-0.20953,-78.48097
26,"Santa Bárbara, Quito",0.0,0,-0.20321,-78.48236
11,Guápulo,0.01,0,-0.2,-78.48333
10,Guayllabamba,0.0,0,-0.05299,-78.34964
20,Puengasí,0.0,0,-0.25,-78.5
9,González Suárez,0.0,0,-0.202104,-78.480342


In [41]:
#Review cluster 1
uio_merged.loc[uio_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
21,San Antonio de Pichincha,0.058824,1,-0.00837,-78.44684
0,"Alfaro, Quito",0.045455,1,-0.280763,-78.556043
6,Cumbayá,0.043011,1,-0.20306,-78.43804
18,"Nono, Ecuador",0.0625,1,-0.260566,-78.549491


In [42]:
#Review cluster 2
uio_merged.loc[uio_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
12,Itchimbía,0.02,2,-0.221923,-78.500875
5,"Cotocollao, Quito",0.028169,2,-0.11839,-78.49405
3,Chaupicruz,0.03,2,-0.15816,-78.48632
25,"San Sebastián, Quito",0.017857,2,-0.116312,-78.480754
13,Iñaquito,0.03,2,-0.177577,-78.488
