#  Agrupación y Segmentación de Vecindarios en la Ciudad de Toronto

## Librerias a Emplear

In [142]:
import numpy as np # librería para manejar datos vectorizados

import pandas as pd # librería para análisis de datos
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # librería para manejar archivos JSON 

#!conda install -c conda-forge geopy --yes # retirar el comentario de esta línea si no ha completado el laboratorio de la API de FourSquare 
from geopy.geocoders import Nominatim # convertir una dirección en valores de latitud y longitud

import requests # librería para manejar solicitudes
from pandas.io.json import json_normalize # librería para convertir un archivo json en un dataframe pandas

# Matplotlib y módulos asociados para graficar
import matplotlib.cm as cm
import matplotlib.colors as colors

# importar k-means desde la fase de agrupación
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # retirar el comentario de esta línea si no ha completado el laboratorio de la API de FourSquare
import folium # librería para graficar mapas 

print('Libraries imported.')

Libraries imported.


In [143]:
from bs4 import BeautifulSoup
import matplotlib.cm as cm
import matplotlib.colors as colors

## Lectura de Datos a Emplear

In [144]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
req = requests.get(url)
soup = BeautifulSoup(req.content, "html.parser")

In [145]:
tabla = soup.find('table')

In [146]:
tabla_final = []

for i in range(0,9):
    for j in tabla.find_all('tr'):
        cols = j.find_all('p')
        tabla_final.append(cols[i].getText())

In [147]:
df = pd.DataFrame(tabla_final)
df.head()

Unnamed: 0,0
0,M1ANot assigned\n
1,M1BScarborough(Malvern / Rouge)\n
2,M1CScarborough(Rouge Hill / Port Union / Highl...
3,M1EScarborough(Guildwood / Morningside / West ...
4,M1GScarborough(Woburn)\n


Le damos un formato a la tabla, para obtener la tabla solicitada

In [148]:
df['Postal Code'] = df[0].str[0:3]
df["Borough"]=df[0].str[3:].str.split("(",n=1,expand=True)[0]
df["Neigh"]=df[0].str.rsplit("(",n=2,expand=True)[1].str.split(")",n=1,expand=True)[0]
df["Neighborhood"]=df["Neigh"].str.replace(" / ",",")

In [149]:
df.head()

Unnamed: 0,0,Postal Code,Borough,Neigh,Neighborhood
0,M1ANot assigned\n,M1A,Not assigned\n,,
1,M1BScarborough(Malvern / Rouge)\n,M1B,Scarborough,Malvern / Rouge,"Malvern,Rouge"
2,M1CScarborough(Rouge Hill / Port Union / Highl...,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek,"Rouge Hill,Port Union,Highland Creek"
3,M1EScarborough(Guildwood / Morningside / West ...,M1E,Scarborough,Guildwood / Morningside / West Hill,"Guildwood,Morningside,West Hill"
4,M1GScarborough(Woburn)\n,M1G,Scarborough,Woburn,Woburn


In [150]:
df_final=df[(df["Borough"]!="Not assigned\n")]
df_final=df_final.drop([0,"Neigh"],axis=1)

In [382]:
df_final.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
1,M1B,Scarborough,"Malvern,Rouge"
2,M1C,Scarborough,"Rouge Hill,Port Union,Highland Creek"
3,M1E,Scarborough,"Guildwood,Morningside,West Hill"
4,M1G,Scarborough,Woburn
5,M1H,Scarborough,Cedarbrae


In [152]:
df_final.shape

(103, 3)

## Obtención de las coordenadas de Latitud y Longitud

In [153]:
file_name ='https://cocl.us/Geospatial_data'
Geoconder = pd.read_csv(file_name)

In [220]:
neighborhoods=pd.merge(df_final,Geoconder,on="Postal Code")

In [383]:
neighborhoods.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern,Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill,Port Union,Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [264]:
type(neighborhoods)

pandas.core.frame.DataFrame

### Observamos cuantos barrios unicos hay en Toronto

In [265]:
print('There are {} uniques Borough.'.format(len(neighborhoods['Borough'].unique())))

There are 15 uniques Borough.


In [266]:
neighborhoods['Borough'].unique()

array(['Scarborough', 'North York', 'East York', 'East Toronto',
       'East YorkEast Toronto', 'Central Toronto', 'Downtown Toronto',
       'Downtown TorontoStn A PO Boxes25 The Esplanade', 'York',
       'West Toronto', "Queen's Park",
       'MississaugaCanada Post Gateway Processing Centre',
       'East TorontoBusiness reply mail Processing Centre969 Eastern',
       'Etobicoke', 'EtobicokeNorthwest'], dtype=object)

## Agrupación y segmentación de los vecindarios

#### Utilice la librería geopy para obtener la latitud y la longitud de la Ciudad de Toronto

Para poder definir una instancia del geocoder necesitaremos definir un user_agent. Nombraremos a nuestro agente ny_explorer, como se muestra a continuación.

In [267]:
address = 'TORONTO,CA'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


### Genere un mapa de Toronto con los barrios super puestos por encima.


In [197]:
# crear un mapa de Nueva York utilizando los valores de latitud y longitud
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# añadir marcadores al mapa
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

## Explorar Barrios en Toronto

In [268]:
neighborhood_latitude = neighborhoods.loc[0, 'Latitude'] # latitud del barrio 
neighborhood_longitude = neighborhoods.loc[0, 'Longitude'] # longitud del barrio

neighborhood_name = neighborhoods.loc[0, 'Neighborhood'] # nombre del barrio

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Malvern,Rouge are 43.806686299999996, -79.19435340000001.


Procedemos a crear una función para obtener ciertos datos de la API de Foursquare

In [351]:
def getNearbyVenues(names, latitudes, longitudes, radius=500,limit=10):
    
    URL='https://api.foursquare.com/v3/places/search?ll={},{}&radius={}&limit={}'
    headers = {
        'Accept': 'application/json',
        'Authorization': 'fsq3z8M+K9/afb++fE1PLmG6xOKTPAj/SsruzHI0bYoklOk='
    }
    df_list=[]
    
    for name, lat, lng in zip(names, latitudes, longitudes):
        url = URL.format(lat, lng, radius, limit)
        results = requests.get(url,headers = headers).json()
        
        for i in range(0,len(results)):
            result = {}
            result['Neighborhood']=name
            result['Neighborhood Latitude']=lat
            result['Neighborhood Longitude']=lng
            result['Name']=results.get('results')[i]['name']
            result['Venue Latitude']=results.get('results')[i]['geocodes']['main']['latitude']
            result['Venue Longitude']=results.get('results')[i]['geocodes']['main']['longitude']
            if len(results.get('results')[i]['categories'])!= 0:
                result['Category_Names']=results.get('results')[i]['categories'][0]['name']
            else:
                result['Category_Names']= 0
            df_list.append(result.copy())
        
    return pd.DataFrame(df_list)

#### Ahora ejecutamos la función anterior para cada categoria y creamos un nuevo dataframe llamado Toronto_venues


In [352]:
Toronto_venues = getNearbyVenues(names=neighborhoods['Neighborhood'],
                                   latitudes=neighborhoods['Latitude'],
                                   longitudes=neighborhoods['Longitude'])

Eliminamos los elementos que le asignamos 0 en la columna 'Category_Names'

In [355]:
Toronto_venues.drop(Toronto_venues[Toronto_venues['Category_Names'] == 0].index, inplace=True)

(206, 7)


#### Revisemos el tamaño del dataframe resultante

In [357]:
print(Toronto_venues.shape)
Toronto_venues.head()

(187, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Name,Venue Latitude,Venue Longitude,Category_Names
0,"Malvern,Rouge",43.806686,-79.194353,Shine Party Rentals,43.80625,-79.193488,Party Supply Store
1,"Malvern,Rouge",43.806686,-79.194353,Mtech Ltd,43.80625,-79.193488,Machine Shop
2,"Rouge Hill,Port Union,Highland Creek",43.784535,-79.160497,Contact Cabling Systems Inc,43.784055,-79.159996,Business and Professional Services
3,"Rouge Hill,Port Union,Highland Creek",43.784535,-79.160497,The Renobuild Group Inc,43.785056,-79.160947,General Contractor
4,"Guildwood,Morningside,West Hill",43.763573,-79.188711,Dodds Plumbing Ltd,43.763692,-79.189181,Plumber


Revisemos cuantos sitios se regresarón para cada barrio

In [358]:
Toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Name,Venue Latitude,Venue Longitude,Category_Names
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,2,2,2,2,2,2
"Alderwood,Long Branch",2,2,2,2,2,2
"Bathurst Manor,Wilson Heights,Downsview North",2,2,2,2,2,2
Bayview Village,2,2,2,2,2,2
"Bedford Park,Lawrence Manor East",2,2,2,2,2,2
Berczy Park,2,2,2,2,2,2
"Birch Cliff,Cliffside West",2,2,2,2,2,2
"Brockton,Parkdale Village,Exhibition Place",2,2,2,2,2,2
"CN Tower,King and Spadina,Railway Lands,Harbourfront West,Bathurst Quay,South Niagara,Island airport",1,1,1,1,1,1
Caledonia-Fairbanks,1,1,1,1,1,1


Encontremos cuantas categorías únicas se pueden conservar de todos los sitios regresados

In [359]:
print('There are {} uniques categories.'.format(len(Toronto_venues['Category_Names'].unique())))

There are 102 uniques categories.


## Analizar Cada Barrio

In [360]:
# codificación
Toronto_onehot = pd.get_dummies(Toronto_venues[['Category_Names']], prefix="", prefix_sep="")

# añadir la columna de barrio de regreso al dataframe
Toronto_onehot['Neighborhood'] = Toronto_venues['Neighborhood'] 

# mover la columna de barrio a la primer columna
fixed_columns = [Toronto_onehot.columns[-1]] + list(Toronto_onehot.columns[:-1])
Toronto_onehot = Toronto_onehot[fixed_columns]

Toronto_onehot.head()

Unnamed: 0,Neighborhood,ATM,Accounting and Bookkeeping Service,Advertising Agency,Architecture Firm,Art Gallery,Asian Restaurant,Automotive Repair Shop,BBQ Joint,Bakery,Bank,Bar,Barbershop,Baseball Field,Basketball Court,Bookstore,Breakfast Spot,Brewery,Burrito Restaurant,Business and Professional Services,Business and Strategy Consulting Office,"Cafes, Coffee, and Tea Houses",Café,Candy Store,Car Dealership,Car Wash and Detail,Chinese Restaurant,Church,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio,Deli,Department Store,Diner,Dog Park,Doors and Windows Contractor,Electrician,Elementary School,Event Service,Farmers' Market,Fast Food Restaurant,Film Studio,Fried Chicken Joint,General Contractor,Golf,Gourmet Store,Greek Restaurant,Grocery Store / Supermarket,Gym and Studio,Hakka Restaurant,Harbor / Marina,Health and Beauty Service,Hiking Trail,Historic and Protected Site,Hobby Store,Home Improvement Service,Home Inspection,Ice Cream Parlor,Industrial Equipment Supplier,Italian Restaurant,Japanese Restaurant,Korean BBQ Restaurant,Landmarks and Outdoors,Landscaper and Gardener,Library,Liquor Store,Loans Agency,Lounge,Machine Shop,Massage Clinic,Media Agency,Metals Supplier,Mexican Restaurant,Nail Salon,Night Club,Nutritionist,Organization,Painter,Park,Party Supply Store,Photographer,Photography Lab,Pizzeria,Playground,Plumber,Professional Cleaning Service,Property Management Office,Public and Social Service,Real Estate Agency,Recording Studio,Repair Service,Restaurant,"Shipping, Freight, and Material Transportation Service",Spa,Sporting Goods Retail,Sushi Restaurant,Telecommunication Service,Tennis Court,Tire Repair Shop,Toy / Game Store
0,"Malvern,Rouge",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Malvern,Rouge",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Rouge Hill,Port Union,Highland Creek",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Rouge Hill,Port Union,Highland Creek",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Guildwood,Morningside,West Hill",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Examinemos el tamaño del nuevo dataframe

In [361]:
Toronto_onehot.shape

(187, 103)

#### Agrupemos las filas por barrios tomando la média de la frecuancia de la ocurrencia de cada categoría

In [362]:
Toronto_grouped = Toronto_onehot.groupby('Neighborhood').mean().reset_index()
Toronto_grouped

Unnamed: 0,Neighborhood,ATM,Accounting and Bookkeeping Service,Advertising Agency,Architecture Firm,Art Gallery,Asian Restaurant,Automotive Repair Shop,BBQ Joint,Bakery,Bank,Bar,Barbershop,Baseball Field,Basketball Court,Bookstore,Breakfast Spot,Brewery,Burrito Restaurant,Business and Professional Services,Business and Strategy Consulting Office,"Cafes, Coffee, and Tea Houses",Café,Candy Store,Car Dealership,Car Wash and Detail,Chinese Restaurant,Church,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio,Deli,Department Store,Diner,Dog Park,Doors and Windows Contractor,Electrician,Elementary School,Event Service,Farmers' Market,Fast Food Restaurant,Film Studio,Fried Chicken Joint,General Contractor,Golf,Gourmet Store,Greek Restaurant,Grocery Store / Supermarket,Gym and Studio,Hakka Restaurant,Harbor / Marina,Health and Beauty Service,Hiking Trail,Historic and Protected Site,Hobby Store,Home Improvement Service,Home Inspection,Ice Cream Parlor,Industrial Equipment Supplier,Italian Restaurant,Japanese Restaurant,Korean BBQ Restaurant,Landmarks and Outdoors,Landscaper and Gardener,Library,Liquor Store,Loans Agency,Lounge,Machine Shop,Massage Clinic,Media Agency,Metals Supplier,Mexican Restaurant,Nail Salon,Night Club,Nutritionist,Organization,Painter,Park,Party Supply Store,Photographer,Photography Lab,Pizzeria,Playground,Plumber,Professional Cleaning Service,Property Management Office,Public and Social Service,Real Estate Agency,Recording Studio,Repair Service,Restaurant,"Shipping, Freight, and Material Transportation Service",Spa,Sporting Goods Retail,Sushi Restaurant,Telecommunication Service,Tennis Court,Tire Repair Shop,Toy / Game Store
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood,Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor,Wilson Heights,Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park,Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0
5,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Birch Cliff,Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Brockton,Parkdale Village,Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"CN Tower,King and Spadina,Railway Lands,Harbou...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Caledonia-Fairbanks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Confirmemos el nuevo tamaño

In [363]:
Toronto_grouped.shape

(98, 103)

#### Imprimamos cada barrio junto con los 5 sitios mas comunes

In [364]:
num_top_venues = 5

for hood in Toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = Toronto_grouped[Toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                     venue  freq
0               Restaurant   0.5
1  Landscaper and Gardener   0.5
2                      ATM   0.0
3   Landmarks and Outdoors   0.0
4       Mexican Restaurant   0.0


----Alderwood,Long Branch----
                venue  freq
0        Dance Studio   0.5
1            Pizzeria   0.5
2                 ATM   0.0
3          Night Club   0.0
4  Mexican Restaurant   0.0


----Bathurst Manor,Wilson Heights,Downsview North----
                           venue  freq
0  Cafes, Coffee, and Tea Houses   0.5
1                           Deli   0.5
2                            ATM   0.0
3                     Night Club   0.0
4             Mexican Restaurant   0.0


----Bayview Village----
                                venue  freq
0  Business and Professional Services   0.5
1                               Diner   0.5
2                                 ATM   0.0
3                          Night Club   0.0
4                  Mexican Restaurant   0.0


--

#### Pongamos eso en el dataframe

Primero escribamos una función para ordenar los sitios en orden descendente.

In [365]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Generemos el nuevo dataframe y mostremos los primeros 10 sitios de cada barrio.

In [373]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# crear las columnas acorde al numero de sitios populares
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# crear un nuevo dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Toronto_grouped['Neighborhood']

for ind in np.arange(Toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()
#len(neighborhoods_venues_sorted)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Restaurant,Landscaper and Gardener,Toy / Game Store,Dog Park,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
1,"Alderwood,Long Branch",Dance Studio,Pizzeria,Toy / Game Store,Car Wash and Detail,Church,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store
2,"Bathurst Manor,Wilson Heights,Downsview North","Cafes, Coffee, and Tea Houses",Deli,Toy / Game Store,Chinese Restaurant,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
3,Bayview Village,Business and Professional Services,Diner,Toy / Game Store,Chinese Restaurant,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
4,"Bedford Park,Lawrence Manor East",Sushi Restaurant,Ice Cream Parlor,Toy / Game Store,Doors and Windows Contractor,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio


## 4. Barrios Agrupados


Ejecutemos k-means para agrupar los barrios en 5 agrupaciones.

In [367]:
# establecer el número de agrupaciones
kclusters = 5

Toronto_grouped_clustering = Toronto_grouped.drop('Neighborhood', 1)

# ejecutar k-means
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Toronto_grouped_clustering)

# revisar las etiquetas de las agrupaciones generadas para cada fila del dataframe
kmeans.labels_[0:10] 


array([3, 3, 4, 3, 3, 3, 3, 3, 3, 3])

Generemos un nuevo dataframe que incluya la agrupación asi como los 10 sitios mas populares de cada barrio.

In [375]:
# añadir etiquetas
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Toronto_merged = neighborhoods

# juntar manhattan_grouped con manhattan_data 
Toronto_merged = Toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Toronto_merged.head() # revisar las ultimas columnas

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern,Rouge",43.806686,-79.194353,3,Machine Shop,Party Supply Store,Toy / Game Store,Chinese Restaurant,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
1,M1C,Scarborough,"Rouge Hill,Port Union,Highland Creek",43.784535,-79.160497,2,General Contractor,Business and Professional Services,Toy / Game Store,Dog Park,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711,3,Plumber,Toy / Game Store,Car Wash and Detail,Church,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
3,M1G,Scarborough,Woburn,43.770992,-79.216917,3,Korean BBQ Restaurant,Cosmetics Store,Toy / Game Store,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Dance Studio,Deli,Department Store
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,3,Hakka Restaurant,Loans Agency,Toy / Game Store,Dog Park,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio


Finalmente visualicemos las agrupaciones resultantes

In [376]:
# crear mapa
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# establecer el esquema de color para las agrupaciones
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# añadir marcadores al mapa
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'], Toronto_merged['Neighborhood'], Toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examinar Agrupaciones

Ahora puede examinar cada agrupación y determinar las categorias del sitio que distingue a cada agrupación. En base a las categorias definidas usted puede asignar un nombre a cada agrupación. Dejaré este ejercicio para usted.

#### Agrupación 1

In [377]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 0, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Scarborough,0,Clothing Store,Toy / Game Store,Doors and Windows Contractor,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio,Deli,Department Store
52,Downtown Toronto,0,Clothing Store,Bar,Toy / Game Store,Doors and Windows Contractor,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio,Deli
67,Downtown Toronto,0,Clothing Store,Barbershop,Toy / Game Store,Doors and Windows Contractor,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio,Deli
71,North York,0,Clothing Store,Toy / Game Store,Doors and Windows Contractor,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio,Deli,Department Store
88,Etobicoke,0,Clothing Store,Home Improvement Service,Toy / Game Store,Doors and Windows Contractor,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio,Deli


#### Agrupación 2


In [378]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 1, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Scarborough,1,Park,"Shipping, Freight, and Material Transportation...",Toy / Game Store,Dog Park,Church,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store
23,North York,1,Tennis Court,Park,Toy / Game Store,Dog Park,Church,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store
25,North York,1,Park,Toy / Game Store,Car Wash and Detail,Church,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
37,East Toronto,1,Hiking Trail,Park,Toy / Game Store,Dog Park,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
42,East Toronto,1,Park,Automotive Repair Shop,Toy / Game Store,Dog Park,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
48,Central Toronto,1,Park,Home Inspection,Toy / Game Store,Dog Park,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
50,Downtown Toronto,1,Park,Toy / Game Store,Car Wash and Detail,Church,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
98,York,1,Media Agency,Park,Toy / Game Store,Chinese Restaurant,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio


### Agrupación 3

In [379]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 2, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,2,General Contractor,Business and Professional Services,Toy / Game Store,Dog Park,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
11,Scarborough,2,General Contractor,Toy / Game Store,Doors and Windows Contractor,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio,Deli
36,East York,2,General Contractor,Repair Service,Toy / Game Store,Dog Park,Church,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store
64,Central Toronto,2,Property Management Office,General Contractor,Dog Park,Church,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
93,Etobicoke,2,General Contractor,Toy / Game Store,Doors and Windows Contractor,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio,Deli
100,Etobicoke,2,General Contractor,Event Service,Toy / Game Store,Dog Park,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio


### Agrupación 4

In [380]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 3, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,3,Machine Shop,Party Supply Store,Toy / Game Store,Chinese Restaurant,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
2,Scarborough,3,Plumber,Toy / Game Store,Car Wash and Detail,Church,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
3,Scarborough,3,Korean BBQ Restaurant,Cosmetics Store,Toy / Game Store,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Dance Studio,Deli,Department Store
4,Scarborough,3,Hakka Restaurant,Loans Agency,Toy / Game Store,Dog Park,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
5,Scarborough,3,Elementary School,Playground,Toy / Game Store,Diner,Church,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store
6,Scarborough,3,Photographer,Department Store,Toy / Game Store,Car Wash and Detail,Church,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store
8,Scarborough,3,Church,Plumber,Toy / Game Store,Car Wash and Detail,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
9,Scarborough,3,Café,Electrician,Toy / Game Store,Dog Park,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
10,Scarborough,3,Automotive Repair Shop,Toy / Game Store,Doors and Windows Contractor,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio,Deli
12,Scarborough,3,Restaurant,Landscaper and Gardener,Toy / Game Store,Dog Park,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio


### Agrupación 5

In [381]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 4, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,North York,4,"Cafes, Coffee, and Tea Houses",Park,Business and Professional Services,Toy / Game Store,Diner,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store
24,North York,4,"Cafes, Coffee, and Tea Houses",Park,Business and Professional Services,Toy / Game Store,Diner,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store
28,North York,4,"Cafes, Coffee, and Tea Houses",Deli,Toy / Game Store,Chinese Restaurant,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
29,North York,4,Massage Clinic,"Cafes, Coffee, and Tea Houses",Toy / Game Store,Chinese Restaurant,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
39,East York,4,Deli,Diner,Toy / Game Store,Doors and Windows Contractor,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
57,Downtown Toronto,4,Café,Deli,Toy / Game Store,Chinese Restaurant,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
59,Downtown Toronto,4,"Cafes, Coffee, and Tea Houses",Fried Chicken Joint,Toy / Game Store,Dog Park,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio
86,MississaugaCanada Post Gateway Processing Centre,4,"Cafes, Coffee, and Tea Houses",Toy / Game Store,Chinese Restaurant,Clothing Store,Coffee Shop,Computer Repair Service,Construction Supplies Store,Cosmetics Store,Dance Studio,Deli
