## Applied Data Science - Capstone Project

### Opening a Sushi Restaurant in Buenos Aires 

#### Import required libraries

In [1]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import json 
from pandas.io.json import json_normalize
import requests
from bs4 import BeautifulSoup
!pip install geocoder
import geocoder
import os
!pip install folium
import folium # map rendering library
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
# Matplotlib and associated plotting modules
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
%matplotlib inline

from sklearn.cluster import KMeans

print('Libraries imported.')

Libraries imported.


#### Take the data from the Wikipedia page into a DataFrame

In [2]:
# Download the Wikipedia page that lists the neighbourhoods of Buenos Aires.
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() surfaces an HTTP error here instead of as a confusing
# parsing failure in a later cell.
res = requests.get("https://en.wikipedia.org/wiki/Neighbourhoods_of_Buenos_Aires", timeout=30)
res.raise_for_status()
soup = BeautifulSoup(res.content, 'lxml')
# find() returns the first matching table, or None when the page has no such table.
table = soup.find('table', class_= 'wikitable sortable')
if table is None:
    raise ValueError("No 'wikitable sortable' table found on the Wikipedia page")

In [3]:
# read_html returns a list of DataFrames; the neighbourhood table is the first entry.
parsed_tables = pd.read_html(str(table))
bsas_df = parsed_tables[0]
bsas_df.head(10)

Unnamed: 0,Name,Area in km²,Population,Commune
0,Agronomía,2.1,13963,15
1,Almagro,4.1,128206,5
2,Balvanera,4.4,137521,3
3,Barracas,7.6,73377,4
4,Belgrano,6.8,126816,13
5,Boedo,2.6,45563,5
6,Caballito,6.8,170309,6
7,Chacarita,3.1,25778,15
8,Coghlan,1.3,18021,12
9,Colegiales,2.3,52391,13


Drop unnecessary columns

In [4]:
# Remove the area and commune columns in place; only name and population remain.
unused_columns = ['Area in km²', 'Commune']
bsas_df.drop(columns=unused_columns, inplace=True)
bsas_df

Unnamed: 0,Name,Population
0,Agronomía,13963
1,Almagro,128206
2,Balvanera,137521
3,Barracas,73377
4,Belgrano,126816
5,Boedo,45563
6,Caballito,170309
7,Chacarita,25778
8,Coghlan,18021
9,Colegiales,52391


In [5]:
# (rows, columns) of the neighbourhood table after dropping the unused columns
bsas_df.shape

(48, 2)

Rename columns: Name to Neighborhood

In [6]:
# Rename the 'Name' column in place so later cells can refer to 'Neighborhood'.
column_renames = {'Name': 'Neighborhood'}
bsas_df.rename(columns=column_renames, inplace=True)
bsas_df

Unnamed: 0,Neighborhood,Population
0,Agronomía,13963
1,Almagro,128206
2,Balvanera,137521
3,Barracas,73377
4,Belgrano,126816
5,Boedo,45563
6,Caballito,170309
7,Chacarita,25778
8,Coghlan,18021
9,Colegiales,52391


#### Get the geographical coordinates 

In [7]:
def get_latilong(neighborhood, max_attempts=5, retry_delay=1.0):
    """Return ``[latitude, longitude]`` for a neighborhood of Buenos Aires.

    The ArcGIS geocoder is queried with
    ``'<neighborhood>, Buenos Aires, Buenos Aires'`` and the request is
    retried while the service returns no coordinates.

    Parameters
    ----------
    neighborhood : str
        Neighborhood name to geocode.
    max_attempts : int, optional
        Maximum number of geocoding requests before giving up. Bounding the
        retries prevents the notebook from hanging forever when the service
        cannot resolve a name or is unreachable.
    retry_delay : float, optional
        Seconds to wait between two attempts.

    Returns
    -------
    list
        The ``latlng`` value reported by the geocoder.

    Raises
    ------
    RuntimeError
        If no coordinates were obtained after ``max_attempts`` requests.
    """
    import time  # local import: only needed for the pause between retries

    # NOTE(review): in the notebook output "Flores" resolves to roughly
    # (-36.02, -59.08), far from every other neighborhood (~-34.6, -58.4).
    # This query may be ambiguous for some names -- validate the results.
    query = '{}, Buenos Aires, Buenos Aires'.format(neighborhood)

    for attempt in range(1, max_attempts + 1):
        lati_long_coords = geocoder.arcgis(query).latlng
        if lati_long_coords is not None:
            return lati_long_coords
        # Pause before retrying, but not after the final attempt.
        if attempt < max_attempts and retry_delay > 0:
            time.sleep(retry_delay)

    raise RuntimeError(
        'Could not geocode {!r} after {} attempts'.format(query, max_attempts)
    )

In [8]:
neighborhoods = bsas_df['Neighborhood']
# Geocode every neighbourhood name, preserving the table order.
coords = list(map(get_latilong, bsas_df["Neighborhood"].tolist()))

In [9]:
# Turn the list of [lat, lng] pairs into a frame, then copy both columns onto
# the neighbourhood table (rows are matched by index).
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])
for coord_column in ('Latitude', 'Longitude'):
    bsas_df[coord_column] = df_coords[coord_column]

In [10]:
# Neighbourhood table, now including the Latitude and Longitude columns
bsas_df

Unnamed: 0,Neighborhood,Population,Latitude,Longitude
0,Agronomía,13963,-34.59243,-58.49659
1,Almagro,128206,-34.61108,-58.43028
2,Balvanera,137521,-34.61011,-58.40602
3,Barracas,73377,-34.6499,-58.3891
4,Belgrano,126816,-34.56153,-58.45702
5,Boedo,45563,-34.63228,-58.41779
6,Caballito,170309,-34.62218,-58.42858
7,Chacarita,25778,-34.58351,-58.45287
8,Coghlan,18021,-34.56158,-58.47428
9,Colegiales,52391,-34.57502,-58.44777


Save to csv file

In [11]:
# Persist the geocoded table to 'bsas_df.csv' in the working directory, without the index column
bsas_df.to_csv('bsas_df.csv', index=False)

#### Create a map of Buenos Aires

In [12]:
address = 'Buenos Aires, Argentina'

# Look up the city coordinates; they are used as the centre of the maps below.
geolocator = Nominatim(user_agent="BsAs")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Nominatim returned no result for {!r}'.format(address))
latitude = location.latitude
longitude= location.longitude
print('The geograpical coordinate of Buenos Aires are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Buenos Aires are -34.6075682, -58.4370894.


In [13]:
# Base map centred on the city coordinates.
map_bsas = folium.Map(location=[latitude, longitude], zoom_start=10)

# Appearance shared by every neighbourhood marker.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# add one circle marker per neighborhood, with the name shown in its popup
for lat, lng, Neighborhood in zip(bsas_df['Latitude'], bsas_df['Longitude'], bsas_df['Neighborhood']):
    popup = folium.Popup('{}'.format(Neighborhood), parse_html=True)
    folium.CircleMarker([lat, lng], popup=popup, **marker_style).add_to(map_bsas)

map_bsas

#### Use Foursquare to explore the neighborhoods

In [14]:
# Foursquare API credentials are read from the environment so that secrets are
# never stored in, or printed by, the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# Any credentials that were ever committed in a notebook should be regenerated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # my Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # my Foursquare Secret
VERSION = '20180604'

# Report only whether the credentials are available, never their values.
if CLIENT_ID and CLIENT_SECRET:
    print('Foursquare credentials loaded from the environment.')
else:
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests will fail.')

Your credentails:
CLIENT_ID: [REDACTED]
CLIENT_SECRET: [REDACTED]


In [15]:
radius = 500
limit = 100

def getNearbyVenues(names, latitudes, longitudes, radius=500, max_results=None, timeout=30):
    """Query the Foursquare ``venues/explore`` endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterable
        Parallel sequences giving the label and coordinates of each location.
    radius : int, optional
        Search radius passed to the API as ``radius``.
    max_results : int, optional
        Value passed to the API as ``limit``. When omitted, the module-level
        ``limit`` defined above is used.
    timeout : float, optional
        Seconds to wait for each HTTP response.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood', 'Latitude',
        'Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude' and
        'Venue Category'. The frame is empty (but has these columns) when no
        venue is found. 'Venue Category' is None for a venue that the API
        returns without any category.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status (e.g. invalid credentials).
    """
    columns = ['Neighborhood',
               'Latitude',
               'Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    if max_results is None:
        max_results = limit

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):

        # let requests build and encode the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': max_results,
        }

        # make the GET request; fail loudly on HTTP errors
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params,
                                timeout=timeout)
        response.raise_for_status()

        # a location without nearby venues may come back without groups/items
        groups = response.json().get('response', {}).get('groups', [])
        items = groups[0].get('items', []) if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # passing the columns here keeps an empty result well-formed
    nearby_venues = pd.DataFrame(rows, columns=columns)

    return(nearby_venues)

In [16]:
# Collect the Foursquare venues around every neighbourhood centre,
# using the function's default search radius.
bsas_venues = getNearbyVenues(
    bsas_df['Neighborhood'],
    bsas_df['Latitude'],
    bsas_df['Longitude'],
)

In [17]:
# Size of the venue table, followed by a preview of its first ten rows
print(bsas_venues.shape)
bsas_venues.head(n=10)

(1114, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Agronomía,-34.59243,-58.49659,Bonafide,-34.590722,-58.498184,Coffee Shop
1,Agronomía,-34.59243,-58.49659,Baraba,-34.590674,-58.500094,Restaurant
2,Agronomía,-34.59243,-58.49659,Bien de Bien,-34.592322,-58.500959,Café
3,Agronomía,-34.59243,-58.49659,Renatto Cucina Italiana,-34.591342,-58.500781,Italian Restaurant
4,Agronomía,-34.59243,-58.49659,Plaza Martín Rodríguez,-34.590837,-58.501098,Plaza
5,Agronomía,-34.59243,-58.49659,Ladobueno Patisserie & Café,-34.596536,-58.498617,Coffee Shop
6,Agronomía,-34.59243,-58.49659,Repostería Papá Eduvilio,-34.595906,-58.499338,Bakery
7,Agronomía,-34.59243,-58.49659,Club El Talar,-34.59182,-58.49833,Sports Club
8,Agronomía,-34.59243,-58.49659,Rancho IN,-34.59599,-58.495916,Steakhouse
9,Agronomía,-34.59243,-58.49659,Al Piatto,-34.590089,-58.497244,Pizza Place


Let's check how many venues were returned for each neighborhood

In [18]:
# Non-null count of every column per neighbourhood, i.e. the number of venues found in each
bsas_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agronomía,18,18,18,18,18,18
Almagro,29,29,29,29,29,29
Balvanera,22,22,22,22,22,22
Barracas,4,4,4,4,4,4
Belgrano,42,42,42,42,42,42
Boedo,9,9,9,9,9,9
Caballito,41,41,41,41,41,41
Chacarita,45,45,45,45,45,45
Coghlan,23,23,23,23,23,23
Colegiales,27,27,27,27,27,27


Let's find out the unique categories

In [19]:
# Number of distinct venue categories across all neighbourhoods
print('There are {} unique categories.'.format(len(bsas_venues['Venue Category'].unique())))

There are 176 unique categories.


In [20]:
# First 50 distinct categories, in order of first appearance
bsas_venues['Venue Category'].unique()[:50]

array(['Coffee Shop', 'Restaurant', 'Café', 'Italian Restaurant', 'Plaza',
       'Bakery', 'Sports Club', 'Steakhouse', 'Pizza Place', 'Bar',
       'Soccer Field', 'Train Station', 'Ice Cream Shop',
       'Argentinian Restaurant', 'Bus Station', 'Hotel',
       'Fast Food Restaurant', 'Empanada Restaurant',
       'Bike Rental / Bike Share', 'Concert Hall', 'Shop & Service',
       'Pharmacy', 'Dessert Shop', 'Gym', 'Pub', 'Grocery Store',
       'Cultural Center', 'Chinese Restaurant', 'Peruvian Restaurant',
       'BBQ Joint', 'Gas Station', 'Electronics Store', 'Bus Stop',
       'Platform', 'Convenience Store', 'Park', 'Farmers Market',
       'Athletics & Sports', 'Tea Room', 'Market', 'Salad Place',
       'Art Museum', 'Bookstore', 'French Restaurant', 'Smoke Shop',
       'Vegetarian / Vegan Restaurant', 'Comic Shop', 'German Restaurant',
       'Bike Shop', 'History Museum'], dtype=object)

#### Let's analyze each neighborhood

In [21]:
# one hot encoding
bsas_onehot = pd.get_dummies(bsas_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
bsas_onehot['Neighborhood'] = bsas_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [bsas_onehot.columns[-1]] + list(bsas_onehot.columns[:-1])
bsas_onehot = bsas_onehot[fixed_columns]

bsas_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,BBQ Joint,Bagel Shop,Bakery,Bar,Basketball Court,Bed & Breakfast,Beer Bar,Beer Garden,Big Box Store,Bike Rental / Bike Share,Bike Shop,Bistro,Bookstore,Breakfast Spot,Brewery,Buffet,Burger Joint,Bus Station,Bus Stop,Café,Camera Store,Candy Store,Chinese Restaurant,Chocolate Shop,Church,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Creperie,Cuban Restaurant,Cultural Center,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Diner,Doner Restaurant,Electronics Store,Empanada Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fire Station,Flea Market,Food & Drink Shop,Food Service,Food Truck,French Restaurant,Furniture / Home Store,Gas Station,Gastropub,Gay Bar,General Entertainment,German Restaurant,Gift Shop,Grocery Store,Gym,Gym / Fitness Center,Historic Site,History Museum,Hobby Shop,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Intersection,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewish Restaurant,Juice Bar,Korean Restaurant,Latin American Restaurant,Lounge,Market,Mediterranean Restaurant,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Moving Target,Multiplex,Museum,Music Venue,Nightclub,Office,Outdoor Sculpture,Outlet Mall,Outlet Store,Paella Restaurant,Park,Parking,Pastry Shop,Performing Arts Venue,Peruvian Restaurant,Pet Store,Pharmacy,Photography Lab,Pizza Place,Platform,Plaza,Pool,Pool Hall,Pub,Radio Station,Recording Studio,Restaurant,Rock Club,Roof Deck,Salad Place,Sandwich Place,Sausage Shop,Science Museum,Seafood Restaurant,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Skating Rink,Smoke Shop,Snack Place,Soccer 
Field,Soccer Stadium,Social Club,South American Restaurant,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Club,Steakhouse,Street Fair,Supermarket,Sushi Restaurant,Swiss Restaurant,Taco Place,Tapas Restaurant,Tea Room,Tennis Court,Theater,Theme Restaurant,Toy / Game Store,Train Station,Tunnel,Used Bookstore,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Veterinarian,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Agronomía,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Agronomía,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Agronomía,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Agronomía,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Agronomía,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [22]:
# One row per venue; one column per category plus the 'Neighborhood' column
bsas_onehot.shape

(1114, 177)

Let's group the rows by neighborhood and take the mean of each category column, which gives the frequency of occurrence of each category in each neighborhood

In [23]:
# Mean of each indicator column per neighbourhood = share of that neighbourhood's venues in each category
bsas_grouped = bsas_onehot.groupby('Neighborhood').mean().reset_index()
bsas_grouped

Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,BBQ Joint,Bagel Shop,Bakery,Bar,Basketball Court,Bed & Breakfast,Beer Bar,Beer Garden,Big Box Store,Bike Rental / Bike Share,Bike Shop,Bistro,Bookstore,Breakfast Spot,Brewery,Buffet,Burger Joint,Bus Station,Bus Stop,Café,Camera Store,Candy Store,Chinese Restaurant,Chocolate Shop,Church,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Creperie,Cuban Restaurant,Cultural Center,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Diner,Doner Restaurant,Electronics Store,Empanada Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fire Station,Flea Market,Food & Drink Shop,Food Service,Food Truck,French Restaurant,Furniture / Home Store,Gas Station,Gastropub,Gay Bar,General Entertainment,German Restaurant,Gift Shop,Grocery Store,Gym,Gym / Fitness Center,Historic Site,History Museum,Hobby Shop,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Intersection,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewish Restaurant,Juice Bar,Korean Restaurant,Latin American Restaurant,Lounge,Market,Mediterranean Restaurant,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Moving Target,Multiplex,Museum,Music Venue,Nightclub,Office,Outdoor Sculpture,Outlet Mall,Outlet Store,Paella Restaurant,Park,Parking,Pastry Shop,Performing Arts Venue,Peruvian Restaurant,Pet Store,Pharmacy,Photography Lab,Pizza Place,Platform,Plaza,Pool,Pool Hall,Pub,Radio Station,Recording Studio,Restaurant,Rock Club,Roof Deck,Salad Place,Sandwich Place,Sausage Shop,Science Museum,Seafood Restaurant,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Skating Rink,Smoke Shop,Snack Place,Soccer 
Field,Soccer Stadium,Social Club,South American Restaurant,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Club,Steakhouse,Street Fair,Supermarket,Sushi Restaurant,Swiss Restaurant,Taco Place,Tapas Restaurant,Tea Room,Tennis Court,Theater,Theme Restaurant,Toy / Game Store,Train Station,Tunnel,Used Bookstore,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Veterinarian,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Agronomía,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Almagro,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068966,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.137931,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068966,0.0,0.137931,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.068966,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Balvanera,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.136364,0.136364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.045455,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.045455,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Barracas,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Belgrano,0.0,0.0,0.071429,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.02381,0.0,0.047619,0.0,0.02381,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.02381,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.02381,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.02381,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.02381
5,Boedo,0.0,0.0,0.555556,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Caballito,0.0,0.0,0.04878,0.0,0.0,0.0,0.0,0.02439,0.0,0.02439,0.0,0.04878,0.02439,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.04878,0.0,0.02439,0.073171,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04878,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04878,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.097561,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.02439,0.0,0.0,0.0,0.02439,0.0,0.073171,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.02439,0.0,0.0,0.0,0.0,0.0,0.02439,0.04878,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Chacarita,0.0,0.0,0.044444,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.044444,0.066667,0.0,0.0,0.044444,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.044444,0.0,0.0,0.0,0.0,0.044444,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.044444,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.022222,0.022222,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.022222,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.044444,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.044444,0.022222,0.088889,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.044444,0.022222,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.022222,0.022222,0.0,0.0,0.022222,0.0,0.0,0.0,0.0
8,Coghlan,0.0,0.0,0.086957,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.086957,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.043478,0.043478,0.086957,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.086957,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.043478,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Colegiales,0.0,0.0,0.074074,0.0,0.0,0.0,0.0,0.037037,0.0,0.037037,0.0,0.185185,0.0,0.037037,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.074074,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.037037,0.0,0.037037,0.0,0.0,0.0,0.037037,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
# One row per neighbourhood that has at least one venue
bsas_grouped.shape

(46, 177)

Let's look at the frequency of sushi restaurants in each neighborhood

In [25]:
# Number of neighbourhoods where at least one of the returned venues is a sushi
# restaurant. It is printed explicitly because only the last expression of a
# cell is displayed automatically.
sushi_neighborhood_count = int((bsas_grouped['Sushi Restaurant'] > 0).sum())
print('{} neighborhoods have at least one sushi restaurant.'.format(sushi_neighborhood_count))

# Keep only the sushi frequency of each neighbourhood for the clustering step.
bsas_sushi = bsas_grouped[['Neighborhood', 'Sushi Restaurant']]
bsas_sushi.head()

Unnamed: 0,Neighborhood,Sushi Restaurant
0,Agronomía,0.0
1,Almagro,0.0
2,Balvanera,0.0
3,Barracas,0.0
4,Belgrano,0.02381


#### Cluster Neighborhoods

In [26]:
# Using K-Means to cluster neighborhood into  clusters
bsas_clustering = bsas_sushi.drop('Neighborhood', 1)
kmeans = KMeans(n_clusters=4, random_state=0).fit(bsas_clustering)
kmeans.labels_

array([0, 0, 0, 0, 3, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,
       0, 0, 0, 0, 0, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0], dtype=int32)

Let's create a new dataframe that includes the cluster label as well as the sushi restaurant frequency for each neighborhood

In [27]:
# Copy the sushi frequencies and attach the K-Means label of each neighbourhood.
bsas_merged = bsas_sushi.assign(**{"Cluster Labels": kmeans.labels_})
bsas_merged.head()

Unnamed: 0,Neighborhood,Sushi Restaurant,Cluster Labels
0,Agronomía,0.0,0
1,Almagro,0.0,0
2,Balvanera,0.0,0
3,Barracas,0.0,0
4,Belgrano,0.02381,3


In [28]:
# merge bsas_grouped with bsas_df to add latitude/longitude for each neighborhood
bsas_merged = bsas_merged.join(bsas_df.set_index('Neighborhood'), on='Neighborhood')
bsas_merged.head()

Unnamed: 0,Neighborhood,Sushi Restaurant,Cluster Labels,Population,Latitude,Longitude
0,Agronomía,0.0,0,13963,-34.59243,-58.49659
1,Almagro,0.0,0,128206,-34.61108,-58.43028
2,Balvanera,0.0,0,137521,-34.61011,-58.40602
3,Barracas,0.0,0,73377,-34.6499,-58.3891
4,Belgrano,0.02381,3,126816,-34.56153,-58.45702


Let's visualize the clusters

In [29]:
kclusters = 4

# create map centred on the city
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# one evenly spaced rainbow colour (as a hex string) per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add one marker per neighborhood, coloured by its cluster label
for lat, lon, poi , cluster in zip(bsas_merged['Latitude'],
                                   bsas_merged['Longitude'],
                                   bsas_merged['Neighborhood'],
                                   bsas_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' -Cluster ' + str(cluster), parse_html=True)
    # Labels run from 0 to kclusters-1, so they index the palette directly
    # (no reliance on negative-index wraparound for cluster 0).
    cluster_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [30]:
# Write the cluster map to 'map_clusters.html' in the working directory
map_clusters.save('map_clusters.html')

#### Examine the clusters

In [31]:
# Neighborhoods assigned to cluster 0
bsas_merged.loc[bsas_merged['Cluster Labels']==0]

Unnamed: 0,Neighborhood,Sushi Restaurant,Cluster Labels,Population,Latitude,Longitude
0,Agronomía,0.0,0,13963,-34.59243,-58.49659
1,Almagro,0.0,0,128206,-34.61108,-58.43028
2,Balvanera,0.0,0,137521,-34.61011,-58.40602
3,Barracas,0.0,0,73377,-34.6499,-58.3891
5,Boedo,0.0,0,45563,-34.63228,-58.41779
7,Chacarita,0.0,0,25778,-34.58351,-58.45287
8,Coghlan,0.0,0,18021,-34.56158,-58.47428
10,Constitución,0.0,0,41894,-34.62695,-58.38295
11,Flores,0.0,0,142695,-36.017465,-59.079428
12,Floresta,0.0,0,37247,-34.62898,-58.48159


In [32]:
# Neighborhoods assigned to cluster 1
bsas_merged.loc[bsas_merged['Cluster Labels']==1]

Unnamed: 0,Neighborhood,Sushi Restaurant,Cluster Labels,Population,Latitude,Longitude
6,Caballito,0.04878,1,170309,-34.62218,-58.42858


In [33]:
# Neighborhoods assigned to cluster 2
bsas_merged.loc[bsas_merged['Cluster Labels']==2]

Unnamed: 0,Neighborhood,Sushi Restaurant,Cluster Labels,Population,Latitude,Longitude
9,Colegiales,0.037037,2,52391,-34.57502,-58.44777
28,San Cristóbal,0.035714,2,46494,-34.62288,-58.40532


In [34]:
# Neighborhoods assigned to cluster 3
bsas_merged.loc[bsas_merged['Cluster Labels']==3]

Unnamed: 0,Neighborhood,Sushi Restaurant,Cluster Labels,Population,Latitude,Longitude
4,Belgrano,0.02381,3,126816,-34.56153,-58.45702
20,Palermo,0.014286,3,225245,-34.58845,-58.42343
27,Retiro,0.01,3,38635,-34.59475,-58.38273


#### Observations:

As the cluster analysis shows, the sushi restaurants are concentrated in clusters 1, 2 and 3, while cluster 0 has no sushi restaurants at all. This represents a great opportunity in the neighborhoods of cluster 0. We can then look at the population of each neighborhood in cluster 0 and choose the one where opening a sushi restaurant has the best prospects.

### Conclusion:
The neighborhood of Recoleta, with a population of 165,494 inhabitants, would be a very good option for opening a sushi restaurant.
It is one of the most populated neighborhoods in cluster 0 and, as the map shows, it is located near the port, which means that a large number of people pass through it. I believe this is a good location for opening a sushi restaurant.