# Coursera Capstone
## Using machine learning to determine the best neighborhood to open a given restaurant style

In [10]:
import pandas as pd
import numpy as np
import bs4 as bs
import requests
#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
#!conda install -c conda-forge folium=0.5.0 --yes
import folium
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans

url = 'https://pt.wikipedia.org/wiki/Lista_de_bairros_de_Belo_Horizonte'
WikiBH = requests.get(url).content
soup = bs.BeautifulSoup(WikiBH,'html.parser')     

data = [];
for item in range(len(soup.find_all('tbody')[3].find_all('tr'))):
    row=[]
    if item >0:
        for row_item in range(len(soup.find_all('tbody')[3].find_all('tr')[1].find_all('td'))):
            row.append(soup.find_all('tbody')[3].find_all('tr')[item].find_all('td')[row_item].get_text())
    data.append(row)

df = pd.DataFrame(data,columns=['#','Zone','Regional','Neighborhood','Population','Size(km²)'])
df.dropna(inplace=True)
df.reset_index(drop=True,inplace=True)
df

df['latitude']=0.0
df['longitude']=0.0
for i in range(len(df['Neighborhood'])):
    address = df['Neighborhood'][i] +', Belo Horizonte'                                
    geolocator = Nominatim(user_agent="ny_explorer")
    location = geolocator.geocode(address)
    if location != None:
        latitude = location.latitude
        longitude = location.longitude
        print('The geograpical coordinate of {} are {}, {}.  {}'.format(address,latitude, longitude,i))
        df['latitude'][i]=latitude
        df['longitude'][i]=longitude
    else:
        print(df['Neighborhood'][i] + ' wasn\'t found on Nominatim')
        df['latitude'][i]=''
        df['longitude'][i]=''

df.drop(df[df['latitude']==''].index,inplace=True)
df.reset_index(drop=True,inplace=True)
df

In [11]:
# create map of New York using latitude and longitude values
address = 'Belo Horizonte, MG'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

map_BH = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['latitude'], df['longitude'], df['Zone'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_BH)  
    
map_BH

# Cleaning the data to remove points outside of the city
df = df[(df['longitude'].astype(float)<-43.85) & (df['longitude'].astype(float)>-44.07)
 & (df['latitude'].astype(float)<-19.82) & (df['latitude'].astype(float)>-20) ].reset_index(drop=True,inplace=True)
 #Saving to a csv file to make my life easier. If needed I can open the df from the file instead of srapping everything again
df.to_csv('BH_df.csv')

In [11]:
#Importing from file to make life easier.
df = pd.read_csv('BH_df.csv')

In [12]:
# create map of New York using latitude and longitude values
address = 'Belo Horizonte, MG'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

map_BH = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['latitude'], df['longitude'], df['Zone'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_BH)  
    
map_BH


In [13]:
CLIENT_ID = 'UOSOUPHZSZ31RZYIACFNWILJ4U1E4HJPT51BGJEXQUKCUJSW' # your Foursquare ID
CLIENT_SECRET = 'ZRXAYW4AVLVJAOAOQP3QS2M1CLFUF5SGCLB5GUQYAM10R2MG' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 200 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: UOSOUPHZSZ31RZYIACFNWILJ4U1E4HJPT51BGJEXQUKCUJSW
CLIENT_SECRET:ZRXAYW4AVLVJAOAOQP3QS2M1CLFUF5SGCLB5GUQYAM10R2MG


In [14]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [15]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        try:
            results = requests.get(url).json()["response"]['groups'][0]['items']
            #print(results)
            # return only relevant information for each nearby venue
            venues_list.append([(
                name, 
                lat, 
                lng,
                v['venue']['name'],
                v['venue']['id'],
                #requests.get('https://api.foursquare.com/v2/venues/{}/?client_id={}&client_secret={}&v={}'.format(v['venue']['id'], CLIENT_ID, CLIENT_SECRET, VERSION)).json()['response']['venue']['likes']['count'],
                v['venue']['location']['lat'], 
                v['venue']['location']['lng'],  
                v['venue']['categories'][0]['name']) for v in results])
            print(venues_list)
            nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
            nearby_venues.columns = ['Neighborhood', 
                                     'Neighborhood Latitude', 
                                     'Neighborhood Longitude', 
                                     'Venue',
                                     'Venue id',
                                     #'Venue likes',
                                     'Venue Latitude', 
                                     'Venue Longitude', 
                                     'Venue Category']
        except:
            print(requests.get(url).json()['meta']['errorType'])
            return(nearby_venues) 
   
    return(nearby_venues)

In [16]:
BH_venues = getNearbyVenues(names=df['Neighborhood'],latitudes=df['latitude'],longitudes=['longitude'])
BH_venues

Buritis
param_error


UnboundLocalError: local variable 'nearby_venues' referenced before assignment

In [None]:
# one hot encoding
BH_onehot = pd.get_dummies(BH_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
BH_onehot['Neighborhood'] = BH_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [BH_onehot.columns[-1]] + list(BH_onehot.columns[:-1])
BH_onehot = BH_onehot[fixed_columns]

BH_onehot.head()

In [121]:
BH_venues.to_csv('BH_venues.csv')

In [122]:
BH_grouped = BH_onehot.groupby('Neighborhood').mean().reset_index()
BH_grouped

Unnamed: 0,Neighborhood,ATM,Acai House,Accessories Store,Amphitheater,Antique Shop,Arcade,Arepa Restaurant,Art Gallery,Art Museum,...,Trail,Tree,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Video Store,Warehouse Store,Water Park,Wine Bar,Women's Store
0,Alto Caiçaras,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0000,0.0,0.000000
1,Alto Vera Cruz,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0625,0.0,0.000000
2,Alípio de Melo,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.041667,0.0000,0.0,0.000000
3,Apolônia,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0000,0.0,0.058824
4,Bairro das Indústrias I,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0000,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
142,Ventosa,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0000,0.0,0.000000
143,Vila Barragem Santa Lúcia,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0000,0.0,0.000000
144,Vila Clóris,0.0,0.018182,0.018182,0.0,0.0,0.018182,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0000,0.0,0.018182
145,Vila Pinho,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0000,0.0,0.000000


In [123]:
fixed_columns = [BH_onehot.columns[-1]] + list(BH_onehot.columns[:-1])
BH_onehot = BH_onehot[fixed_columns]

In [124]:
num_top_venues = 5

for hood in BH_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = BH_grouped[BH_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Alto Caiçaras----
                 venue  freq
0             Bus Stop  0.12
1               Bakery  0.12
2       Ice Cream Shop  0.06
3  Martial Arts School  0.06
4           Restaurant  0.06


----Alto Vera Cruz----
                  venue  freq
0                Bakery  0.12
1                   Bar  0.12
2              Pharmacy  0.06
3                   Spa  0.06
4  Brazilian Restaurant  0.06


----Alípio de Melo----
          venue  freq
0           Bar  0.17
1     BBQ Joint  0.08
2  Burger Joint  0.08
3   Beer Garden  0.08
4        Bakery  0.08


----Apolônia----
            venue  freq
0  Ice Cream Shop  0.06
1          Bakery  0.06
2     Supermarket  0.06
3    Soccer Field  0.06
4     Pizza Place  0.06


----Bairro das Indústrias I----
                     venue  freq
0                      Bar  0.33
1                      Spa  0.17
2  Health & Beauty Service  0.17
3             Soccer Field  0.17
4              Snack Place  0.17


----Barreiro----
                  venue  fre

          venue  freq
0   Pizza Place  0.25
1           Bar  0.25
2  Burger Joint  0.25
3   Art Gallery  0.25
4           ATM  0.00


----Guarani----
         venue  freq
0       Bakery  0.19
1          Bar  0.12
2  Supermarket  0.08
3          ATM  0.04
4        Track  0.04


----Gutierrez----
                  venue  freq
0  Gym / Fitness Center  0.10
1           Snack Place  0.07
2                Bakery  0.07
3              Pharmacy  0.05
4  Brazilian Restaurant  0.05


----Havaí----
                        venue  freq
0                      Bakery  0.20
1        Brazilian Restaurant  0.13
2              Ice Cream Shop  0.07
3  Tourist Information Center  0.07
4                       Plaza  0.07


----Heliópolis----
                  venue  freq
0                 Plaza   0.3
1  Gym / Fitness Center   0.2
2                Bakery   0.2
3               Dog Run   0.1
4        Ice Cream Shop   0.1


----Independência----
         venue  freq
0  Frame Store   0.1
1        Plaza   0.1
2   

                  venue  freq
0  Brazilian Restaurant  0.09
1           Pizza Place  0.07
2        Ice Cream Shop  0.05
3                   Bar  0.05
4  Gym / Fitness Center  0.05


----Palmeiras (ZO)----
                venue  freq
0               Plaza  0.15
1                 Bar  0.10
2         Supermarket  0.10
3                 Gym  0.10
4  Athletics & Sports  0.05


----Palmeiras (ZS)----
                venue  freq
0               Plaza  0.15
1                 Bar  0.10
2         Supermarket  0.10
3                 Gym  0.10
4  Athletics & Sports  0.05


----Paquetá----
                  venue  freq
0                Bakery  0.14
1  Gym / Fitness Center  0.07
2                   Gym  0.07
3        Gymnastics Gym  0.07
4           Beer Garden  0.07


----Paraíso----
                    venue  freq
0                  Bakery  0.19
1    Brazilian Restaurant  0.07
2  Furniture / Home Store  0.07
3                     Bar  0.07
4           Grocery Store  0.07


----Paulo VI----
       

In [125]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [148]:
num_top_venues = 20

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = BH_grouped['Neighborhood']

for ind in np.arange(BH_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(BH_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,...,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,Alto Caiçaras,Bus Stop,Bakery,Ice Cream Shop,Auto Garage,Pizza Place,Restaurant,Park,Sandwich Place,Motel,...,Martial Arts School,Supermarket,Plaza,Gym,Gym / Fitness Center,Fast Food Restaurant,Farmers Market,Farm,Event Space,Event Service
1,Alto Vera Cruz,Bar,Bakery,Brazilian Restaurant,Gym,Burger Joint,Soccer Field,Pool,Market,Furniture / Home Store,...,Pharmacy,Gymnastics Gym,Water Park,Pizza Place,Event Space,Flower Shop,Cultural Center,Flea Market,Fish Market,Fish & Chips Shop
2,Alípio de Melo,Bar,Ice Cream Shop,Beer Garden,Bakery,Burger Joint,BBQ Joint,Diner,Fast Food Restaurant,Taco Place,...,Gym Pool,Park,Italian Restaurant,Warehouse Store,Health & Beauty Service,Pizza Place,Food,Flower Shop,Flea Market,Fish Market
3,Apolônia,Women's Store,Supermarket,Auto Garage,Bakery,Bistro,Brazilian Restaurant,Cosmetics Shop,Diner,Fruit & Vegetable Store,...,Gym / Fitness Center,Hot Dog Joint,Juice Bar,Pet Store,Pizza Place,Soccer Field,Ice Cream Shop,Tennis Stadium,Tennis Court,Fish & Chips Shop
4,Bairro das Indústrias I,Bar,Spa,Health & Beauty Service,Soccer Field,Snack Place,Women's Store,Empada House,Flower Shop,Flea Market,...,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm,Event Space,Event Service,Electronics Store,Food & Drink Shop,Drugstore,Dog Run


In [149]:
# set number of clusters
kclusters = 5

BH_grouped_clustering = BH_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(BH_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 1, 1, 1, 1, 1, 1, 3, 3, 3])

In [150]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_.astype(int))

BH_merged = df

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
BH_merged = BH_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

BH_merged.head() # check the last columns!

Unnamed: 0,#,Zone,Regional,Neighborhood,Population,Size(km²),latitude,longitude,Cluster Labels,1st Most Common Venue,...,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,1,ZONA LESTE,Leste,Sagrada Família,34 395,"2,291\n",-20.1964,-44.1078,3.0,Plaza,...,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm,Event Space,Event Service,Drugstore,Electronics Store,Food Stand,Dog Run
1,2,ZONA SUL,Oeste,Buritis,29 374,"3,823\n",-19.9766,-43.9674,1.0,Gym / Fitness Center,...,Bakery,Burger Joint,BBQ Joint,Sandwich Place,Convenience Store,Pet Store,Coffee Shop,Dive Bar,Cupcake Shop,Fish Market
2,3,ZONA OESTE,Noroeste,Padre Eustáquio,28 773,"2,969\n",-19.9143,-43.9794,1.0,Brazilian Restaurant,...,Comfort Food Restaurant,Fast Food Restaurant,Snack Place,Grocery Store,Mineiro Restaurant,Burger Joint,Pet Store,Electronics Store,Men's Store,Chocolate Shop
3,4,ZONA OESTE,Barreiro,Lindéia,25 231,"2,012\n",-19.9774,-44.0506,3.0,Bakery,...,Department Store,Pizza Place,Sandwich Place,Soccer Field,Brazilian Restaurant,Boutique,Hot Dog Joint,Arts & Crafts Store,BBQ Joint,Bagel Shop
4,5,ZONA NORTE,Pamp./ Venda Nova,Santa Mônica,23 883,"2,178\n",-19.8246,-43.9775,1.0,Plaza,...,Boutique,Food Truck,Gym / Fitness Center,Bus Stop,Pizza Place,Event Service,Flea Market,Hostel,Fish & Chips Shop,Fast Food Restaurant


In [151]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(BH_merged['latitude'], BH_merged['longitude'], BH_merged['Neighborhood'], BH_merged[BH_merged['Cluster Labels']<=4]['Cluster Labels'].astype(int)):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [136]:
BH_merged.groupby('Cluster Labels').count()

Unnamed: 0_level_0,#,Zone,Regional,Neighborhood,Population,Size(km²),latitude,longitude,1st Most Common Venue,2nd Most Common Venue,...,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
Cluster Labels,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.0,32,32,32,32,32,32,32,32,32,32,...,32,32,32,32,32,32,32,32,32,32
1.0,19,19,19,19,19,19,19,19,19,19,...,19,19,19,19,19,19,19,19,19,19
2.0,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
3.0,33,33,33,33,33,33,33,33,33,33,...,33,33,33,33,33,33,33,33,33,33
4.0,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
5.0,19,19,19,19,19,19,19,19,19,19,...,19,19,19,19,19,19,19,19,19,19
6.0,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
7.0,2,2,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2
8.0,38,38,38,38,38,38,38,38,38,38,...,38,38,38,38,38,38,38,38,38,38
9.0,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1


In [137]:
BH_merged.loc[BH_merged['Cluster Labels'] == 0, BH_merged.columns[[3] + list(range(6, BH_merged.shape[1]))]]

Unnamed: 0,Neighborhood,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,...,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
4,Santa Mônica,-19.8246,-43.9775,0.0,Plaza,Bar,Soccer Field,Burger Joint,Brazilian Restaurant,Hot Dog Joint,...,Boutique,Food Truck,Gym / Fitness Center,Bus Stop,Pizza Place,Event Service,Flea Market,Hostel,Fish & Chips Shop,Fast Food Restaurant
9,Alto Vera Cruz,-19.911,-43.8916,0.0,Bar,Bakery,Brazilian Restaurant,Gym,Burger Joint,Soccer Field,...,Pharmacy,Gymnastics Gym,Water Park,Pizza Place,Event Space,Flower Shop,Cultural Center,Flea Market,Fish Market,Fish & Chips Shop
11,Piratininga,-19.8156,-43.9902,0.0,Burger Joint,Gym,Bar,Grocery Store,Market,Fruit & Vegetable Store,...,Fish & Chips Shop,Fast Food Restaurant,Event Service,Fish Market,Flea Market,Flower Shop,Food,Event Space,Electronics Store,Empada House
16,Santa Amélia,-19.8377,-43.9742,0.0,Bar,Pizza Place,Acai House,Burger Joint,Gym / Fitness Center,Bakery,...,Sandwich Place,Mineiro Restaurant,Brazilian Restaurant,Frozen Yogurt Shop,Gastropub,Baiano Restaurant,Brewery,Plaza,Auto Garage,Water Park
27,Centro,-6.8825,-38.5575,0.0,Print Shop,Bar,Plaza,Pizza Place,Electronics Store,Soccer Stadium,...,Farm,Fish & Chips Shop,Fast Food Restaurant,Fish Market,Flea Market,Flower Shop,Empada House,Dog Run,Drugstore,Food & Drink Shop
30,Ribeiro de Abreu,-19.8314,-43.8998,0.0,Burger Joint,Bakery,Ice Cream Shop,Arts & Crafts Store,Food Stand,Gymnastics Gym,...,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Event Space,Electronics Store,Event Service
32,Santa Tereza,-19.915,-43.9146,0.0,Bar,Plaza,Ice Cream Shop,Farmers Market,BBQ Joint,Supermarket,...,Brazilian Restaurant,Brewery,Hostel,Soup Place,Burger Joint,Snack Place,Cocktail Bar,Music Venue,Pool Hall,Concert Hall
39,Nova Suissa,-19.9327,-43.98,0.0,Bar,BBQ Joint,Burger Joint,Hotel,Print Shop,Hot Dog Joint,...,Supermarket,Sushi Restaurant,Brazilian Restaurant,Historic Site,Grocery Store,Drugstore,Flea Market,Fish Market,Fish & Chips Shop,Fast Food Restaurant
46,São Geraldo,-19.9079,-43.6705,0.0,Convenience Store,Burger Joint,Market,Farm,Bakery,Liquor Store,...,Flea Market,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Event Space,Event Service,Empada House,Electronics Store,Drugstore
52,Caiçaras,-19.9032,-43.9682,0.0,Burger Joint,Bar,Brazilian Restaurant,Gym / Fitness Center,Restaurant,Middle Eastern Restaurant,...,Fried Chicken Joint,Supermarket,Furniture / Home Store,Market,Spa,Sandwich Place,Bookstore,Snack Place,Arts & Entertainment,Gym


In [94]:
BH_merged.loc[BH_merged['Cluster Labels'] == 1, BH_merged.columns[[3] + list(range(6, BH_merged.shape[1]))]]

Unnamed: 0,Neighborhood,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Lindéia,-19.9774,-44.0506,1.0,Bakery,Pharmacy,Clothing Store,Burger Joint,Supermarket,Women's Store,Furniture / Home Store,Frozen Yogurt Shop,Flower Shop,Electronics Store
5,Céu Azul,-19.8227,-44.004,1.0,Bakery,Bookstore,Shopping Plaza,Grocery Store,Food Truck,Pet Store,Pharmacy,Spa,Health & Beauty Service,Pizza Place
17,Cabana do Pai Tomás,-19.946,-44.0004,1.0,Bakery,Hot Dog Joint,Burger Joint,Department Store,Gymnastics Gym,Market,Snack Place,Soccer Field,Pet Store,Fruit & Vegetable Store
37,Estrela Dalva,-19.864,-44.0246,1.0,Bakery,Supermarket,Fruit & Vegetable Store,Ice Cream Shop,Pizza Place,Hot Dog Joint,Pharmacy,Diner,Snack Place,Soccer Field
41,União,41.1538,-8.51368,1.0,Convenience Store,Supermarket,Restaurant,Bakery,Café,Event Service,Food & Drink Shop,Food,Flower Shop,Flea Market
49,Tupi B,-19.8338,-43.9181,1.0,Bakery,Ice Cream Shop,Pharmacy,Scenic Lookout,Candy Store,Supermarket,Fruit & Vegetable Store,Bar,Food Truck,Hardware Store
50,Paraíso,-19.92,-43.9066,1.0,Bakery,Grocery Store,Brewery,Brazilian Restaurant,Furniture / Home Store,Bar,Snack Place,Health & Beauty Service,Gourmet Shop,Burger Joint
51,Copacabana,-19.8342,-43.9877,1.0,Bakery,Supermarket,Gym / Fitness Center,Dance Studio,Sandwich Place,Pharmacy,Churrascaria,Event Service,Event Space,Farm
57,Milionários,-19.9805,-44.0018,1.0,Bakery,Dessert Shop,Convenience Store,Department Store,BBQ Joint,Bus Stop,Lottery Retailer,Big Box Store,Supermarket,Gaming Cafe
63,Jardim Guanabara,-19.8289,-43.9349,1.0,Bakery,Furniture / Home Store,Shoe Store,Soccer Field,Supermarket,Ice Cream Shop,Pharmacy,Pet Store,Paper / Office Supplies Store,Department Store


In [95]:
BH_merged.loc[BH_merged['Cluster Labels'] == 2, BH_merged.columns[[3] + list(range(6, BH_merged.shape[1]))]]

Unnamed: 0,Neighborhood,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
151,Santa Rita de Cássia,-20.3973,-43.4205,2.0,Bakery,Women's Store,Food Stand,Food Court,Food & Drink Shop,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop


In [16]:
BH_venues[['Neighborhood','Bar']].groupby('Neighborhood').sum()

KeyError: "['Bar'] not in index"

In [27]:
BH_venues[BH_venues['Venue Category']=='Bar'].groupby(BH_venues['Neighborhood']).count()

Unnamed: 0_level_0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Alto Vera Cruz,2,2,2,2,2,2,2
Alípio de Melo,4,4,4,4,4,4,4
Bairro das Indústrias I,2,2,2,2,2,2,2
Betânia,1,1,1,1,1,1,1
Brasil Industrial,2,2,2,2,2,2,2
...,...,...,...,...,...,...,...
São Pedro,2,2,2,2,2,2,2
Teixeira Dias,1,1,1,1,1,1,1
Tirol,1,1,1,1,1,1,1
Tupi B,1,1,1,1,1,1,1


In [56]:
bares = BH_venues[BH_venues['Venue Category']=='Bar'][['Neighborhood','Venue Category']].groupby('Neighborhood').count().sort_values('Venue Category')

In [52]:
bares['Pessoas/bar'] = df['Population']/bares['Venue Category']

In [53]:
bares

Unnamed: 0_level_0,Venue Category,Pessoas/bar
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1
Vila Clóris,1,
Minaslândia,1,
Marçola,1,
Maria Helena,1,
Paquetá,1,
...,...,...
Dona Clara,6,
Prado,7,
Coração Eucarístico,9,
Pindorama,10,


In [60]:
df_bares = df.merge(bares,on='Neighborhood')

In [61]:
df_bares['Population'] = df_bares['Population'].str.replace('\xa0','')
df_bares

Unnamed: 0,#,Zone,Regional,Neighborhood,Population,Size(km²),latitude,longitude,Venue Category
0,2,ZONA SUL,Oeste,Buritis,29 374,"3,823\n",-19.9766,-43.9674,1
1,3,ZONA OESTE,Noroeste,Padre Eustáquio,28 773,"2,969\n",-19.9143,-43.9794,2
2,5,ZONA NORTE,Pamp./ Venda Nova,Santa Mônica,23 883,"2,178\n",-19.8246,-43.9775,3
3,9,ZONA LESTE,Leste,Alto Vera Cruz,21 459,"0,888\n",-19.911,-43.8916,2
4,11,ZONA NORTE,Venda Nova,Piratininga,21 149,"1,694\n",-19.8156,-43.9902,1
...,...,...,...,...,...,...,...,...,...
65,136,ZONA SUL,Oeste,Grajaú,6 279,"0,426\n",-20.1479,-44.2167,1
66,138,ZONA LESTE,Nordeste,Paulo VI,6 205,"1,283\n",-19.8389,-43.8909,1
67,140,ZONA OESTE,Barreiro,Teixeira Dias,6 179,"0,733\n",-19.9879,-44.0164,1
68,144,ZONA SUL,Oeste,Nova Granada (ZS),5 516,"0,578\n",-19.9413,-43.9691,2


In [103]:
df_bares['Pessoas/bar'] = df_bares['Population'].astype(int)/df_bares['Venue Category']

In [101]:
df_bares['Population']=df_bares['Population'].str.replace('\xa0','')

In [108]:
df_bares.sort_values('Pessoas/bar',ascending=False)

Unnamed: 0,#,Zone,Regional,Neighborhood,Population,Size(km²),latitude,longitude,Venue Category,Pessoas/bar
0,2,ZONA SUL,Oeste,Buritis,29374,"3,823\n",-19.9766,-43.9674,1,29374.000000
4,11,ZONA NORTE,Venda Nova,Piratininga,21149,"1,694\n",-19.8156,-43.9902,1,21149.000000
14,26,HIPERCENTRO,Centro-Sul,Centro,16245,"1,952\n",-6.8825,-38.5575,1,16245.000000
16,33,ZONA LESTE,Nordeste,Cidade Nova,15378,"0,913\n",-19.8892,-43.9249,1,15378.000000
1,3,ZONA OESTE,Noroeste,Padre Eustáquio,28773,"2,969\n",-19.9143,-43.9794,2,14386.500000
...,...,...,...,...,...,...,...,...,...,...
27,57,ZONA LESTE,Centro-Sul/ Leste,Floresta (ZL),5262,"0,487\n",-19.917,-43.9332,5,1052.400000
51,111,ZONA OESTE,Noroeste,Coração Eucarístico,7654,"1,027\n",-19.9236,-43.9884,9,850.444444
15,31,ZONA LESTE,Leste,Santa Tereza,15607,"1,551\n",-19.915,-43.9146,22,709.409091
53,112,ZONA OESTE,Oeste,Palmeiras (ZO),1228,"0,232\n",-19.9678,-43.9808,2,614.000000


In [113]:
df_bares['Venue Category'].sum()

188

In [8]:
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            df['latitude'][0], 
            df['longitude'][0], 
            500, 
            LIMIT)
requests.get(url).json()

{'meta': {'code': 429,
  'errorType': 'quota_exceeded',
  'errorDetail': 'Quota exceeded',
  'requestId': '6049431e7c676a3ae10314af'},
 'response': {}}

In [114]:
requests.get('https://api.foursquare.com/v2/venues/{}/?client_id={}&client_secret={}&v={}'.format('54120e6a498e1cf9afe2a7fb', CLIENT_ID, CLIENT_SECRET, VERSION)).json()['response']['venue']['likes']['count']

{'meta': {'code': 429,
  'errorType': 'quota_exceeded',
  'errorDetail': 'Quota exceeded',
  'requestId': '604814767c789a5457550106'},
 'response': {}}

In [167]:
BH_venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue id,Venue Latitude,Venue Longitude,Venue Category
0,Sagrada Família,-20.196404,-44.107806,lugar3,54120e6a498e1cf9afe2a7fb,-20.195593,-44.104614,Asian Restaurant
1,Sagrada Família,-20.196404,-44.107806,Aranha - Praça Padre Agostinho,4f22a426e4b006e5c03b8909,-20.196620,-44.104304,Plaza
2,Buritis,-19.976579,-43.967416,Dog's Shop,4dcc3ac452b19dd12dcd0670,-19.974309,-43.967688,Pet Store
3,Buritis,-19.976579,-43.967416,Beb’s Bar Buritis,5b5295ab0802d400399a170b,-19.972796,-43.967244,Bar
4,Buritis,-19.976579,-43.967416,Parque Aggeo Pio Sobrinho,4c8b19601556bfb7beccff92,-19.976082,-43.970962,Park
...,...,...,...,...,...,...,...,...
269,Sion,-19.949362,-43.932587,Salão Frizee Hair,4c0a4fad3c70b7130cf7275b,-19.951148,-43.934506,Salon / Barbershop
270,Sion,-19.949362,-43.932587,Casa Lotérica Nossa Senhora de Fátima,513f31eae4b06157b24825c2,-19.952363,-43.931502,Lottery Retailer
271,Sion,-19.949362,-43.932587,Cantina Amici,53723baf498e3acfb05c28a4,-19.946967,-43.934837,Restaurant
272,Sion,-19.949362,-43.932587,CNR,4c49e18efbafc92810bf85db,-19.949476,-43.936330,Department Store


In [9]:
teste['response']['venue']['likes']['count']

NameError: name 'teste' is not defined

In [61]:
teste

{'meta': {'code': 200, 'requestId': '60480781ccb5ce3c753c1b50'},
 'response': {'venue': {'id': '54120e6a498e1cf9afe2a7fb',
   'name': 'lugar3',
   'contact': {},
   'location': {'lat': -20.195593405902308,
    'lng': -44.1046142578125,
    'labeledLatLngs': [{'label': 'display',
      'lat': -20.195593405902308,
      'lng': -44.1046142578125}],
    'cc': 'BR',
    'country': 'Brasil',
    'formattedAddress': ['Brasil']},
   'canonicalUrl': 'https://foursquare.com/v/lugar3/54120e6a498e1cf9afe2a7fb',
   'categories': [{'id': '4bf58dd8d48988d142941735',
     'name': 'Asian Restaurant',
     'pluralName': 'Asian Restaurants',
     'shortName': 'Asian',
     'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/asian_',
      'suffix': '.png'},
     'primary': True}],
   'verified': False,
   'stats': {'tipCount': 0},
   'price': {'tier': 2, 'message': 'Moderate', 'currency': '$'},
   'likes': {'count': 0, 'groups': []},
   'dislike': False,
   'ok': False,
   'allowMenuUrlEdit':

In [69]:
df['latitude'][1]

-19.9765791

In [11]:
df.reset_index(drop=True,inplace=True)

In [99]:
df['latitude'][0]

-19.9765791

In [109]:
df.to_csv('BH_df.csv')

In [3]:
df = pd.read_csv('BH_df.csv')

In [12]:
df

Unnamed: 0.1,Unnamed: 0,#,Zone,Regional,Neighborhood,Population,Size(km²),latitude,longitude
0,0,2,ZONA SUL,Oeste,Buritis,29 374,"3,823\n",-19.976579,-43.967416
1,1,3,ZONA OESTE,Noroeste,Padre Eustáquio,28 773,"2,969\n",-19.914305,-43.979356
2,2,4,ZONA OESTE,Barreiro,Lindéia,25 231,"2,012\n",-19.977414,-44.050635
3,3,5,ZONA NORTE,Pamp./ Venda Nova,Santa Mônica,23 883,"2,178\n",-19.824569,-43.977456
4,4,6,ZONA NORTE,Pampulha,Céu Azul,23 817,"2,748\n",-19.822750,-44.003980
...,...,...,...,...,...,...,...,...,...
361,361,479,ZONA NORTE,Pampulha,Xangri-lá,8,"0,526\n",-19.838104,-44.018154
362,362,481,ZONA NORTE,Pampulha,Bispo de Maura,0,"0,017\n",-19.828079,-44.016360
363,363,482,ZONA NORTE,Pampulha,Campus UFMG,0,"3,85\n",-19.867975,-43.964427
364,364,484,ZONA NORTE,Pampulha,Lagoa da Pampulha,0,"3,138\n",-19.853383,-43.974785
