## Libraries

In [38]:
import os

import folium # map rendering library
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests # library to handle requests
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from sklearn.cluster import KMeans

## Data Scraping

**Neighborhoods**

Neighborhood names and coordinates are scraped from the Wikipedia list of Miami neighborhoods.

In [2]:
# Wikipedia page that lists the Miami neighborhoods
url = 'https://en.wikipedia.org/wiki/List_of_neighborhoods_in_Miami'
# Start a Chrome session using the driver binary at ./chromedriver
driver = webdriver.Chrome('./chromedriver')
# Load the page in the browser session
driver.get(url)

In [12]:
# Collect every body row of the sortable wiki table.
# find_elements('xpath', ...) is accepted by both Selenium 3 and Selenium 4,
# whereas find_elements_by_xpath was removed in Selenium 4.
table_info = driver.find_elements('xpath', '//table[@class="wikitable sortable jquery-tablesorter"]/tbody/tr')

# Build the rows first and create the frame once. The previous chained
# assignment (df[col][i] = value) is not guaranteed to write into the frame.
neighborhood_records = []
for table_row in table_info:
    # Read the row text once: cells are tab-separated, the last one holds "lat,lon"
    cells = table_row.get_attribute("innerText").split('\t')
    coordinates = cells[-1].split(',')
    neighborhood_records.append({
        'NEIGHBORHOOD': cells[0],
        'LATITUDE': coordinates[0],
        # Rows whose last cell has no comma get NaN here and are dropped during cleaning
        'LONGITUDE': coordinates[1] if len(coordinates) > 1 else np.nan,
    })

df_neighborhoods = pd.DataFrame(neighborhood_records, columns=['NEIGHBORHOOD','LATITUDE','LONGITUDE'])

In [35]:
# Close the browser window opened for scraping
driver.close()

In [13]:
# (rows, columns) of the scraped table before cleaning
df_neighborhoods.shape

(26, 3)

In [28]:
# Preview the first scraped rows
df_neighborhoods.head()

Unnamed: 0,NEIGHBORHOOD,LATITUDE,LONGITUDE
0,Allapattah,25.815,-80.224
1,Arts & Entertainment District,25.799,-80.19
2,Brickell,25.758,-80.193
3,Buena Vista,25.813,-80.192
4,Coconut Grove,25.712,-80.257


**Data cleaning**

Drop the rows that have no longitude value.

In [34]:
# Keep only the rows for which a longitude was scraped
df_neighborhoods = df_neighborhoods.dropna(subset=['LONGITUDE'])

In [37]:
# Renumber the rows 0..n-1, discarding the old index
df_neighborhoods = df_neighborhoods.reset_index(drop=True)

In [125]:
# (rows, columns) after removing the rows without longitude
df_neighborhoods.shape

(24, 3)

**Find Miami coordinates**

Using geopy

In [39]:
# Address whose coordinates are used to centre the maps
address = 'Miami, FL'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the address cannot be resolved; fail with a
# clear message instead of an AttributeError on the attribute access below
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Miami are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Miami are 25.7741728, -80.19362.


**Map Miami neighborhoods**

Markers are placed at the coordinates listed on Wikipedia.

In [42]:
# Base map positioned at the (latitude, longitude) pair computed earlier
map_miami = folium.Map(location=[latitude, longitude], zoom_start=12)

# One blue circle marker per neighborhood; the popup shows the neighborhood name
neighborhood_points = zip(
    df_neighborhoods['LATITUDE'],
    df_neighborhoods['LONGITUDE'],
    df_neighborhoods['NEIGHBORHOOD'],
)
for lat, lng, neighborhood in neighborhood_points:
    label = folium.Popup(neighborhood, parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_miami)

map_miami

**Venues**

Use the Foursquare API to get the restaurant venues around each Miami neighborhood.

In [44]:
# Foursquare credentials are read from the environment so that secrets are
# never stored in the notebook. Export FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; Foursquare requests will fail.')
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value
radius = 500 # search radius sent to the API
query = 'restaurant' # search term sent to the API

In [46]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare "explore" endpoint around each location.

    Reads the notebook-level settings CLIENT_ID, CLIENT_SECRET, VERSION,
    LIMIT and query when building each request.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of every location to search around.
    radius : int, default 500
        Search radius forwarded to the API.

    Returns
    -------
    pandas.DataFrame
        One row per venue found, with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but keeps these columns) when nothing is found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    venue_columns = ['Neighborhood',
                     'Neighborhood Latitude',
                     'Neighborhood Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests encode the query string instead of formatting it by hand
        request_params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
            'query': query,
        }

        # make the GET request; fail loudly on HTTP errors and never hang forever
        response = requests.get(endpoint, params=request_params, timeout=30)
        response.raise_for_status()
        groups = response.json()['response'].get('groups') or []
        results = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for item in results:
            venue = item['venue']
            # a venue may come without categories; record None instead of crashing
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the columns to the constructor keeps the schema even with zero rows
    nearby_venues = pd.DataFrame(venue_rows, columns=venue_columns)

    return(nearby_venues)

In [84]:
# Fetch the venues around every cleaned neighborhood (default radius is used)
miami_venues = getNearbyVenues(names=df_neighborhoods['NEIGHBORHOOD'],
                               latitudes=df_neighborhoods['LATITUDE'],
                               longitudes=df_neighborhoods['LONGITUDE']
                              )

Allapattah
Arts & Entertainment District
Brickell
Buena Vista
Coconut Grove
Coral Way
Design District
Downtown
Edgewater
Flagami
Grapeland Heights
Liberty City
Little Haiti
Little Havana
Lummus Park
Midtown
Overtown
Park West
The Roads
Upper Eastside
Venetian Islands
Virginia Key
West Flagler
Wynwood


In [51]:
# Size of the venue table, followed by a preview of its first rows
print(miami_venues.shape)
miami_venues.head()

(360, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Allapattah,25.815,-80.224,Visions Soul Food & Catering,25.813113,-80.224278,Southern / Soul Food Restaurant
1,Arts & Entertainment District,25.799,-80.19,Ted's at YoungArts,25.797405,-80.189712,Tapas Restaurant
2,Arts & Entertainment District,25.799,-80.19,Plant Food + Wine Miami,25.800452,-80.192805,Restaurant
3,Arts & Entertainment District,25.799,-80.19,The Daily Creative Food Co. - Miami,25.795696,-80.188683,Sandwich Place
4,Arts & Entertainment District,25.799,-80.19,Mister O1 Extraordinary Pizza,25.799129,-80.194791,Pizza Place


**Venues per neighborhood**

In [53]:
# Number of venues returned for each neighborhood
miami_venues.groupby('Neighborhood')['Venue'].count()

Neighborhood
Allapattah                        1
Arts & Entertainment District    13
Brickell                         41
Buena Vista                      23
Coconut Grove                     2
Coral Way                         4
Design District                  25
Downtown                         39
Edgewater                        42
Flagami                          10
Grapeland Heights                 2
Liberty City                      2
Little Haiti                     10
Little Havana                    10
Lummus Park                       9
Midtown                          41
Overtown                          4
Park West                        18
The Roads                         6
Upper Eastside                    4
West Flagler                      4
Wynwood                          50
Name: Venue, dtype: int64

In [54]:
# Count the distinct venue categories present in the results
print('There are {} uniques categories.'.format(len(miami_venues['Venue Category'].unique())))

There are 56 uniques categories.


**Top 5 neighborhoods with the fewest venues**

In [64]:
# Venue counts per neighborhood in ascending order (fewest venues first)
miami_venues.groupby('Neighborhood')['Venue'].count().sort_values()

Neighborhood
Allapattah                        1
Liberty City                      2
Coconut Grove                     2
Grapeland Heights                 2
Coral Way                         4
Upper Eastside                    4
West Flagler                      4
Overtown                          4
The Roads                         6
Lummus Park                       9
Little Haiti                     10
Flagami                          10
Little Havana                    10
Arts & Entertainment District    13
Park West                        18
Buena Vista                      23
Design District                  25
Downtown                         39
Brickell                         41
Midtown                          41
Edgewater                        42
Wynwood                          50
Name: Venue, dtype: int64

Because several neighborhoods are tied on the number of venues, the first 8 positions are kept instead of only 5.

In [70]:
# The 8 neighborhoods with the fewest venues; the result is a Series indexed by neighborhood name
df_miami_venues = miami_venues.groupby('Neighborhood')['Venue'].count().sort_values().head(8)
df_miami_venues

Neighborhood
Allapattah           1
Liberty City         2
Coconut Grove        2
Grapeland Heights    2
Coral Way            4
Upper Eastside       4
West Flagler         4
Overtown             4
Name: Venue, dtype: int64

In [83]:
# Fresh base map; this rebinds map_miami
map_miami = folium.Map(location=[latitude, longitude], zoom_start=12)

# Restrict the neighborhoods to those present in the low-venue selection
is_low_venue = df_neighborhoods['NEIGHBORHOOD'].isin(df_miami_venues.index)
df_map = df_neighborhoods[is_low_venue]

# Draw one circle marker per selected neighborhood
for lat, lng, neighborhood in zip(df_map['LATITUDE'], df_map['LONGITUDE'], df_map['NEIGHBORHOOD']):
    marker_options = dict(
        radius=5,
        popup=folium.Popup(neighborhood, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    folium.CircleMarker([lat, lng], **marker_options).add_to(map_miami)

map_miami

## Neighborhoods Analysis

In [85]:
# one hot encoding
# Indicator column per venue category, named after the category itself
category_frame = miami_venues[['Venue Category']]
miami_onehot = pd.get_dummies(category_frame, prefix="", prefix_sep="")

# Attach the neighborhood of every venue as the last column ...
miami_onehot['Neighborhood'] = miami_venues['Neighborhood']

# ... then rotate that last column to the front
fixed_columns = list(miami_onehot.columns[-1:]) + list(miami_onehot.columns[:-1])
miami_onehot = miami_onehot[fixed_columns]

miami_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bistro,Brazilian Restaurant,...,Soup Place,South American Restaurant,Southern / Soul Food Restaurant,Spanish Restaurant,Steakhouse,Sushi Restaurant,Taco Place,Tapas Restaurant,Vegetarian / Vegan Restaurant,Wings Joint
0,Allapattah,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,Arts & Entertainment District,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,Arts & Entertainment District,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Arts & Entertainment District,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Arts & Entertainment District,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [86]:
# Average the indicator columns per neighborhood, i.e. the share of each category among its venues
miami_grouped = miami_onehot.groupby('Neighborhood').mean().reset_index()
miami_grouped

Unnamed: 0,Neighborhood,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bistro,Brazilian Restaurant,...,Soup Place,South American Restaurant,Southern / Soul Food Restaurant,Spanish Restaurant,Steakhouse,Sushi Restaurant,Taco Place,Tapas Restaurant,Vegetarian / Vegan Restaurant,Wings Joint
0,Allapattah,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Arts & Entertainment District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.153846,0.0,0.0
2,Brickell,0.097561,0.0,0.04878,0.02439,0.0,0.0,0.02439,0.0,0.0,...,0.0,0.0,0.0,0.02439,0.02439,0.0,0.0,0.0,0.0,0.0
3,Buena Vista,0.086957,0.043478,0.0,0.043478,0.0,0.0,0.043478,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Coconut Grove,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Coral Way,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Design District,0.08,0.04,0.0,0.04,0.0,0.0,0.04,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Downtown,0.051282,0.0,0.0,0.0,0.0,0.025641,0.0,0.025641,0.051282,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0
8,Edgewater,0.047619,0.0,0.0,0.0,0.0,0.0,0.047619,0.02381,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.047619,0.0,0.0
9,Flagami,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,...,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0


**Top 5 venues**

In [87]:
# How many categories to print per neighborhood
num_top_venues = 5

for hood in miami_grouped['Neighborhood']:
    print("----"+hood+"----")
    # Transpose the neighborhood's row into a two-column (venue, freq) table
    temp = miami_grouped[miami_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    # Drop the first row (the 'Neighborhood' label) and derive the rounded
    # frequencies with assign(), which returns a new frame instead of writing
    # into a slice (the previous code triggered SettingWithCopyWarning)
    temp = (
        temp.iloc[1:]
        .assign(freq=lambda frame: frame['freq'].astype(float).round(2))
        .sort_values('freq', ascending=False)
        .reset_index(drop=True)
    )
    print(temp.head(num_top_venues))
    print('\n')

----Allapattah----
                             venue  freq
0  Southern / Soul Food Restaurant   1.0
1              American Restaurant   0.0
2                       Poke Place   0.0
3                Indian Restaurant   0.0
4            Indonesian Restaurant   0.0


----Arts & Entertainment District----
              venue  freq
0        Restaurant  0.31
1       Pizza Place  0.23
2  Tapas Restaurant  0.15
3  Cuban Restaurant  0.08
4    Sandwich Place  0.08


----Brickell----
                 venue  freq
0           Restaurant  0.12
1  American Restaurant  0.10
2                 Café  0.10
3   Italian Restaurant  0.10
4  Japanese Restaurant  0.10


----Buena Vista----
                 venue  freq
0                 Café  0.17
1          Pizza Place  0.13
2  American Restaurant  0.09
3           Restaurant  0.09
4   Italian Restaurant  0.09


----Coconut Grove----
                   venue  freq
0    American Restaurant   0.5
1    Fried Chicken Joint   0.5
2       Greek Restaurant   0.0
3 

In [88]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of a row.

    The first entry of ``row`` is skipped; the remaining entries are sorted
    in descending order and the index labels of the first
    ``num_top_venues`` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    top_labels = ranked.index.values[:num_top_venues]
    return top_labels

In [89]:
num_top_venues = 5

# Ordinal suffixes for positions 1-3; every later position uses 'th'
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # Explicit bounds check instead of catching the IndexError with a bare except
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# Rank the categories of every neighborhood row, then build the table in one step
top_venues_per_row = [
    return_most_common_venues(miami_grouped.iloc[ind, :], num_top_venues)
    for ind in range(miami_grouped.shape[0])
]
neighborhoods_venues_sorted = pd.DataFrame(
    top_venues_per_row, columns=columns[1:], index=miami_grouped.index
)
neighborhoods_venues_sorted.insert(0, 'Neighborhood', miami_grouped['Neighborhood'])

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Allapattah,Southern / Soul Food Restaurant,Wings Joint,Chinese Restaurant,Food Truck,Food Court
1,Arts & Entertainment District,Restaurant,Pizza Place,Tapas Restaurant,Sandwich Place,Cuban Restaurant
2,Brickell,Restaurant,American Restaurant,Café,Italian Restaurant,Japanese Restaurant
3,Buena Vista,Café,Pizza Place,Restaurant,Italian Restaurant,American Restaurant
4,Coconut Grove,American Restaurant,Fried Chicken Joint,Arepa Restaurant,Argentinian Restaurant,French Restaurant
5,Coral Way,Food Truck,Seafood Restaurant,Burger Joint,Café,Wings Joint
6,Design District,Café,Pizza Place,Sandwich Place,Italian Restaurant,Japanese Restaurant
7,Downtown,Italian Restaurant,Peruvian Restaurant,Restaurant,American Restaurant,Brazilian Restaurant
8,Edgewater,Restaurant,Pizza Place,Food Truck,American Restaurant,Bakery
9,Flagami,Bakery,Seafood Restaurant,Spanish Restaurant,Latin American Restaurant,Restaurant


In [127]:
# Top categories for the neighborhoods selected in df_miami_venues.
# NOTE(review): the saved output includes a 'Cluster Labels' column, so this cell
# appears to have been executed after the clustering cells further down.
neighborhoods_venues_sorted[neighborhoods_venues_sorted['Neighborhood'].isin(df_miami_venues.index)]

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,2,Allapattah,Southern / Soul Food Restaurant,Wings Joint,Chinese Restaurant,Food Truck,Food Court
4,1,Coconut Grove,American Restaurant,Fried Chicken Joint,Arepa Restaurant,Argentinian Restaurant,French Restaurant
5,1,Coral Way,Food Truck,Seafood Restaurant,Burger Joint,Café,Wings Joint
10,0,Grapeland Heights,Restaurant,Wings Joint,Chinese Restaurant,Food Truck,Food Court
11,2,Liberty City,Food,Southern / Soul Food Restaurant,Wings Joint,Chinese Restaurant,Food Truck
16,2,Overtown,Southern / Soul Food Restaurant,Wings Joint,Food Court,Chinese Restaurant,Food Truck
19,1,Upper Eastside,Pizza Place,Food Court,Sushi Restaurant,Italian Restaurant,Wings Joint
20,1,West Flagler,Comfort Food Restaurant,Bakery,Latin American Restaurant,Cuban Restaurant,Wings Joint


## Clustering Neighborhoods 

Find similar neighborhoods to define what type of restaurant to open.

In [109]:
# set number of clusters
# set number of clusters
kclusters = 3

# Feature matrix: every column except the neighborhood name.
# The columns= keyword replaces the positional axis argument (drop('Neighborhood', 1)),
# which pandas 2.0 no longer accepts.
miami_grouped_clustering = miami_grouped.drop(columns='Neighborhood')

# run k-means clustering; n_init is pinned because its default differs between
# scikit-learn versions, which would change the labels from one install to another
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(miami_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [110]:
# add clustering labels
# add clustering labels; overwrite the column when it already exists so the
# cell can be re-run (insert() raises if the column is already present)
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Work on a copy of the neighborhood coordinates with the column names used from here on
miami_merged = df_neighborhoods.copy()
miami_merged.columns = ['Neighborhood','Latitude','Longitude']

# join the cluster labels and top venues onto the coordinates of each neighborhood;
# neighborhoods missing from neighborhoods_venues_sorted get NaN in the joined columns
miami_merged = miami_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

miami_merged.head()

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Allapattah,25.815,-80.224,2.0,Southern / Soul Food Restaurant,Wings Joint,Chinese Restaurant,Food Truck,Food Court
1,Arts & Entertainment District,25.799,-80.19,1.0,Restaurant,Pizza Place,Tapas Restaurant,Sandwich Place,Cuban Restaurant
2,Brickell,25.758,-80.193,1.0,Restaurant,American Restaurant,Café,Italian Restaurant,Japanese Restaurant
3,Buena Vista,25.813,-80.192,1.0,Café,Pizza Place,Restaurant,Italian Restaurant,American Restaurant
4,Coconut Grove,25.712,-80.257,1.0,American Restaurant,Fried Chicken Joint,Arepa Restaurant,Argentinian Restaurant,French Restaurant


In [111]:
# Keep only the neighborhoods that received a cluster label and store the labels as integers
miami_merged = miami_merged.dropna(subset=['Cluster Labels'])
miami_merged = miami_merged.astype({'Cluster Labels': int})

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters: one evenly spaced rainbow color per cluster
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = list(map(colors.rgb2hex, colors_array))

# add markers to the map
markers_colors = []
cluster_points = zip(
    miami_merged['Latitude'],
    miami_merged['Longitude'],
    miami_merged['Neighborhood'],
    miami_merged['Cluster Labels'],
)
for lat, lon, poi, cluster in cluster_points:
    # Label k uses palette entry k-1, so label 0 wraps around to the last color
    marker_color = rainbow[cluster-1]
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7)
    marker.add_to(map_clusters)

map_clusters

**Clusters Review**

Cluster 1 (k-means label 0)

In [122]:
# Rows with cluster label 0; show column 0 (Neighborhood) plus the columns from position 4 onward
miami_merged.loc[miami_merged['Cluster Labels'] == 0, miami_merged.columns[[0] + list(range(4, miami_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
10,Grapeland Heights,Restaurant,Wings Joint,Chinese Restaurant,Food Truck,Food Court


Cluster 2 (k-means label 1)

In [123]:
# Rows with cluster label 1; show column 0 (Neighborhood) plus the columns from position 4 onward
miami_merged.loc[miami_merged['Cluster Labels'] == 1, miami_merged.columns[[0] + list(range(4, miami_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,Arts & Entertainment District,Restaurant,Pizza Place,Tapas Restaurant,Sandwich Place,Cuban Restaurant
2,Brickell,Restaurant,American Restaurant,Café,Italian Restaurant,Japanese Restaurant
3,Buena Vista,Café,Pizza Place,Restaurant,Italian Restaurant,American Restaurant
4,Coconut Grove,American Restaurant,Fried Chicken Joint,Arepa Restaurant,Argentinian Restaurant,French Restaurant
5,Coral Way,Food Truck,Seafood Restaurant,Burger Joint,Café,Wings Joint
6,Design District,Café,Pizza Place,Sandwich Place,Italian Restaurant,Japanese Restaurant
7,Downtown,Italian Restaurant,Peruvian Restaurant,Restaurant,American Restaurant,Brazilian Restaurant
8,Edgewater,Restaurant,Pizza Place,Food Truck,American Restaurant,Bakery
9,Flagami,Bakery,Seafood Restaurant,Spanish Restaurant,Latin American Restaurant,Restaurant
12,Little Haiti,Pizza Place,Spanish Restaurant,Caribbean Restaurant,Italian Restaurant,Donut Shop


Cluster 3 (k-means label 2)

In [124]:
# Rows with cluster label 2; show column 0 (Neighborhood) plus the columns from position 4 onward
miami_merged.loc[miami_merged['Cluster Labels'] == 2, miami_merged.columns[[0] + list(range(4, miami_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Allapattah,Southern / Soul Food Restaurant,Wings Joint,Chinese Restaurant,Food Truck,Food Court
11,Liberty City,Food,Southern / Soul Food Restaurant,Wings Joint,Chinese Restaurant,Food Truck
16,Overtown,Southern / Soul Food Restaurant,Wings Joint,Food Court,Chinese Restaurant,Food Truck
