# Import the Libraries Needed 

In [71]:
import pandas as pd
import numpy as np
import requests # library to handle requests

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

# import k-means from clustering stage
from sklearn.cluster import KMeans

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

## Define my list of cities for analysis

In [73]:
address_list = [
    "New York, NY, USA",
    "Montreal, QC, Canada",
    "Toronto, ON, Canada",
    "Boston, MA, USA",
    "Atlanta, GA, USA",
    "New Orleans, LA, USA",
    "Seattle, WA, USA",
    "Portland, OR, USA",
    "San Diego, CA, USA",
    "Los Angeles, CA, USA",
    "Edinburgh, UK",
    "Cardiff, UK",
    "Bristol, UK",
    "London, UK",
    "Paris, FR"
]

## Find Our Starting Locations For Each City

In [74]:
geolocator = Nominatim()
city_list=[]

for i, address in enumerate(address_list):
    location = geolocator.geocode(address)
    latitude = location.latitude
    longitude = location.longitude
    print('The geograpical coordinate of {} are {}, {}.'.format(address, latitude, longitude))
    city_list.append({"City":address.split(',')[0],"Address":address,"Latitude":latitude,"Longitude":longitude})
    
city_list_ds=pd.DataFrame(city_list)

  if __name__ == '__main__':


The geograpical coordinate of New York, NY, USA are 40.7308619, -73.9871558.
The geograpical coordinate of Montreal, QC, Canada are 45.4972159, -73.6103642.
The geograpical coordinate of Toronto, ON, Canada are 43.653963, -79.387207.
The geograpical coordinate of Boston, MA, USA are 42.3602534, -71.0582912.
The geograpical coordinate of Atlanta, GA, USA are 33.7490987, -84.3901849.
The geograpical coordinate of New Orleans, LA, USA are 29.9499323, -90.0701156.
The geograpical coordinate of Seattle, WA, USA are 47.6038321, -122.3300624.
The geograpical coordinate of Portland, OR, USA are 45.5202471, -122.6741949.
The geograpical coordinate of San Diego, CA, USA are 32.7174209, -117.1627714.
The geograpical coordinate of Los Angeles, CA, USA are 34.0536834, -118.2427669.
The geograpical coordinate of Edinburgh, UK are 55.953346, -3.1883739.
The geograpical coordinate of Cardiff, UK are 51.4816546, -3.1791934.
The geograpical coordinate of Bristol, UK are 51.4538022, -2.5972985.
The geogr

In [75]:
city_list_ds.head()

Unnamed: 0,Address,City,Latitude,Longitude
0,"New York, NY, USA",New York,40.730862,-73.987156
1,"Montreal, QC, Canada",Montreal,45.497216,-73.610364
2,"Toronto, ON, Canada",Toronto,43.653963,-79.387207
3,"Boston, MA, USA",Boston,42.360253,-71.058291
4,"Atlanta, GA, USA",Atlanta,33.749099,-84.390185


## Setup to use Foursquare

In [76]:
LIMIT=1000
CLIENT_ID = 'B5KCB5H4CDDAACTXUEM0AFKZNN0WVT2YSP0AFHUAV25Q1XLJ' # your Foursquare ID
CLIENT_SECRET = 'BXOF0EESEO0P4RW1AYQWCU1BOEHE4RDUZU25MQAITDMZCPFX' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

#print('Your credentails:')
#print('CLIENT_ID: ' + CLIENT_ID)
#print('CLIENT_SECRET:' + CLIENT_SECRET)

## Define a function to make it easier to get data for each City

In [77]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&section=food'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['City', 
                  'City Latitude', 
                  'City Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

## Get the FourSquare data for each City

In [78]:
world_food_data = getNearbyVenues(names=city_list_ds['City'], latitudes=city_list_ds['Latitude'], longitudes=city_list_ds['Longitude'])
print(world_food_data.shape)
#world_food_data.head()

New York
Montreal
Toronto
Boston
Atlanta
New Orleans
Seattle
Portland
San Diego
Los Angeles
Edinburgh
Cardiff
Bristol
London
Paris
(1330, 7)


## Review what we have for each City

In [79]:
print(world_food_data.groupby('City').count())
print('There are {} uniques categories.'.format(len(world_food_data['Venue Category'].unique())))

             City Latitude  City Longitude  Venue  Venue Latitude  \
City                                                                
Atlanta                 55              55     55              55   
Boston                 100             100    100             100   
Bristol                100             100    100             100   
Cardiff                 49              49     49              49   
Edinburgh              100             100    100             100   
London                 100             100    100             100   
Los Angeles            100             100    100             100   
Montreal                26              26     26              26   
New Orleans            100             100    100             100   
New York               100             100    100             100   
Paris                  100             100    100             100   
Portland               100             100    100             100   
San Diego              100        

In [80]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

## Twist the data into analyzable form

In [81]:
# one hot encoding
world_food_onehot = pd.get_dummies(world_food_data[['Venue Category']], prefix="", prefix_sep="")

# add city column back to dataframe
world_food_onehot['City'] = world_food_data['City'] 

# move city column to the first column
fixed_columns = [world_food_onehot.columns[-1]] + list(world_food_onehot.columns[:-1])
world_food_onehot = world_food_onehot[fixed_columns]

# world_food_onehot.head()

world_food_grouped = world_food_onehot.groupby('City').mean().reset_index()
world_food_grouped

Unnamed: 0,City,African Restaurant,Alsatian Restaurant,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Asian Restaurant,Auvergne Restaurant,BBQ Joint,Bagel Shop,...,Tapas Restaurant,Thai Restaurant,Theme Restaurant,Turkish Restaurant,Udon Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wings Joint,Yoshoku Restaurant
0,Atlanta,0.0,0.0,0.090909,0.0,0.0,0.018182,0.0,0.018182,0.0,...,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.036364,0.0
1,Boston,0.0,0.0,0.07,0.0,0.0,0.01,0.0,0.0,0.01,...,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Bristol,0.0,0.0,0.02,0.0,0.0,0.04,0.0,0.03,0.0,...,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0
3,Cardiff,0.0,0.0,0.020408,0.0,0.0,0.020408,0.0,0.0,0.0,...,0.040816,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Edinburgh,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,...,0.01,0.02,0.0,0.01,0.0,0.0,0.04,0.0,0.01,0.0
5,London,0.02,0.0,0.02,0.0,0.01,0.01,0.0,0.0,0.0,...,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0
6,Los Angeles,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.02,0.0,...,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01
7,Montreal,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,New Orleans,0.0,0.0,0.06,0.0,0.01,0.0,0.0,0.0,0.0,...,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0
9,New York,0.0,0.0,0.03,0.01,0.0,0.01,0.0,0.01,0.05,...,0.01,0.0,0.0,0.0,0.02,0.01,0.05,0.03,0.0,0.0


## Pick the  top 10 Important City Restaurants for the KMeans

In [82]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['City']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
city_venues_sorted = pd.DataFrame(columns=columns)
city_venues_sorted['City'] = world_food_grouped['City']

for ind in np.arange(world_food_grouped.shape[0]):
    city_venues_sorted.iloc[ind, 1:] = return_most_common_venues(world_food_grouped.iloc[ind, :], num_top_venues)

city_venues_sorted

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Atlanta,Sandwich Place,Fast Food Restaurant,American Restaurant,Mexican Restaurant,Café,Wings Joint,Breakfast Spot,Indian Restaurant,Mediterranean Restaurant,Caribbean Restaurant
1,Boston,Italian Restaurant,Seafood Restaurant,American Restaurant,Bakery,Pizza Place,Sandwich Place,Mediterranean Restaurant,Sushi Restaurant,Restaurant,New American Restaurant
2,Bristol,Café,Italian Restaurant,Pizza Place,Indian Restaurant,Sandwich Place,Restaurant,Asian Restaurant,Burger Joint,Steakhouse,BBQ Joint
3,Cardiff,Italian Restaurant,Café,Restaurant,Bakery,Deli / Bodega,Sandwich Place,Portuguese Restaurant,Burger Joint,Mexican Restaurant,Tapas Restaurant
4,Edinburgh,Café,Restaurant,French Restaurant,Italian Restaurant,Indian Restaurant,Seafood Restaurant,Mexican Restaurant,Gastropub,Vegetarian / Vegan Restaurant,Sandwich Place
5,London,French Restaurant,Burger Joint,Steakhouse,Italian Restaurant,Bakery,Japanese Restaurant,Restaurant,Seafood Restaurant,Sushi Restaurant,Café
6,Los Angeles,Japanese Restaurant,Mexican Restaurant,Sushi Restaurant,Italian Restaurant,Ramen Restaurant,Chinese Restaurant,Sandwich Place,Seafood Restaurant,Gastropub,French Restaurant
7,Montreal,Café,Restaurant,Fast Food Restaurant,Bakery,Sandwich Place,Burger Joint,French Restaurant,Sushi Restaurant,Russian Restaurant,Eastern European Restaurant
8,New Orleans,Cajun / Creole Restaurant,Seafood Restaurant,Restaurant,Café,American Restaurant,Italian Restaurant,Steakhouse,New American Restaurant,French Restaurant,Sandwich Place
9,New York,Japanese Restaurant,Pizza Place,Chinese Restaurant,Bagel Shop,Italian Restaurant,Vegetarian / Vegan Restaurant,Seafood Restaurant,Café,Mexican Restaurant,American Restaurant


## Run kmeans against the World City Food Data

In [83]:
# set number of clusters
kclusters = 5

world_food_grouped_clustering = world_food_grouped.drop('City', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(world_food_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 0, 3, 3, 3, 4, 4, 3, 3, 4], dtype=int32)

## Merge analysis cluster labels back into data, and display simplified results.

In [84]:
world_food_merged = city_list_ds

# add clustering labels
world_food_merged['Cluster Labels'] = kmeans.labels_

# merge city_venues_sorted with world_food_merged to add latitude/longitude for each city
world_food_merged = world_food_merged.join(city_venues_sorted.set_index('City'), on='City')

# world_food_merged.head() # check the last columns!
print(world_food_merged[['City','Cluster Labels']])

           City  Cluster Labels
0      New York               1
1      Montreal               0
2       Toronto               3
3        Boston               3
4       Atlanta               3
5   New Orleans               4
6       Seattle               4
7      Portland               3
8     San Diego               3
9   Los Angeles               4
10    Edinburgh               2
11      Cardiff               1
12      Bristol               0
13       London               4
14        Paris               4


# Cardiff, UK has been grouped with New York City, NY, USA
Our recommendation will be that expansion should be overseas in the UK

## Create Map Showing the like clusters

In [85]:
# create map
map_clusters = folium.Map(location=[30.876551, -59.910660], zoom_start=4)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(world_food_merged['Latitude'], world_food_merged['Longitude'], world_food_merged['City'], world_food_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=4,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Findings are - From Our Target City of New York, NY, USA - Cardiff UK is the best match for expansion

I do not believe the GitHub Version will display the map.