# Battle Of Neighborhoods 
## (Week 4 and 5)

### Import Libraries:


In [1]:
import pandas as pd           #Import Pandas for dataframe
import json                          #for formatting data from foursquare
import folium                        #for creating maps
import numpy as np                   #for data sorting functions
from sklearn.cluster import KMeans   #for kmeans clustering
import matplotlib.cm as cm           #for map markers
import matplotlib.colors as colors
import requests

# Regions
### Canada:
##### *Downtown Toronto*
Richmond Hill <br>
Georgetown <br>
Newmarket <br>
Rockton <br>
Port Perry<br>


### New York:
##### *Manhattan* 
Pleasant Valley <br>
Newark <br>
Stafford <br>
Albany<br>
Utica<br>

### France:

##### *Paris*
Montargis <br>
Toucy <br>
Genneville <br>
Reims<br>
Tours<br>

In [2]:
#!conda install -c conda-forge geocoder --yes
#import geocoder

In [3]:
# initialize your variable to None
#lat_lng_coords = None

# loop until you get the coordinates
#while(lat_lng_coords is None):
#    g = geocoder.google('Manhattan, NY')
#    lat_lng_coords = g.latlng

#latitude = lat_lng_coords[0]
#longitude = lat_lng_coords[1]

In [4]:
#I can't get geocoder to respond, so I looked up the coordinates on google and put them in this dictionary:
LatLang = {
'Canada':
    (('Downtown Toronto',(43.6543,-79.3860)),
    ('Richmond Hill',(43.887501,-79.428406)),
    ('Georgetown',(43.6502,-79.9036)),
    ('Newmarket',(44.0592,-79.4613)),
    ('Rockton',(43.3013,-80.1282)),
    ('Port Perry',(44.1050,-78.9441))),
'New York':
    (('Manhattan',(40.7831,-73.9712)),
    ('Pleasant Valley',(41.7445,-73.8212)),
    ('Newark',(43.0467,-77.0953)),
    ('Stafford',(42.9817,-78.0735)),
    ('Albany',(42.6526,-73.7562)),
    ('Utica',(43.1009,-75.2327))),
'France':
    (('Paris',(48.8566,2.3522)),
    ('Montargis',(47.9973,2.7363)),
    ('Toucy',(47.7351,3.2944)),
    ('Genneville',(49.3700,0.2749)),
    ('Reims',(49.2583,4.0317)),
    ('Tours',(47.3941,0.6848)))
}

### Let's look at a plot of each region to see where the towns are relative to their city centers

The blue dots on each map mark the towns and cities and the red circles around them mark the area that will be included in the Foursquare API call

In [5]:
map_canada = folium.Map(width=500,height=500,location=[LatLang['Canada'][0][1][0],LatLang['Canada'][0][1][1]], zoom_start=7.5)
for i in range(6):
    if i == 0:
        r=2000
    elif i>0:
        r=20000    
    folium.Circle([LatLang['Canada'][i][1][0], LatLang['Canada'][i][1][1]],
                      radius=r,
                      color='#EE204D',
                      fill=True,
                      fill_opacity=0.7
                   ).add_to(map_canada)
for i in range(6):    
    label = '{}'.format(LatLang['Canada'][i][0])
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [LatLang['Canada'][i][1][0], LatLang['Canada'][i][1][1]],
        radius=1,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_canada)  
map_canada

In [6]:
map_france = folium.Map(width=500,height=500,location=[LatLang['France'][0][1][0],LatLang['France'][0][1][1]], zoom_start=7)
for i in range(6):
    if i == 0:
        r=2000
    elif i>0:
        r=20000    
    folium.Circle([LatLang['France'][i][1][0], LatLang['France'][i][1][1]],
                      radius=r,
                      color='#EE204D',
                      fill=True,
                      fill_opacity=0.7
                   ).add_to(map_france)
for i in range(6):
    label = '{}'.format(LatLang['France'][i][0])
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [LatLang['France'][i][1][0], LatLang['France'][i][1][1]],
        radius=1,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_france)  
map_france

In [7]:
map_ny = folium.Map(width=500,height=500,location=[41.4090,-75.6624], zoom_start=6.45)
for i in range(6):
    if i == 0:
        r=2000
    elif i>0:
        r=20000    
    folium.Circle([LatLang['New York'][i][1][0], LatLang['New York'][i][1][1]],
                      radius=r,
                      color='#EE204D',
                      fill=True,
                      fill_opacity=0.7
                   ).add_to(map_ny)
for i in range(6):
    label = '{}'.format(LatLang['New York'][i][0])
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [LatLang['New York'][i][1][0], LatLang['New York'][i][1][1]],
        radius=1,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_ny)

    
map_ny

### Next we'll make the Foursquare API Calls and collect venue data for each region

In [8]:
#Foursquare API Credentials:

CLIENT_ID = 'GW2N4RRPFVS3ZEJDL0U3U5M2JK4ORZPXRSJVEWHYVW3RROHC' # your Foursquare ID
CLIENT_SECRET = 'UGZMHHNQX0PE55TKWCMME04HVTNZQHT4SFQ01PYMEN1MKYUS' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

LIMIT = 10000 # limit of number of venues returned by Foursquare API

In [9]:
def getVenues(place,radius=2000):
    venues_list=[]
    for i in range(6):
        lat=place[i][1][0]
        lng=place[i][1][1]
        if i>0:
            radius=20000
            # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)

            # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        venues_list.append([(
                place[i][0], 
                place[i][1][0], 
                place[i][1][1], 
                v['venue']['name'], 
                v['venue']['location']['lat'], 
                v['venue']['location']['lng'],  
                v['venue']['categories'][0]['name']) for v in results])
    

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                    'Neighborhood Latitude', 
                    'Neighborhood Longitude', 
                    'Venue', 
                    'Venue Latitude', 
                    'Venue Longitude', 
                    'Venue Category']   
    return nearby_venues

In [10]:
FranceVenues = getVenues(LatLang['France'])
print(FranceVenues.shape)
#FranceVenues.head()

(397, 7)


In [11]:
CanadaVenues=getVenues(LatLang['Canada'])
print(CanadaVenues.shape)
#CanadaVenues.head()

(569, 7)


In [12]:
NYVenues=getVenues(LatLang['New York'])
print(NYVenues.shape)
#NYVenues.head()

(556, 7)


In [13]:
AllVenues = pd.concat([FranceVenues,CanadaVenues,NYVenues])
print(AllVenues.shape)

(1522, 7)


### Now the data needs to be prepared for K means clustering.  For this we'll use One Hot Encoding

In [14]:
# one hot encoding
Venues_onehot = pd.get_dummies(AllVenues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Venues_onehot['Neighborhood'] = AllVenues['Neighborhood'] 

Venues_grouped = Venues_onehot.groupby('Neighborhood').mean().reset_index()
print(Venues_grouped.shape)
Venues_grouped.head()


(18, 250)


Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Antique Shop,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Warehouse Store,Waterfall,Waterfront,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio,Zoo
0,Albany,0.0,0.0,0.07,0.0,0.01,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0
1,Downtown Toronto,0.0,0.0,0.02,0.0,0.02,0.0,0.02,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0
2,Genneville,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,...,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01
3,Georgetown,0.0,0.01,0.03,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0
4,Manhattan,0.0,0.0,0.02,0.0,0.01,0.05,0.01,0.0,0.0,...,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.01,0.0


In [15]:

LocDF=pd.DataFrame()#(columns=['Neighborhood','Latitude','Longitude'])
N=[]
Lat=[]
Lon=[]
C=[]
for i in range(6):
    C.append('France')
    N.append(LatLang['France'][i][0])
    Lat.append(LatLang['France'][i][1][0])
    Lon.append(LatLang['France'][i][1][1])
for i in range(6):
    C.append('Canada')
    N.append(LatLang['Canada'][i][0])
    Lat.append(LatLang['Canada'][i][1][0])
    Lon.append(LatLang['Canada'][i][1][1])
for i in range(6):
    C.append('New York')
    N.append(LatLang['New York'][i][0])
    Lat.append(LatLang['New York'][i][1][0])
    Lon.append(LatLang['New York'][i][1][1])
LocDF['Country']=C
LocDF['Neighborhood']=N
#LocDF['Latitude']=Lat
#LocDF['Longitude']=Lon
LocDF

Unnamed: 0,Country,Neighborhood
0,France,Paris
1,France,Montargis
2,France,Toucy
3,France,Genneville
4,France,Reims
5,France,Tours
6,Canada,Downtown Toronto
7,Canada,Richmond Hill
8,Canada,Georgetown
9,Canada,Newmarket


### Let's look at the most common venues in each region

In [16]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [17]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Venues_grouped['Neighborhood']

for ind in np.arange(Venues_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Venues_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Albany,American Restaurant,Pub,Café,Ice Cream Shop,Deli / Bodega,Coffee Shop,Brewery,Bar,Burger Joint,Bakery
1,Downtown Toronto,Coffee Shop,Café,Hotel,Pizza Place,Theater,Beer Bar,Gastropub,Restaurant,Sushi Restaurant,Bookstore
2,Genneville,French Restaurant,Seafood Restaurant,Beach,Hotel,Farmers Market,Beach Bar,Lounge,Nightclub,Restaurant,Resort
3,Georgetown,Coffee Shop,Golf Course,Grocery Store,Italian Restaurant,Liquor Store,Pub,BBQ Joint,Bakery,Park,Steakhouse
4,Manhattan,Park,Exhibit,Art Museum,Bakery,Italian Restaurant,Plaza,Playground,Garden,Seafood Restaurant,Bookstore


### Use K-Means clustering to cluster regions based on their most common regions

In [18]:
# set number of clusters
kclusters = 4

Venues_grouped_clustering = Venues_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Venues_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 2, 0, 2, 2, 1, 2, 2, 2, 2])

In [19]:
Venues_merged = pd.merge(LocDF,neighborhoods_venues_sorted,on='Neighborhood')
Venues_merged.insert(0, 'Cluster Labels', kmeans.labels_)
Venues_merged 

Unnamed: 0,Cluster Labels,Country,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,2,France,Paris,French Restaurant,Plaza,Cocktail Bar,Ice Cream Shop,Japanese Restaurant,Art Gallery,Wine Bar,Restaurant,Bookstore,Bakery
1,2,France,Montargis,Restaurant,Train Station,Supermarket,Fast Food Restaurant,Coffee Shop,Hotel,Multiplex,Grocery Store,Steakhouse,Construction & Landscaping
2,0,France,Toucy,Supermarket,Castle,Rest Area,History Museum,Outdoor Supply Store,Theme Park,Bar,French Restaurant,Donut Shop,Electronics Store
3,2,France,Genneville,French Restaurant,Seafood Restaurant,Beach,Hotel,Farmers Market,Beach Bar,Lounge,Nightclub,Restaurant,Resort
4,2,France,Reims,French Restaurant,Hotel,Train Station,Vineyard,Plaza,Winery,Rest Area,Wine Bar,Furniture / Home Store,Historic Site
5,1,France,Tours,Train Station,Hotel,French Restaurant,Plaza,Garden,Pub,Italian Restaurant,Fast Food Restaurant,Rest Area,Beer Garden
6,2,Canada,Downtown Toronto,Coffee Shop,Café,Hotel,Pizza Place,Theater,Beer Bar,Gastropub,Restaurant,Sushi Restaurant,Bookstore
7,2,Canada,Richmond Hill,Italian Restaurant,Theme Park Ride / Attraction,Supermarket,Gym,Bagel Shop,Hotel,Golf Course,Pizza Place,Liquor Store,Dessert Shop
8,2,Canada,Georgetown,Coffee Shop,Golf Course,Grocery Store,Italian Restaurant,Liquor Store,Pub,BBQ Joint,Bakery,Park,Steakhouse
9,2,Canada,Newmarket,Golf Course,Liquor Store,Farm,Sushi Restaurant,Diner,Burger Joint,Seafood Restaurant,Coffee Shop,Movie Theater,Mexican Restaurant


In [20]:
Venues_merged.sort_values(by=['Cluster Labels'])

Unnamed: 0,Cluster Labels,Country,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,0,France,Toucy,Supermarket,Castle,Rest Area,History Museum,Outdoor Supply Store,Theme Park,Bar,French Restaurant,Donut Shop,Electronics Store
11,0,Canada,Port Perry,Coffee Shop,Fast Food Restaurant,Pharmacy,Grocery Store,Farm,Golf Course,Trail,Sandwich Place,Restaurant,Brewery
16,0,New York,Albany,American Restaurant,Pub,Café,Ice Cream Shop,Deli / Bodega,Coffee Shop,Brewery,Bar,Burger Joint,Bakery
5,1,France,Tours,Train Station,Hotel,French Restaurant,Plaza,Garden,Pub,Italian Restaurant,Fast Food Restaurant,Rest Area,Beer Garden
0,2,France,Paris,French Restaurant,Plaza,Cocktail Bar,Ice Cream Shop,Japanese Restaurant,Art Gallery,Wine Bar,Restaurant,Bookstore,Bakery
14,2,New York,Newark,Clothing Store,Discount Store,Fast Food Restaurant,Sandwich Place,Diner,Supermarket,Pharmacy,Sporting Goods Shop,Bar,Pizza Place
13,2,New York,Pleasant Valley,Pizza Place,American Restaurant,Italian Restaurant,Brewery,Convenience Store,Restaurant,Café,French Restaurant,Farm,Diner
12,2,New York,Manhattan,Park,Exhibit,Art Museum,Bakery,Italian Restaurant,Plaza,Playground,Garden,Seafood Restaurant,Bookstore
10,2,Canada,Rockton,Grocery Store,Coffee Shop,Brewery,Trail,Restaurant,Golf Course,Pub,Indian Restaurant,Café,American Restaurant
8,2,Canada,Georgetown,Coffee Shop,Golf Course,Grocery Store,Italian Restaurant,Liquor Store,Pub,BBQ Joint,Bakery,Park,Steakhouse


### Observation and analysis of clusters relative to their regions

There does not seem to be an observable relationship between the distribution of venues in different countries or between urban centers and surrounding suburban / rural areas.  Part of this is probably because there are too many different venue types returned by Foursquare API, the KNN algorithm does not know that coffee shops are similar to cafes and pubs are similar to bars and beer gardens, etc.  It might be possible to improve the algorithm if the foursquare categories were more generalized, or if a different algorithm were used to determine similarity between different venue categories.