# Import Relevant Libraries

In [1]:
!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

import requests # library to handle requests
import random # library for random number generation

import numpy as np

import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

from pandas.io.json import json_normalize

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2019.9.11  |       hecc5488_0         144 KB  conda-forge
    altair-3.3.0               |           py36_0         747 KB  conda-forge
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    certifi-2019.9.11          |           py36_0         147 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.2 MB

The following NEW packages will be 

# Import Location and Geospatial Data for New York Neighborhoods

In [2]:
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
print('Data downloaded!')

Data downloaded!


In [3]:
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)

# Pull JSON data into a Pandas Dataframe

In [4]:
neighborhoods_data = newyork_data['features']

In [5]:
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

neighborhoods = pd.DataFrame(columns=column_names)

neighborhoods

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude


In [6]:
for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)
    
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


# Filter Dataframe to Only Show Bronx

In [7]:
bronx_data = neighborhoods[neighborhoods['Borough'] == 'Bronx'].reset_index(drop=True)
bronx_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


# Determine Geospatial Location of Bronx and Map Bronx

In [8]:
address = 'Bronx, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Bronx are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Bronx are 40.8466508, -73.8785937.


In [9]:
map_bronx = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(bronx_data['Latitude'], bronx_data['Longitude'], bronx_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_bronx)  
    
map_bronx

# Call on Foursquare API to Obtain List and Location of Popular Venues

In [10]:
CLIENT_ID = 'GHIQAO2W4PWCAVGY4UN0ZCPSN5CABBHHGF3NYV3J0XXEFXWU' 
CLIENT_SECRET = 'KHT2FPLXXMPKUHODU4LG2CA2MOL1YMCHZGLXJVEEZMYCHG0G' 
VERSION = '20180605' 

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: GHIQAO2W4PWCAVGY4UN0ZCPSN5CABBHHGF3NYV3J0XXEFXWU
CLIENT_SECRET:KHT2FPLXXMPKUHODU4LG2CA2MOL1YMCHZGLXJVEEZMYCHG0G


In [11]:
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude, 
    longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=GHIQAO2W4PWCAVGY4UN0ZCPSN5CABBHHGF3NYV3J0XXEFXWU&client_secret=KHT2FPLXXMPKUHODU4LG2CA2MOL1YMCHZGLXJVEEZMYCHG0G&v=20180605&ll=40.8466508,-73.8785937&radius=500&limit=100'

In [12]:
results = requests.get(url).json()

In [13]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [14]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]["groups"][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [17]:
bronx_venues = getNearbyVenues(names=bronx_data['Neighborhood'],
                                   latitudes=bronx_data['Latitude'],
                                   longitudes=bronx_data['Longitude']
                                  )

Wakefield
Co-op City
Eastchester
Fieldston
Riverdale
Kingsbridge
Woodlawn
Norwood
Williamsbridge
Baychester
Pelham Parkway
City Island
Bedford Park
University Heights
Morris Heights
Fordham
East Tremont
West Farms
High  Bridge
Melrose
Mott Haven
Port Morris
Longwood
Hunts Point
Morrisania
Soundview
Clason Point
Throgs Neck
Country Club
Parkchester
Westchester Square
Van Nest
Morris Park
Belmont
Spuyten Duyvil
North Riverdale
Pelham Bay
Schuylerville
Edgewater Park
Castle Hill
Olinville
Pelham Gardens
Concourse
Unionport
Edenwald
Claremont Village
Concourse Village
Mount Eden
Mount Hope
Bronxdale
Allerton
Kingsbridge Heights


In [18]:
print(bronx_venues.shape)
bronx_venues.head()

(1199, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Wakefield,40.894705,-73.847201,Lollipops Gelato,40.894123,-73.845892,Dessert Shop
1,Wakefield,40.894705,-73.847201,Rite Aid,40.896649,-73.844846,Pharmacy
2,Wakefield,40.894705,-73.847201,Carvel Ice Cream,40.890487,-73.848568,Ice Cream Shop
3,Wakefield,40.894705,-73.847201,Shell,40.894187,-73.845862,Gas Station
4,Wakefield,40.894705,-73.847201,Dunkin',40.890459,-73.849089,Donut Shop


In [19]:
bronx_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Allerton,28,28,28,28,28,28
Baychester,20,20,20,20,20,20
Bedford Park,31,31,31,31,31,31
Belmont,100,100,100,100,100,100
Bronxdale,12,12,12,12,12,12
Castle Hill,10,10,10,10,10,10
City Island,26,26,26,26,26,26
Claremont Village,21,21,21,21,21,21
Clason Point,9,9,9,9,9,9
Co-op City,16,16,16,16,16,16


# Perform Onehot Analysis To Determine Frequency of Pizzerias in Each Neighborhood

In [20]:
bronx_onehot = pd.get_dummies(bronx_venues[['Venue Category']], prefix="", prefix_sep="")

bronx_onehot['Neighborhood'] = bronx_venues['Neighborhood'] 

Bronx=bronx_onehot
col3="Neighborhood"
Bronx = pd.concat([Bronx[col3],Bronx.drop(col3,axis=1)], axis=1)

Bronx_grouped = Bronx.groupby('Neighborhood').mean().reset_index()

Bronx_grouped.shape

(52, 170)

In [21]:
Bronx_Pizza_Place = Bronx_grouped[["Neighborhood","Pizza Place"]]
Bronx_Pizza_Place.head()

Unnamed: 0,Neighborhood,Pizza Place
0,Allerton,0.142857
1,Baychester,0.05
2,Bedford Park,0.064516
3,Belmont,0.09
4,Bronxdale,0.083333


# Cluster Neighborhoods According to Pizzeria Frequency

In [22]:
kclusters = 4

Bronx_grouped_clustering = Bronx_Pizza_Place.drop('Neighborhood', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Bronx_grouped_clustering)

kmeans.labels_[0:4] 

array([3, 2, 2, 2], dtype=int32)

In [23]:
bronx_merged=Bronx_Pizza_Place.copy()

bronx_merged["Cluster Labels"] = kmeans.labels_

In [24]:
bronx_merged.head()

Unnamed: 0,Neighborhood,Pizza Place,Cluster Labels
0,Allerton,0.142857,3
1,Baychester,0.05,2
2,Bedford Park,0.064516,2
3,Belmont,0.09,2
4,Bronxdale,0.083333,2


In [25]:
bronx_merged=bronx_merged.join(bronx_data.set_index("Neighborhood"), on="Neighborhood")
bronx_merged.head()

Unnamed: 0,Neighborhood,Pizza Place,Cluster Labels,Borough,Latitude,Longitude
0,Allerton,0.142857,3,Bronx,40.865788,-73.859319
1,Baychester,0.05,2,Bronx,40.866858,-73.835798
2,Bedford Park,0.064516,2,Bronx,40.870185,-73.885512
3,Belmont,0.09,2,Bronx,40.857277,-73.888452
4,Bronxdale,0.083333,2,Bronx,40.852723,-73.861726


# Overlay Clusters on Map of Bronx

In [27]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(bronx_merged['Latitude'], bronx_merged['Longitude'], bronx_merged['Neighborhood'], bronx_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Analyze Clusters

# Cluster 1

In [29]:
bronx_merged.loc[bronx_merged['Cluster Labels'] == 0, bronx_merged.columns[[0, 1] + list(range(5, bronx_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Pizza Place,Longitude
6,City Island,0.038462,-73.786488
8,Clason Point,0.0,-73.854144
10,Concourse,0.037037,-73.915589
11,Concourse Village,0.025,-73.915847
12,Country Club,0.0,-73.824099
15,Edenwald,0.0,-73.848083
17,Fieldston,0.0,-73.905643
27,Morrisania,0.035714,-73.901506
30,Mount Hope,0.0,-73.908299
33,Olinville,0.0,-73.863324


### Cluster 1 consists of neighborhoods containing either no pizzerias or a very low number of pizzerias.

# Cluster 2

In [30]:
bronx_merged.loc[bronx_merged['Cluster Labels'] == 1, bronx_merged.columns[[0, 1] + list(range(5, bronx_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Pizza Place,Longitude
26,Morris Park,0.227273,-73.850402
32,Norwood,0.2,-73.879391
46,Van Nest,0.294118,-73.866299


### Cluster 2 consists of neighborhoods containing a high number of pizzerias.

# Cluster 3

In [31]:
bronx_merged.loc[bronx_merged['Cluster Labels'] == 2, bronx_merged.columns[[0, 1] + list(range(5, bronx_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Pizza Place,Longitude
1,Baychester,0.05,-73.835798
2,Bedford Park,0.064516,-73.885512
3,Belmont,0.09,-73.888452
4,Bronxdale,0.083333,-73.861726
5,Castle Hill,0.1,-73.848027
7,Claremont Village,0.095238,-73.901199
9,Co-op City,0.0625,-73.829939
14,Eastchester,0.05,-73.827806
16,Edgewater Park,0.095238,-73.813885
18,Fordham,0.059524,-73.896427


### Cluster 3 consists of neighborhoods containing a low number of pizzerias.

# Cluster 4

In [32]:
bronx_merged.loc[bronx_merged['Cluster Labels'] == 3, bronx_merged.columns[[0, 1] + list(range(5, bronx_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Pizza Place,Longitude
0,Allerton,0.142857,-73.859319
13,East Tremont,0.176471,-73.887356
19,High Bridge,0.142857,-73.926102
22,Kingsbridge Heights,0.181818,-73.901523
24,Melrose,0.16,-73.909422
25,Morris Heights,0.125,-73.919672
31,North Riverdale,0.12,-73.904531
40,Schuylerville,0.125,-73.826203


### Cluster 4 consists of neighborhoods containing a moderate number of pizzerias.