In [1]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analysis

import json # library to handle JSON files

!pip install geopy
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle HTTP requests
# NOTE(review): recent pandas releases expose this helper as pd.json_normalize;
# confirm this import path still works with the installed pandas version.
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!pip install folium
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [2]:
# Download the New York dataset into the working directory as newyork_data.json
# (the file that is opened and parsed further down).
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
print('Data downloaded!')

Data downloaded!


In [3]:
# Parse the downloaded dataset into a Python object.
# The encoding is stated explicitly so the read does not depend on the
# platform's default text encoding.
with open('newyork_data.json', encoding='utf-8') as json_data:
    newyork_data = json.load(json_data)

In [4]:
# The per-neighborhood entries are stored under the top-level 'features' key.
neighborhoods_data = newyork_data['features']

In [5]:
# Columns of the neighborhood table, in display order.
column_names = [
    'Borough',
    'Neighborhood',
    'Latitude',
    'Longitude',
]

# Empty table with that schema; it is populated by a later cell.
neighborhoods = pd.DataFrame(data=None, columns=column_names)

In [6]:
# Build the neighborhood table from the feature list.
# Records are collected in a plain list and converted to a DataFrame once:
# DataFrame.append was removed in pandas 2.0, it copied the whole frame on every
# iteration, and re-running the old cell appended every row a second time.
neighborhood_records = []
for data in neighborhoods_data:
    borough = data['properties']['borough']
    neighborhood_name = data['properties']['name']

    # the coordinate pair is stored as [longitude, latitude]
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]

    neighborhood_records.append({'Borough': borough,
                                 'Neighborhood': neighborhood_name,
                                 'Latitude': neighborhood_lat,
                                 'Longitude': neighborhood_lon})

# Passing the column list keeps the expected schema even when there are no records.
neighborhoods = pd.DataFrame(neighborhood_records,
                             columns=['Borough', 'Neighborhood', 'Latitude', 'Longitude'])

In [7]:
# Preview the first rows of the neighborhood table to check the parsed values.
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [8]:
address = 'New York, NY'

# Look up the coordinates used to center the city-wide map.
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() gives None when nothing matches; fail with a clear message here
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


In [9]:
# Base map centred on the previously geocoded latitude/longitude
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# Appearance shared by every neighborhood marker
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle marker per neighborhood; the popup reads "<neighborhood>, <borough>"
for lat, lng, borough, neighborhood in zip(
        neighborhoods['Latitude'],
        neighborhoods['Longitude'],
        neighborhoods['Borough'],
        neighborhoods['Neighborhood']):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    folium.CircleMarker([lat, lng], popup=label, **marker_style).add_to(map_newyork)

map_newyork

In [10]:
# Keep only the rows whose borough is Brooklyn and renumber them from zero
is_brooklyn = neighborhoods['Borough'].eq('Brooklyn')
brooklyn_data = neighborhoods.loc[is_brooklyn].reset_index(drop=True)
brooklyn_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Brooklyn,Bay Ridge,40.625801,-74.030621
1,Brooklyn,Bensonhurst,40.611009,-73.99518
2,Brooklyn,Sunset Park,40.645103,-74.010316
3,Brooklyn,Greenpoint,40.730201,-73.954241
4,Brooklyn,Gravesend,40.59526,-73.973471


In [11]:
# Every remaining row is a Brooklyn row, so the Borough column is dropped.
# Rebinding the name (instead of inplace=True) together with errors='ignore'
# keeps the cell safe to re-run: a second run no longer raises KeyError
# because the column is already gone.
brooklyn_data = brooklyn_data.drop(columns=['Borough'], errors='ignore')
brooklyn_data.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Bay Ridge,40.625801,-74.030621
1,Bensonhurst,40.611009,-73.99518
2,Sunset Park,40.645103,-74.010316
3,Greenpoint,40.730201,-73.954241
4,Gravesend,40.59526,-73.973471


In [12]:
# Show the row (name and coordinates) for the Williamsburg neighborhood.
brooklyn_data.loc[brooklyn_data['Neighborhood']== 'Williamsburg']

Unnamed: 0,Neighborhood,Latitude,Longitude
15,Williamsburg,40.707144,-73.958115


In [13]:
address = 'Brooklyn, NY'

# Geocode the borough; latitude/longitude are overwritten here and are reused
# as the map centre by the later map cells.
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() gives None when nothing matches; fail with a clear message here
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
map_brooklyn = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
# The neighborhood name gets its own loop variable so that `label` is no longer
# rebound from a string to a Popup inside the loop.
for lat, lng, neighborhood in zip(brooklyn_data['Latitude'], brooklyn_data['Longitude'], brooklyn_data['Neighborhood']):
    label = folium.Popup(neighborhood, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_brooklyn)

map_brooklyn

In [14]:
import os

# Foursquare credentials are read from the environment so that secrets are not
# stored in (or shared with) the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the key pair that used to be hard-coded here has been exposed
# and should be revoked.
_missing_credentials = [
    env_name
    for env_name in ('FOURSQUARE_CLIENT_ID', 'FOURSQUARE_CLIENT_SECRET')
    if not os.environ.get(env_name)
]
if _missing_credentials:
    raise RuntimeError(
        'Missing environment variable(s): {}'.format(', '.join(_missing_credentials)))

CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID'] # your Foursquare ID
CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET'] # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

In [15]:
#function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category attached to a venue record.

    Parameters
    ----------
    row : mapping-like
        Looked up under ``'categories'`` first, then under
        ``'venue.categories'`` when the first key is absent.

    Returns
    -------
    The ``'name'`` entry of the first category, or ``None`` when the category
    value is empty or has no length (for example ``None`` or NaN).

    Raises
    ------
    KeyError
        If neither key is present in ``row``.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; other errors are not hidden.
        categories_list = row['venue.categories']

    try:
        category_count = len(categories_list)
    except TypeError:
        # A missing value (None / NaN) has no length: treat it as "no category".
        return None

    if category_count == 0:
        return None
    return categories_list[0]['name']

In [16]:
#function that gets the nearby venues
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare ``venues/explore`` endpoint around each location.

    Reads the module-level ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION`` and
    ``LIMIT`` at call time and prints each location name as it is processed.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Iterated in parallel; one request is made per (name, lat, lng) triple.
    radius : int, default 500
        Sent as the ``radius`` query parameter (presumably metres; confirm
        against the Foursquare API documentation).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns ``Neighborhood``,
        ``Latitude``, ``Longitude``, ``Venue``, ``Venue Latitude``,
        ``Venue Longitude`` and ``Venue Category``. The frame has these columns
        even when no venue was returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    column_names = ['Neighborhood',
                    'Latitude',
                    'Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and escape the query string instead of formatting it by hand.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,  # do not hang the notebook forever on a stalled connection
        )
        # Surface HTTP errors directly rather than as a KeyError further down.
        response.raise_for_status()

        groups = response.json()['response']['groups']
        results = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            # A venue may come without categories; record None instead of raising IndexError.
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the columns to the constructor also works when no venue was found.
    nearby_venues = pd.DataFrame(venue_rows, columns=column_names)

    return(nearby_venues)

In [17]:
LIMIT = 100 # read by getNearbyVenues and sent as the request's limit parameter
radius = 500 # search radius handed to getNearbyVenues
# radius is now passed explicitly: it used to be defined here but never handed
# to the call, so editing it had no effect on the query.
brooklyn_venues = getNearbyVenues(names = brooklyn_data['Neighborhood'],
                                   latitudes = brooklyn_data['Latitude'],
                                   longitudes = brooklyn_data['Longitude'],
                                   radius = radius
                                  )

Bay Ridge
Bensonhurst
Sunset Park
Greenpoint
Gravesend
Brighton Beach
Sheepshead Bay
Manhattan Terrace
Flatbush
Crown Heights
East Flatbush
Kensington
Windsor Terrace
Prospect Heights
Brownsville
Williamsburg
Bushwick
Bedford Stuyvesant
Brooklyn Heights
Cobble Hill
Carroll Gardens
Red Hook
Gowanus
Fort Greene
Park Slope
Cypress Hills
East New York
Starrett City
Canarsie
Flatlands
Mill Island
Manhattan Beach
Coney Island
Bath Beach
Borough Park
Dyker Heights
Gerritsen Beach
Marine Park
Clinton Hill
Sea Gate
Downtown
Boerum Hill
Prospect Lefferts Gardens
Ocean Hill
City Line
Bergen Beach
Midwood
Prospect Park South
Georgetown
East Williamsburg
North Side
South Side
Ocean Parkway
Fort Hamilton
Ditmas Park
Wingate
Rugby
Remsen Village
New Lots
Paerdegat Basin
Mill Basin
Fulton Ferry
Vinegar Hill
Weeksville
Broadway Junction
Dumbo
Homecrest
Highland Park
Madison
Erasmus


In [18]:
# Count, per neighborhood, the non-null entries in each column (i.e. how many venues were returned).
brooklyn_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bath Beach,50,50,50,50,50,50
Bay Ridge,84,84,84,84,84,84
Bedford Stuyvesant,25,25,25,25,25,25
Bensonhurst,34,34,34,34,34,34
Bergen Beach,6,6,6,6,6,6
...,...,...,...,...,...,...
Vinegar Hill,31,31,31,31,31,31
Weeksville,20,20,20,20,20,20
Williamsburg,35,35,35,35,35,35
Windsor Terrace,25,25,25,25,25,25


In [19]:
# one hot encoding: one indicator column per venue category
category_dummies = pd.get_dummies(brooklyn_venues[['Venue Category']], prefix="", prefix_sep="")

# A venue category that is itself called 'Neighborhood' would collide with the
# label column added below (the old assignment silently overwrote it and then
# moved the wrong column to the front), so it is renamed first.
category_dummies = category_dummies.rename(
    columns={'Neighborhood': 'Neighborhood (Venue Category)'})

# put the neighborhood label in front of the indicator columns
brooklyn_onehot = pd.concat([brooklyn_venues[['Neighborhood']], category_dummies], axis=1)

# mean of each indicator per neighborhood = share of that neighborhood's venues in the category
brooklyn_grouped = brooklyn_onehot.groupby('Neighborhood').mean().reset_index()

In [20]:
def return_most_common_venues(row, num_top_venues):
    """Return the index labels of the largest entries of ``row``.

    The first element of ``row`` is skipped; the remaining entries are sorted
    in descending order and the labels of the first ``num_top_venues`` of them
    are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    top_labels = ranked.index.values
    return top_labels[:num_top_venues]

In [21]:
num_top_venues = 10
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    rank = int(ind) + 1
    # Ordinal suffix chosen explicitly instead of via a bare `except`:
    # 11-13 (and 111-113, ...) take 'th'; otherwise a last digit of 1/2/3 picks
    # st/nd/rd, so 21 becomes '21st' rather than '21th'.
    if rank % 100 in (11, 12, 13) or not 1 <= rank % 10 <= 3:
        suffix = 'th'
    else:
        suffix = indicators[rank % 10 - 1]
    columns.append('{}{} Most Common Venue'.format(rank, suffix))

# create a new dataframe
Pc_venues_sorted = pd.DataFrame(columns=columns)
Pc_venues_sorted['Neighborhood'] = brooklyn_grouped['Neighborhood']

# fill every row with that neighborhood's most frequent venue categories
for ind in np.arange(brooklyn_grouped.shape[0]):
    Pc_venues_sorted.iloc[ind, 1:] = return_most_common_venues(brooklyn_grouped.iloc[ind, :], num_top_venues)

In [22]:
# Preview the ranked venue categories for the first neighborhoods.
Pc_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bath Beach,Pharmacy,Chinese Restaurant,Pizza Place,Gas Station,Bubble Tea Shop,Italian Restaurant,Fast Food Restaurant,Sushi Restaurant,Deli / Bodega,Dessert Shop
1,Bay Ridge,Italian Restaurant,Pizza Place,Spa,American Restaurant,Greek Restaurant,Bar,Bagel Shop,Thai Restaurant,Ice Cream Shop,Playground
2,Bedford Stuyvesant,Coffee Shop,Café,Pizza Place,Bar,Bagel Shop,Fried Chicken Joint,New American Restaurant,Boutique,Gift Shop,Gourmet Shop
3,Bensonhurst,Grocery Store,Chinese Restaurant,Flower Shop,Ice Cream Shop,Pizza Place,Sushi Restaurant,Donut Shop,Italian Restaurant,Noodle House,Liquor Store
4,Bergen Beach,Harbor / Marina,Athletics & Sports,Baseball Field,Playground,Donut Shop,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fish & Chips Shop


In [23]:
kclusters = 5
# Keyword form of drop: the positional axis argument is no longer accepted by pandas 2.0.
brooklyn_grouped_clustering = brooklyn_grouped.drop(columns='Neighborhood')
# run k-means clustering
# n_init is pinned to 10, the value older scikit-learn releases used by default,
# so the clustering does not change when the library default does.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(brooklyn_grouped_clustering)
#add cluster labels
# Remove labels left over from a previous run so the cell can be re-executed;
# insert() raises if the column already exists.
if 'Cluster Labels' in Pc_venues_sorted.columns:
    Pc_venues_sorted = Pc_venues_sorted.drop(columns='Cluster Labels')
Pc_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
# attach the cluster label and the ranked venue categories to each neighborhood's coordinates
brooklyn_merged = brooklyn_data.join(Pc_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
# A neighborhood without any venue has no cluster after the left join, which
# would turn the label column into floats with NaN. Drop those rows and keep
# integer labels so they can be used as list indices when colouring the map.
brooklyn_merged = brooklyn_merged.dropna(subset=['Cluster Labels'])
brooklyn_merged['Cluster Labels'] = brooklyn_merged['Cluster Labels'].astype(int)
brooklyn_merged.head()

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bay Ridge,40.625801,-74.030621,0,Italian Restaurant,Pizza Place,Spa,American Restaurant,Greek Restaurant,Bar,Bagel Shop,Thai Restaurant,Ice Cream Shop,Playground
1,Bensonhurst,40.611009,-73.99518,0,Grocery Store,Chinese Restaurant,Flower Shop,Ice Cream Shop,Pizza Place,Sushi Restaurant,Donut Shop,Italian Restaurant,Noodle House,Liquor Store
2,Sunset Park,40.645103,-74.010316,0,Pizza Place,Bank,Bakery,Latin American Restaurant,Mexican Restaurant,Mobile Phone Shop,Gym,Fried Chicken Joint,Pharmacy,Café
3,Greenpoint,40.730201,-73.954241,0,Bar,Pizza Place,Coffee Shop,Cocktail Bar,Yoga Studio,Deli / Bodega,French Restaurant,Sushi Restaurant,Restaurant,Furniture / Home Store
4,Gravesend,40.59526,-73.973471,0,Italian Restaurant,Pizza Place,Bus Station,Lounge,Bakery,Chinese Restaurant,Martial Arts Dojo,Men's Store,Metro Station,Furniture / Home Store


In [24]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
# (the former x / ys helpers were only used for their length, which equals kclusters)
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, nei, cluster in zip(brooklyn_merged['Latitude'], brooklyn_merged['Longitude'], brooklyn_merged['Neighborhood'], brooklyn_merged['Cluster Labels']):
    # Rows without a cluster label cannot be coloured; skip them instead of crashing.
    if pd.isna(cluster):
        continue
    # Labels may arrive as floats when the column contains missing values;
    # list indexing needs a plain int.
    cluster = int(cluster)
    label = folium.Popup(str(nei) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster - 1 is kept as before: label 0 wraps around to the last colour via
    # negative indexing, so every cluster still gets its own colour.
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [25]:
# Show the merged row (cluster label and ranked venue categories) for Williamsburg.
brooklyn_merged.loc[brooklyn_merged['Neighborhood'] == 'Williamsburg']

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,Williamsburg,40.707144,-73.958115,0,Coffee Shop,Bar,Bagel Shop,Yoga Studio,Greek Restaurant,Korean Restaurant,Tapas Restaurant,Taco Place,Event Space,Liquor Store


In [26]:
# Neighborhoods whose cluster label is 0, renumbered from zero
has_label_zero = brooklyn_merged['Cluster Labels'].eq(0)
cluster3 = brooklyn_merged.loc[has_label_zero].reset_index(drop=True)
cluster3.head()

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bay Ridge,40.625801,-74.030621,0,Italian Restaurant,Pizza Place,Spa,American Restaurant,Greek Restaurant,Bar,Bagel Shop,Thai Restaurant,Ice Cream Shop,Playground
1,Bensonhurst,40.611009,-73.99518,0,Grocery Store,Chinese Restaurant,Flower Shop,Ice Cream Shop,Pizza Place,Sushi Restaurant,Donut Shop,Italian Restaurant,Noodle House,Liquor Store
2,Sunset Park,40.645103,-74.010316,0,Pizza Place,Bank,Bakery,Latin American Restaurant,Mexican Restaurant,Mobile Phone Shop,Gym,Fried Chicken Joint,Pharmacy,Café
3,Greenpoint,40.730201,-73.954241,0,Bar,Pizza Place,Coffee Shop,Cocktail Bar,Yoga Studio,Deli / Bodega,French Restaurant,Sushi Restaurant,Restaurant,Furniture / Home Store
4,Gravesend,40.59526,-73.973471,0,Italian Restaurant,Pizza Place,Bus Station,Lounge,Bakery,Chinese Restaurant,Martial Arts Dojo,Men's Store,Metro Station,Furniture / Home Store


In [27]:
# Within that cluster, keep the neighborhoods whose top-ranked venue category is a coffee shop
coffee_is_top = cluster3['1st Most Common Venue'].eq('Coffee Shop')
cl3_coffee = cluster3[coffee_is_top].reset_index(drop=True)
cl3_coffee

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Williamsburg,40.707144,-73.958115,0,Coffee Shop,Bar,Bagel Shop,Yoga Studio,Greek Restaurant,Korean Restaurant,Tapas Restaurant,Taco Place,Event Space,Liquor Store
1,Bedford Stuyvesant,40.687232,-73.941785,0,Coffee Shop,Café,Pizza Place,Bar,Bagel Shop,Fried Chicken Joint,New American Restaurant,Boutique,Gift Shop,Gourmet Shop
2,Park Slope,40.672321,-73.97705,0,Coffee Shop,Burger Joint,Bagel Shop,Pet Store,Korean Restaurant,Bookstore,Italian Restaurant,Bakery,Pizza Place,American Restaurant
3,North Side,40.714823,-73.958809,0,Coffee Shop,Pizza Place,Yoga Studio,Wine Bar,Bar,Bakery,American Restaurant,Vegetarian / Vegan Restaurant,Jewelry Store,Cocktail Bar
4,Dumbo,40.703176,-73.988753,0,Coffee Shop,Park,Scenic Lookout,Bakery,Café,Boxing Gym,Italian Restaurant,Gym,Pizza Place,Bar


In [28]:
# Map of the coffee-shop neighborhoods, centred on the previously geocoded coordinates
map_coffee = folium.Map(location=[latitude, longitude], zoom_start=12)

# Appearance shared by every marker
coffee_marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle marker per neighborhood, with the neighborhood name as popup text
for lat, lng, label in zip(cl3_coffee['Latitude'], cl3_coffee['Longitude'], cl3_coffee['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([lat, lng], popup=label, **coffee_marker_style).add_to(map_coffee)

map_coffee