# Find competitors by Borough

## 1. Introduction

There are 5 Boroughs in New York.
Let' find competitors profile for each one of them.

### Bronx
### Brooklyn
### Manhattan
### Queens
### Staten Island


## 2. Libraries

In [6]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')


Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Libraries imported.


## 3 Now let's import and plot the Neighborhoods of New York

Import the Data file

In [7]:
!wget -q -O 'newyork_data.json' https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs/newyork_data.json
print('Data downloaded!')

Data downloaded!


Let's have a look : this is a standard JSON data file and we are looking for Neighborhoods coordinates

In [8]:
# read JSON file
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)

# import into a Dataframe for future manipulation
neighborhoods_data = newyork_data['features']
neighborhoods_data[0]

{'type': 'Feature',
 'id': 'nyu_2451_34572.1',
 'geometry': {'type': 'Point',
  'coordinates': [-73.84720052054902, 40.89470517661]},
 'geometry_name': 'geom',
 'properties': {'name': 'Wakefield',
  'stacked': 1,
  'annoline1': 'Wakefield',
  'annoline2': None,
  'annoline3': None,
  'annoangle': 0.0,
  'borough': 'Bronx',
  'bbox': [-73.84720052054902,
   40.89470517661,
   -73.84720052054902,
   40.89470517661]}}

In [9]:
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)


In [10]:
# loop into the JSON structure to retrieve all the details
for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [11]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighborhoods['Borough'].unique()),
        neighborhoods.shape[0]
    )
)

The dataframe has 5 boroughs and 306 neighborhoods.


In [12]:
neighborhoods.groupby('Borough').count()

Unnamed: 0_level_0,Neighborhood,Latitude,Longitude
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bronx,52,52,52
Brooklyn,70,70,70
Manhattan,40,40,40
Queens,81,81,81
Staten Island,63,63,63


In [122]:
borough_name ='Staten Island'

### 4 Looking after Drive-in and Food-truck implementation using Foursquare API

In [123]:
CLIENT_ID = 'NDWI4WXLNXAPZ041WJT1BAORH5LXPQHLEUFOLEEFOJ3M2H0H' # your Foursquare ID
CLIENT_SECRET = 'ETKGJLHOWVB2GVDAVOMVHL3TOCPVX0TD0U5P42PA5U3PLHUW' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: NDWI4WXLNXAPZ041WJT1BAORH5LXPQHLEUFOLEEFOJ3M2H0H
CLIENT_SECRET:ETKGJLHOWVB2GVDAVOMVHL3TOCPVX0TD0U5P42PA5U3PLHUW


In [124]:
# Define a function to retrieve Nearby venues based on some key-words
def getNearbyVenues(names, latitudes, longitudes, search_query, radius=500):
    
    venues_list=[]
    for name, lat, lng, query in zip(names, latitudes, longitudes, search_query):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&query={}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            search_query,
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [125]:
neighborhoods.groupby('Borough').count()

Unnamed: 0_level_0,Neighborhood,Latitude,Longitude
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bronx,52,52,52
Brooklyn,70,70,70
Manhattan,40,40,40
Queens,81,81,81
Staten Island,63,63,63


In [126]:
manhattan_data = neighborhoods[neighborhoods['Borough'] == borough_name].reset_index(drop=True)
manhattan_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Staten Island,St. George,40.644982,-74.079353
1,Staten Island,New Brighton,40.640615,-74.087017
2,Staten Island,Stapleton,40.626928,-74.077902
3,Staten Island,Rosebank,40.615305,-74.069805
4,Staten Island,West Brighton,40.631879,-74.107182


In [127]:
address = borough_name + ', NY'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of {} are {}, {}.'.format(borough_name, latitude, longitude))

The geograpical coordinate of Staten Island are 40.5834557, -74.1496048.


In [128]:
# create map of Manhattan using latitude and longitude values
map_manhattan = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(manhattan_data['Latitude'], manhattan_data['Longitude'], manhattan_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_manhattan)  
    
map_manhattan

In [129]:
file_name = borough_name +'_neighborhoods.png'
map_manhattan.save(file_name)

In [130]:
# Get Fast Food venues in Manhattan 
manhattan_ff_venues = getNearbyVenues(names=manhattan_data['Neighborhood'],
                                   latitudes=manhattan_data['Latitude'],
                                   longitudes=manhattan_data['Longitude'],
                                   search_query='Fast Food')

St. George
New Brighton
Stapleton
Rosebank
West Brighton
Grymes Hill
Todt Hill
South Beach
Port Richmond


In [131]:
# # Save the information so far to a .csv file due to limited calls on FourSquare
ff_file_name = borough_name +'_fastfood_tocsv.csv'
manhattan_ff_venues.to_csv(ff_file_name)
manhattan_ff_venues.shape

(9, 7)

In [132]:
# Get Office venues in Manhattan
manhattan_off_venues = getNearbyVenues(names=manhattan_data['Neighborhood'],
                                   latitudes=manhattan_data['Latitude'],
                                   longitudes=manhattan_data['Longitude'],
                                   search_query='Office')

St. George
New Brighton
Stapleton
Rosebank
West Brighton
Grymes Hill


In [133]:
off_file_name = borough_name +'_office_tocsv.csv'
manhattan_off_venues.to_csv(off_file_name)
manhattan_off_venues.shape

(4, 7)

In [136]:
# Get building venues in Manhattan
manhattan_bdg_venues = getNearbyVenues(names=manhattan_data['Neighborhood'],
                                   latitudes=manhattan_data['Latitude'],
                                   longitudes=manhattan_data['Longitude'],
                                   search_query='Building')

St. George
New Brighton
Stapleton
Rosebank
West Brighton
Grymes Hill
Todt Hill
South Beach


ValueError: Length mismatch: Expected axis has 0 elements, new values have 7 elements

In [138]:
bdg_file_name = borough_name +'_building_tocsv.csv'
manhattan_bdg_venues.to_csv(bdg_file_name)
manhattan_bdg_venues.shape

(2, 7)

In [137]:
# Merge FastFood venues and Competitors venues
manhattan_venues = manhattan_ff_venues.append(manhattan_bdg_venues).append(manhattan_off_venues)
print(manhattan_venues.shape)
manhattan_venues.head()

(15, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,St. George,40.644982,-74.079353,The Burrito Shoppe,40.643639,-74.077919,Fast Food Restaurant
1,St. George,40.644982,-74.079353,Popeyes Louisiana Kitchen,40.6441,-74.07695,Fast Food Restaurant
2,St. George,40.644982,-74.079353,Subway,40.646618,-74.08107,Fast Food Restaurant
3,Stapleton,40.626928,-74.077902,Subway,40.627248,-74.075881,Fast Food Restaurant
4,Stapleton,40.626928,-74.077902,McDonald's,40.629947,-74.076408,Fast Food Restaurant


In [139]:
manhattan_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Kingsbridge,1,1,1,1,1,1
Port Richmond,1,1,1,1,1,1
Rosebank,1,1,1,1,1,1
St. George,5,5,5,5,5,5
Stapleton,3,3,3,3,3,3
West Brighton,3,3,3,3,3,3
Woodlawn,1,1,1,1,1,1


In [140]:
print('There are {} uniques categories.'.format(len(manhattan_venues['Venue Category'].unique())))

There are 5 uniques categories.


In [141]:
# let's assume Competitors are TakeAway categorized as  : Sandwich Place, Salad Place, Fast Food Restaurant, Food Truck
fastfood_dict = {'Fast Food Restaurant' : 'Fast Food',
                 'Salad Place' : 'Salad',
                 'Sandwich Place' : 'Sandwich',
                 'Food Truck' : 'Truck'}
# Add a new column named 'FoodType' 
manhattan_venues['FoodType'] = manhattan_venues['Venue Category'].map(fastfood_dict) 
manhattan_venues['FoodType'] = manhattan_venues['FoodType'].fillna('Restaurant')


# let's assume Consumer are from Office building  : Office, Building
customer_dict = {'Office' : 'Worker',
                 'Building' : 'Worker'}
# Add a new column named 'FoodType' 
manhattan_venues['Customer'] = manhattan_venues['Venue Category'].map(customer_dict) 
manhattan_venues['Customer'] = manhattan_venues['Customer'].fillna('NotCustomer')

# let's focus on Competitor
#manhattan_competitors = manhattan_venues[manhattan_venues.FoodType.isin(['Fast','Truck'])]

manhattan_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,FoodType,Customer
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Kingsbridge,1,1,1,1,1,1,1,1
Port Richmond,1,1,1,1,1,1,1,1
Rosebank,1,1,1,1,1,1,1,1
St. George,5,5,5,5,5,5,5,5
Stapleton,3,3,3,3,3,3,3,3
West Brighton,3,3,3,3,3,3,3,3
Woodlawn,1,1,1,1,1,1,1,1


In [142]:
manhattan_venues.shape

(15, 9)

In [143]:
competitor_file_name = 'competitor_'+borough_name +'_tocsv.csv'
manhattan_venues.to_csv(competitor_file_name)