# Business plan
## By Michael Moxey

Using the Foursquare API I will find out what borough you are most likely in depending on which store you are in. I will create a decision tree that will go through the boroughs comparing the likelyhood of stores.

## Data 

The data I will use is the 2014 New york city Neighborhood Names from NYU Spatial Data Repository and the foursquare API.

In [1]:
import numpy as np 
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

In [2]:
#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium
import json

In [3]:
with open('C:\\Users\\micha\\Desktop\\Jupyter notebook\\rows.json') as json_data:
    newyork_data = json.load(json_data)

In [4]:
newyork_data

{'meta': {'view': {'id': 'xyye-rtrs',
   'name': 'NHoodNameCentroids',
   'assetType': 'invalid_datatype',
   'averageRating': 0,
   'createdAt': 1365528012,
   'displayType': 'geoRows',
   'downloadCount': 13461,
   'hideFromCatalog': True,
   'hideFromDataJson': True,
   'indexUpdatedAt': 1537209301,
   'newBackend': True,
   'numberOfComments': 0,
   'oid': 15699813,
   'provenance': 'community',
   'publicationAppendEnabled': False,
   'publicationGroup': 6258719,
   'publicationStage': 'published',
   'tableId': 6258719,
   'totalTimesRated': 0,
   'viewCount': 56,
   'viewLastModified': 1536598173,
   'viewType': 'tabular',
   'approvals': [{'reviewableUid': '99bc-9p23',
     'reviewedAt': 1365528013,
     'reviewedAutomatically': True,
     'state': 'approved',
     'submissionId': 1065643,
     'submissionObject': 'public_audience_request',
     'submissionOutcome': 'change_audience',
     'submittedAt': 1365528013,
     'workflowId': 2285,
     'submissionDetails': {'permissio

In [5]:
neighborhoods_data = newyork_data['data']

In [6]:
neighborhoods_data[1]

['row-s456-uyfi_n9s7',
 '00000000-0000-0000-20BE-C45ADF984BA3',
 0,
 1450726363,
 None,
 1450726363,
 None,
 '{ }',
 'POINT (-73.82993910812405 40.87429419303015)',
 '2',
 'Co-op City',
 '2',
 'Co-op',
 'City',
 '',
 '0.0',
 'Bronx']

In [7]:
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)

In [8]:
neighborhoods

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude


In [9]:
for data in neighborhoods_data:
    borough = neighborhood_name = data[16] 
    neighborhood_name = data[10]
        
    val = data[8].split('(', 1)[1].split(')')[0]
    neighborhood_latlon = val.split()
    neighborhood_lat = neighborhood_latlon[0]
    neighborhood_lon = neighborhood_latlon[1]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [10]:
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,-73.8472005205491,40.89470517661004
1,Bronx,Co-op City,-73.82993910812405,40.87429419303015
2,Bronx,Eastchester,-73.82780644716419,40.88755567735082
3,Bronx,Fieldston,-73.90564259591689,40.89543742690388
4,Bronx,Riverdale,-73.91258546108577,40.89083449389134


In [11]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighborhoods['Borough'].unique()),
        neighborhoods.shape[0]
    )
)

The dataframe has 5 boroughs and 299 neighborhoods.


In [13]:
address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


In [14]:
# create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

In [15]:
queens_data = neighborhoods[neighborhoods['Borough'] == 'Queens'].reset_index(drop=True)
queens_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Queens,Astoria,-73.91565374304241,40.768508593354966
1,Queens,Woodside,-73.90184166838291,40.746349088602265
2,Queens,Jackson Heights,-73.88282109164372,40.75198138007372
3,Queens,Elmhurst,-73.88165622288396,40.74404850512207
4,Queens,Howard Beach,-73.83813764600289,40.6542252773849


In [16]:
address = 'Queens, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Queens are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Queens are 40.7498243, -73.7976337.


In [19]:
# create map of Manhattan using latitude and longitude values
map_queens = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(queens_data['Latitude'], queens_data['Longitude'], queens_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_queens)  
    
map_queens

## Using Foursquare API

In [12]:
CLIENT_ID = 'QE4UWQWDE1HS4Z15MUCAG3V2NW4BMV3KXNMJK5AQVY0CIBIL' # your Foursquare ID
CLIENT_SECRET = 'K5GA3VT1GJVEP3HPKMARARBYQGDZFYLMFADVX2NBLRZQ1SU3' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: QE4UWQWDE1HS4Z15MUCAG3V2NW4BMV3KXNMJK5AQVY0CIBIL
CLIENT_SECRET:K5GA3VT1GJVEP3HPKMARARBYQGDZFYLMFADVX2NBLRZQ1SU3


In [25]:
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 # define radius

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude, 
    longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=QE4UWQWDE1HS4Z15MUCAG3V2NW4BMV3KXNMJK5AQVY0CIBIL&client_secret=K5GA3VT1GJVEP3HPKMARARBYQGDZFYLMFADVX2NBLRZQ1SU3&v=20180605&ll=40.7498243,-73.7976337&radius=500&limit=100'

In [33]:
results = requests.get(url).json()
#results

In [27]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [28]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Pado Sushi Restaurant,Korean Restaurant,40.753599,-73.794425
1,Western Riding Club,Sports Club,40.749359,-73.797699
2,Underhill Ave Bike Trail,Bike Trail,40.747286,-73.799391
3,MTA - Q65 Bus Stop,Bus Station,40.748537,-73.801613


In [29]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [30]:
queens_data.shape

(78, 4)

In [35]:
queens_venues = getNearbyVenues(names= queens_data['Neighborhood'],
                                   latitudes= queens_data['Latitude'],
                                   longitudes= queens_data['Longitude'],
                                  )

Astoria
Woodside
Jackson Heights
Elmhurst
Howard Beach
South Corona
Forest Hills
Kew Gardens
Richmond Hill
Downtown Flushing
Long Island City
Sunnyside
East Elmhurst
Maspeth
Ridgewood
Glendale
Rego Park
Woodhaven
Ozone Park
South Ozone Park
College Point
Whitestone
Bayside
Auburndale
Little Neck
Douglaston
Glen Oaks
Bellerose
Kew Gardens Hills
Fresh Meadows
Briarwood
Jamaica Center
Oakland Gardens
Queens Village
Hollis
South Jamaica
St. Albans
Rochdale
Springfield Gardens
Cambria Heights
Rosedale
Far Rockaway
Broad Channel
Breezy Point
Steinway
Beechhurst
Bay Terrace
Edgemere
Arverne
Seaside
Neponsit
Murray Hill
Floral Park
Holliswood
Jamaica Estates
Queensboro Hill
Hillcrest
Ravenswood
Lindenwood
Laurelton
Lefrak City
Belle Harbor
Rockaway Park
Somerville
Brookville
Bellaire
North Corona
Forest Hills Gardens
Jamaica Hills
Utopia
Pomonok
Astoria Heights
Hunters Point
Sunnyside Gardens
Blissville
Roxbury
Middle Village
Malba


ValueError: Length mismatch: Expected axis has 0 elements, new values have 7 elements

In [36]:
print(queens_venues.shape)
queens_venues.head()

NameError: name 'queens_venues' is not defined

In [None]:
manhattan_venues.groupby('Neighborhood').count()