In [4]:
# First Part, The Report:

# 1 - Introduction: The problem I chose is this: if a person wants to open a restaurant in New York City, which location would be most suitable
# for them and their business?
# This problem targets business owners in NYC who would like to know which factors determine whether a location is suitable for their business
# to thrive.

# 2 - Data: The data I will be using is the Foursquare location data.
# I will use this data to determine which area in NYC is best for the restaurant, by looking at several factors, including:
# the shops and restaurants that are open near the area, the location of the area and whether it is visited by many people,
# the rating and popularity of nearby shops, and whether the restaurant would be a unique addition to the area.

# 3 - Methodology: I first imported and loaded the NYC data, then I chose to focus on Manhattan, as it is the most densely populated borough of NYC,
# and for the restaurant to thrive, we need a lot of people to come to the area.

# 4 - Results: After that, I checked the neighborhood and nearby venues and chose the place I found most suitable for the restaurant,
# the place had a diverse set of venues, only 1 pizza place, and other interesting venues someone would like to visit before or after
# eating in the restaurant.

# 5 - Discussion: I would recommend that you carefully study the areas that relate to your problem, and make sure that the variables which affect
# your problem are present in the data.

# 6 - Conclusion: The best areas to open a restaurant are those which don't have many competing restaurants but do have a diverse set of venues,
# and which are popular areas visited often by a large number of people.

In [9]:
# Second Part, The Code:
# First, we import the libraries
import json # library to handle JSON files
import os # library to read settings (e.g. API credentials) from environment variables

import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

!pip install geopy 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
try:
    from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe
except ImportError:
    # pandas.io.json.json_normalize was deprecated in pandas 1.0 in favour of pandas.json_normalize
    from pandas import json_normalize



In [13]:
# we then import and load new york data
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data) 
neighborhoods_data = newyork_data['features']            # we define a new variable that contains the relevant data

column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 
neighborhoods = pd.DataFrame(columns=column_names)                     # we then transform the data into a dataframe

for data in neighborhoods_data:                                       # we then loop through the data and fill the dataframe
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

# we create a new dataframe that holds only the Manhattan neighborhoods
manhattan_data = neighborhoods[neighborhoods['Borough'] == 'Manhattan'].reset_index(drop=True)

# we get the latitude, longitude values of manhattan
address = 'Manhattan, NY'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when the address cannot be resolved; fail with a
    # clear message instead of an AttributeError on the next line
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude

# we define our foursquare credentials and version
# The credentials are read from the environment when available, so they can be
# supplied without editing the notebook.
# NOTE(review): the fallback values below are real secrets committed in the notebook;
# rotate them and remove the fallbacks once FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET
# are set in every environment that runs this notebook.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'W4RVZZONWGFSOJWOAFXWUYT3MHG2CDDQ5WJO2M23QREYJYHG')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '1RGGK2SSIS5ZB4OVFMNDF5H50LSG2BRE2L5ERP3BY4H25BXS')
VERSION = '20180605'

# we pick the first neighborhood of the Manhattan dataframe to explore
neighborhood_latitude = manhattan_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = manhattan_data.loc[0, 'Longitude'] # neighborhood longitude value
neighborhood_name = manhattan_data.loc[0, 'Neighborhood']   # neighborhood name

LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops here on an HTTP error (bad credentials, quota, ...)
# instead of letting an error payload flow into the later cells.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()

# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category listed for a venue.

    Parameters
    ----------
    row : mapping-like (e.g. a dict or a pandas Series)
        Must provide the list of category dicts under the key 'categories'
        or, when that key is absent, under 'venue.categories'.

    Returns
    -------
    The 'name' entry of the first category, or None when the list is empty.

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # only a missing key triggers the fallback; any other error is a real
        # problem and must not be silently swallowed
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']
    
venues = results['response']['groups'][0]['items']
# pd.json_normalize is the supported entry point; the pandas.io.json alias was deprecated in pandas 1.0
nearby_venues = pd.json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
# explicit copy so the column assignment below never writes into a view of the flattened frame
nearby_venues = nearby_venues.loc[:, filtered_columns].copy()

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns: keep only the last part of each dotted name (e.g. 'venue.location.lat' -> 'lat')
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

# as we can see, this place in Manhattan has only 1 pizza place and 23 other venues of good variety which people visit a lot,
# which would indicate that this is a great place for the restaurant.
nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Arturo's,Pizza Place,40.874412,-73.910271
1,Bikram Yoga,Yoga Studio,40.876844,-73.906204
2,Tibbett Diner,Diner,40.880404,-73.908937
3,Starbucks,Coffee Shop,40.877531,-73.905582
4,Astral Fitness & Wellness Center,Gym,40.876705,-73.906372
5,Dunkin',Donut Shop,40.877136,-73.906666
6,Rite Aid,Pharmacy,40.875467,-73.908906
7,TCR The Club of Riverdale,Tennis Stadium,40.878628,-73.914568
8,Starbucks,Coffee Shop,40.873755,-73.908613
9,Blink Fitness,Gym,40.877271,-73.905595
