### Importing all the required libraries.

In [1]:
import numpy as np 
import pandas as pd 
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json 
from geopy.geocoders import Nominatim # converts an address into latitude and longitude values

import requests 
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans
import folium 

In [2]:
with open('NYC.json') as json_data:
    nyc_data = json.load(json_data)

In [3]:
ngh_data = nyc_data['features']

Defining the columns of the dataframe

In [4]:
cols = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 
nghs = pd.DataFrame(columns=cols)
nghs

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude


In [5]:
for data in ngh_data:
    bgh = ngh_name = data['properties']['borough'] 
    ngh_name = data['properties']['name']
        
    ngh_lat_long = data['geometry']['coordinates']
    ngh_lat = ngh_lat_long[1]
    ngh_long = ngh_lat_long[0]
    
    nghs = nghs.append({'Borough': bgh,
                        'Neighborhood': ngh_name,
                        'Latitude': ngh_lat,
                        'Longitude': ngh_long}, ignore_index=True)

In [6]:
nghs.head(11)

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585
5,Bronx,Kingsbridge,40.881687,-73.902818
6,Manhattan,Marble Hill,40.876551,-73.91066
7,Bronx,Woodlawn,40.898273,-73.867315
8,Bronx,Norwood,40.877224,-73.879391
9,Bronx,Williamsbridge,40.881039,-73.857446


Now, making use of the geopy library to obtain the latitude and longitude values of NYC

In [7]:
add = 'New York City, NY'
geoloc = Nominatim(user_agent="ny_explorer")
loc = geoloc.geocode(add)
latitude = loc.latitude
longitude = loc.longitude
print('The Latitude & longitude values of NYC are {}, {}.'.format(latitude, longitude))

The Latitude & longitude values of NYC are 40.7127281, -74.0060152.


Creating a map of NYC using latitude and longitude values

In [8]:
map_nyc = folium.Map(location=[latitude, longitude], zoom_start=10)

# adding markers to map of NYC
for lat, lng, bgh, ngh in zip(nghs['Latitude'], nghs['Longitude'], nghs['Borough'], nghs['Neighborhood']):
    label = '{}, {}'.format(ngh, bgh)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius = 5,
        popup = label,
        color = 'blue',
        fill = True,
        fill_color = '#3186cc',
        fill_opacity = 0.7,
        parse_html = False).add_to(map_nyc)  
    
map_nyc

Using the client id, client secret and version details from foursquare

In [9]:
client_id = 'FNOIZT3LLD1JMD4PYLSRH4H3AL5BRN2ECW1BC0S0YLUZAHMR' 
client_secret = 'K0GXEFGYMEJXKIVBCNRCHSU0ILTBPLISWVI2SQIFJUEBODEX'
version = '20180605' 

In [10]:
lim = 500 
bghs = ["Bronx","Manhattan","Brooklyn","Queens","Staten Island"]
result = {}
for bgh in bghs:
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&near={}&limit={}&categoryId={}'.format(
        client_id, 
        client_secret, 
        version, 
        bgh,
        lim,
        "4c2cd86ed066bed06c3c5209") # gluten-free restaurants category id from foursquare
    result[bgh] = requests.get(url).json()

In [11]:
venues_df={} 
for bgh in bghs:
    venues = json_normalize(result[bgh]['response']['groups'][0]['items'])
    venues_df[bgh] = venues[['venue.name', 'venue.location.address', 'venue.location.lat', 'venue.location.lng']]
    venues_df[bgh].columns = ['Name', 'Address', 'Lat', 'Lng']

venues_df

  This is separate from the ipykernel package so we can avoid doing imports until


{'Bronx':                                 Name                             Address  \
 0                Mighty Oak Roasters                       2801 24th Ave   
 1                    The Health Nuts                      21135 26th Ave   
 2                        Candle Cafe                        1307 3rd Ave   
 3                            Tap NYC                    267 Columbus Ave   
 4                      Kome Waza UES                        1275 1st Ave   
 5                              Noglu                    1266 Madison Ave   
 6                       Beyond Sushi                        62 W 56th St   
 7                          by CHLOE.                   1 Rockefeller Plz   
 8      Mario's Restaurant & Catering                     2342 Arthur Ave   
 9                    The Little Beet                       1140 Broadway   
 10                One Stop Beer Shop                   134 Kingsland Ave   
 11             The Little Beet Table                      333 Park

The following code is a list of maps named under the variable 'maps' which will allow us to visulaize the number of gluten-free restaurants in each of the 5 different boroughs in NYC

In [12]:
maps = {}
for bgh in bghs:
    bgh_lat = np.mean([result[bgh]['response']['geocode']['geometry']['bounds']['ne']['lat'],
                        result[bgh]['response']['geocode']['geometry']['bounds']['sw']['lat']])
    bgh_lng = np.mean([result[bgh]['response']['geocode']['geometry']['bounds']['ne']['lng'],
                        result[bgh]['response']['geocode']['geometry']['bounds']['sw']['lng']])
    maps[bgh] = folium.Map(loc = [bgh_lat, bgh_lng], zoom_start = 11)

    # adding markers to the maps
    for lat, lng, label in zip(venues_df[bgh]['Lat'], venues_df[bgh]['Lng'], venues_df[bgh]['Name']):
        label = folium.Popup(label, parse_html = True)
        folium.CircleMarker(
            [lat, lng],
            radius = 5,
            popup = label,
            color = 'purple',
            fill = True,
            fill_color = '#3186cc',
            fill_opacity = 0.7,
            parse_html = False).add_to(maps[bgh])  
    print(f"The number of gluten-free restaurants in {bgh} = ", result[bgh]['response']['totalResults'])
    

The number of gluten-free restaurants in Bronx =  47
The number of gluten-free restaurants in Manhattan =  164
The number of gluten-free restaurants in Brooklyn =  111
The number of gluten-free restaurants in Queens =  56
The number of gluten-free restaurants in Staten Island =  58


Neighborhood | Number of gluten-free restaurants 
--- | --- 
Manhattan | 164
Brooklyn | 111
Staten Island | 58
Queens | 56
Bronx | 47

##### From the results shown above, we can clearly see that Manhattan has highest number of Gluten-free restraunts, followed by Brooklyn and then Staten Island. Note that, the borough having the maximum number of restraunts which is Manhattan, doesn't imply that these are accessible to the tourist. Therefore, we'll have to check for the density in specific boroughs.

### Plotting all the borough maps 

Note than bghs[0] - Bronx, bghs[1] - Manhattan, bghs[2] - Brooklyn, bghs[3] - Queens, bghs[4] - Staten Island

In [26]:
maps[bghs[0]]

In [27]:
maps[bghs[1]]

In [28]:
maps[bghs[2]]

In [29]:
maps[bghs[3]]

In [30]:
maps[bghs[4]]

The following code will show us how close the restraunts are next to each other. For this we will analyse the mean of the distances from the mean of the coordinates.
To do this, we'll have to first calculate the average of the coordinates i.e. average distance of all coordinates, and then following this, the average of the distances of the restraunts from the mean of the coordinates is calculated.  


In [31]:
maps_analysis = {}
for bgh in bghs:
    bgh_lat = np.mean([result[bgh]['response']['geocode']['geometry']['bounds']['ne']['lat'],
                        result[bgh]['response']['geocode']['geometry']['bounds']['sw']['lat']])
    bgh_lng = np.mean([result[bgh]['response']['geocode']['geometry']['bounds']['ne']['lng'],
                        result[bgh]['response']['geocode']['geometry']['bounds']['sw']['lng']])
    maps_analysis[bgh] = folium.Map(loc = [bgh_lat, bgh_lng], zoom_start = 10)
    venues_mean_coordinates = [venues_df[bgh]['Lat'].mean(), venues_df[bgh]['Lng'].mean()] 
    
    # adding markers to the maps
    for lat, lng, label in zip(venues_df[bgh]['Lat'], venues_df[bgh]['Lng'], venues_df[bgh]['Name']):
        label = folium.Popup(label, parse_html = True)
        folium.CircleMarker(
            [lat, lng],
            radius = 5,
            popup = label,
            color='black',
            fill = True,
            fill_color = '#3186cc',
            fill_opacity = 0.7,
            legend_name = 'borough:'+ bgh,
            parse_html = False).add_to(maps_analysis[bgh])
        folium.PolyLine([venues_mean_coordinates, [lat, lng]], color = "red", weight = 1.5, opacity = 0.5).add_to(maps_analysis[bgh])
    
    label = folium.Popup("Mean Co-ordinate", parse_html = True)
    folium.CircleMarker(
        venues_mean_coordinates,
        radius = 10,
        popup = label,
        color = 'blue',
        fill = True,
        fill_color = '#3186cc',
        fill_opacity = 0.7,
        caption = ''+bgh,
        parse_html = False).add_to(maps_analysis[bgh])

    a = np.mean(np.apply_along_axis(lambda x: np.linalg.norm(x - venues_mean_coordinates),1,venues_df[bgh][['Lat','Lng']].values))
    print('For the borough',bgh,", the average distance from the average coordinates is : ",a)
    

For the borough Bronx , the average distance from the average coordinates is :  0.06746237034747092
For the borough Manhattan , the average distance from the average coordinates is :  0.022651830023089826
For the borough Brooklyn , the average distance from the average coordinates is :  0.03232477411760373
For the borough Queens , the average distance from the average coordinates is :  0.06808135560349733
For the borough Staten Island , the average distance from the average coordinates is :  0.11618800772126377


Neighborhood | Average Distance
--- | --- 
Manhattan | 0.022651830023089826
Brooklyn | 0.03232477411760373
Bronx | 0.06746237034747092
Queens | 0.06808135560349733
Staten Island | 0.11618800772126377



### From the above results, it is clear that the restaurants are most densely populated in Manhattan, followed by Brooklyn and finally Bronx. Also notice that the average distance of the boroughs Bronx and Brooklyn does'nt have much of a significant difference in their values. However, the results obtained above conclude that the borough Manhattan would be a perfect choice for the Gluten-allergic tourists visiting NYC since this borough is not only densely populated but the average distance from the average coordinates is also really small compared to the other borough average distances.

##### Now we'll be plotting the maps of each of the various boroughs in order to visualize the average distances. This is stored in the variable 'maps_analysis'

In [32]:
maps_analysis[bghs[0]]

In [33]:
maps_analysis[bghs[1]]

In [34]:
maps_analysis[bghs[2]]

In [35]:
maps_analysis[bghs[3]]

In [36]:
maps_analysis[bghs[4]]

### We see that in boroughs Bronx, Queens and Staten Island, there are 3, 7 and 3 outliers respectively. This might affect our results, hence its safe to remove them before concluding anything. 

In [39]:
bgh = 'Bronx'
venues_mean_coordinates = [venues_df[bgh]['Lat'].mean(), venues_df[bgh]['Lng'].mean()] 

print(bgh)
print("Mean Distance from Mean coordinates")
dist = np.apply_along_axis(lambda x: np.linalg.norm(x - venues_mean_coordinates),1,venues_df[bgh][['Lat','Lng']].values)
dist.sort()
print(np.mean(dist[:-3]))# Ignoromg the 3 biggest distance

Bronx
Mean Distance from Mean coordinates
0.053565619863168656


In [40]:
bgh = 'Queens'
venues_mean_coordinates = [venues_df[bgh]['Lat'].mean(), venues_df[bgh]['Lng'].mean()] 

print(bgh)
print("Mean Distance from Mean coordinates")
dist = np.apply_along_axis(lambda x: np.linalg.norm(x - venues_mean_coordinates),1,venues_df[bgh][['Lat','Lng']].values)
dist.sort()
print(np.mean(dist[:-7]))# Ignoromg the 7 biggest distance

Queens
Mean Distance from Mean coordinates
0.04293349210337712


In [98]:
bgh = 'Staten Island'
venues_mean_coordinates = [venues_df[bgh]['Lat'].mean(), venues_df[bgh]['Lng'].mean()] 

print(bgh)
print("Mean Distance from Mean coordinates")
dist = np.apply_along_axis(lambda x: np.linalg.norm(x - venues_mean_coordinates),1,venues_df[bgh][['Lat','Lng']].values)
dist.sort()
print(np.mean(dist[:-3]))# Ignoromg the 5 biggest distances

Staten Island
Mean Distance from Mean coordinates
0.10015259081593658


The final table of our average distances would look like :

Neighborhood | Average Distance
--- | --- 
Manhattan | 0.022651830023089826
Brooklyn | 0.03232477411760373
Queens | 0.04293349210337712
Bronx | 0.053565619863168656
Staten Island | 0.10015259081593658

Rank before | avg dist before | Rank after | avg dist after
--- | --- | ---| ---
Manhattan | 0.02265 | Manhattan | 0.02265 
Brooklyn | 0.032324 | Brooklyn | 0.032324
Bronx | 0.06746 | Queens | 0.042933
Queens | 0.06808 | Bronx | 0.053565
Staten Island | 0.100152 | Staten Island | 0.100152

### From the above results, even after the removal of outliers, we are sure of our results and we can hence conclude that Manhattan is the perfect choice for a gluten-allergic person.