## The Battle Of Neighborhoods

#### Import the required libraries.

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import requests # library to handle requests

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

import folium # map rendering library

print('Libraries imported.')

Libraries imported.


#### Foursquare Credentials and Version

In [2]:
CLIENT_ID = 'VN0JXZXWPDXVOZBKO2KMB5TOQWL3NHRPM11R2CCOHHB5EBUR' # your Foursquare ID
CLIENT_SECRET = '' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)

Your credentails:
CLIENT_ID: VN0JXZXWPDXVOZBKO2KMB5TOQWL3NHRPM11R2CCOHHB5EBUR


#### Getting the Data

In [3]:
LIMIT = 500
cities = ["New York, NY", 'Chicago, IL', 'Boston, MA', 'Seattle, WA']
results = {}
for city in cities:
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&near={}&limit={}&categoryId={}'.format(
        CLIENT_ID, 
        CLIENT_SECRET, 
        VERSION, 
        city,
        LIMIT,
        "4bf58dd8d48988d148941735") # DONUT SHOP CATEGORY ID
    results[city] = requests.get(url).json()

In [4]:
df_venues={}
for city in cities:
    venues = json_normalize(results[city]['response']['groups'][0]['items'])
    df_venues[city] = venues[['venue.name', 'venue.location.address', 'venue.location.lat', 'venue.location.lng']]
    df_venues[city].columns = ['Name', 'Address', 'Latitude', 'Longitude']

  This is separate from the ipykernel package so we can avoid doing imports until


The Foursquare API gives us the nearest 100 venues in the city.

#### Create Map for Each City

In [5]:
map1 = {}
for city in cities:
    city_lat = np.mean([results[city]['response']['geocode']['geometry']['bounds']['ne']['lat'],
                        results[city]['response']['geocode']['geometry']['bounds']['sw']['lat']])
    city_lng = np.mean([results[city]['response']['geocode']['geometry']['bounds']['ne']['lng'],
                        results[city]['response']['geocode']['geometry']['bounds']['sw']['lng']])
    map1[city] = folium.Map(location=[city_lat, city_lng], zoom_start=11)

    # add markers to map
    for lat, lng, label in zip(df_venues[city]['Latitude'], df_venues[city]['Longitude'], df_venues[city]['Name']):
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            parse_html=False).add_to(map1[city])  
    print(f"Total number of donut shops  in {city} = ", results[city]['response']['totalResults'])
    print("Showing Top 100")

Total number of donut shops  in New York, NY =  171
Showing Top 100
Total number of donut shops  in Chicago, IL =  104
Showing Top 100
Total number of donut shops  in Boston, MA =  80
Showing Top 100
Total number of donut shops  in Seattle, WA =  69
Showing Top 100


In [6]:
map1[cities[0]]

In [7]:
map1[cities[1]]

In [8]:
map1[cities[2]]

In [9]:
map1[cities[3]]

We can see that New York is the most dense city with donut shops.

For an accurate measure of this density, get the mean location of the donut shops which should be near to most of them if they are really dense or far if not.

Next take the average of the distance of the venues to the mean coordinates.

In [10]:
map2 = {}
for city in cities:
    city_lat = np.mean([results[city]['response']['geocode']['geometry']['bounds']['ne']['lat'],
                        results[city]['response']['geocode']['geometry']['bounds']['sw']['lat']])
    city_lng = np.mean([results[city]['response']['geocode']['geometry']['bounds']['ne']['lng'],
                        results[city]['response']['geocode']['geometry']['bounds']['sw']['lng']])
    map2[city] = folium.Map(location=[city_lat, city_lng], zoom_start=11)
    venues_mean_coor = [df_venues[city]['Latitude'].mean(), df_venues[city]['Longitude'].mean()] 
    # add markers to map
    for lat, lng, label in zip(df_venues[city]['Latitude'], df_venues[city]['Longitude'], df_venues[city]['Name']):
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            parse_html=False).add_to(map2[city])
        folium.PolyLine([venues_mean_coor, [lat, lng]], color="green", weight=1.5, opacity=0.5).add_to(map2[city])
    
    label = folium.Popup("Mean Co-ordinate", parse_html=True)
    folium.CircleMarker(
        venues_mean_coor,
        radius=10,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map2[city])

    print(city)
    print("Mean Distance from Mean coordinates")
    print(np.mean(np.apply_along_axis(lambda x: np.linalg.norm(x - venues_mean_coor), 1,
                                      df_venues[city][['Latitude','Longitude']].values)))

New York, NY
Mean Distance from Mean coordinates
0.024721982130958282
Chicago, IL
Mean Distance from Mean coordinates
0.0695156334252791
Boston, MA
Mean Distance from Mean coordinates
0.037789975421072194
Seattle, WA
Mean Distance from Mean coordinates
0.045180032770001086


In [11]:
map2[cities[0]]

In [12]:
map2[cities[1]]

In [13]:
map2[cities[2]]

In [14]:
map2[cities[3]]

We now see that New York is the best option with the closest donut shops and large number of shops.