**Importing all the necessary libraries & all the dependencies that we will need.**

In [92]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Libraries imported.


**Entering the Client Credentials to get the access of the FOURSQUARE**

In [93]:
CLIENT_ID = 'LN5CDXVFZBFCKGM50JGTDGBRVBWVTGFGOBFHDZJPHISXRWYR'
CLIENT_SECRET = 'UFV5XHETENIEALUAKODYRZDE5WV3ZYMOC3ZNZ40COFNPAAJU'
VERSION = '20180604'
LIMIT = 30
print('Client ID: ' + CLIENT_ID)
print('Client Secret: ' + CLIENT_SECRET)

Client ID: LN5CDXVFZBFCKGM50JGTDGBRVBWVTGFGOBFHDZJPHISXRWYR
Client Secret: UFV5XHETENIEALUAKODYRZDE5WV3ZYMOC3ZNZ40COFNPAAJU


**Enter the 5 Major Cities, define the corresponding url & 
Send the GET Request and examine the results**

In [97]:
cities = ["New York, NY", 'Chicago, IL', 'San Francisco, CA', 'Jersey City, NJ', 'Boston, MA']
results = {}
for city in cities:
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&near={}&limit={}&categoryId={}'.format(
        CLIENT_ID, 
        CLIENT_SECRET, 
        VERSION, 
        city,
        LIMIT,
        "4bf58dd8d48988d1ca941735") # PIZZA PLACE CATEGORY ID
    results[city] = requests.get(url).json()

**Get relevant part of JSON and transform it into a pandas dataframe**

In [98]:
df_venues={}
for city in cities:
    venues = json_normalize(results[city]['response']['groups'][0]['items'])
    df_venues[city] = venues[['venue.name', 'venue.location.address', 'venue.location.lat', 'venue.location.lng']]
    df_venues[city].columns = ['Name', 'Address', 'Lat', 'Lng']

  This is separate from the ipykernel package so we can avoid doing imports until


**Finding the total number of pizza places in different cities,
the Foursquare API Only gives us the nearest 100 venues in the city.**

In [99]:
maps = {}
for city in cities:
    city_lat = np.mean([results[city]['response']['geocode']['geometry']['bounds']['ne']['lat'],
                        results[city]['response']['geocode']['geometry']['bounds']['sw']['lat']])
    city_lng = np.mean([results[city]['response']['geocode']['geometry']['bounds']['ne']['lng'],
                        results[city]['response']['geocode']['geometry']['bounds']['sw']['lng']])
    maps[city] = folium.Map(location=[city_lat, city_lng], zoom_start=11)

    # add markers to map
    for lat, lng, label in zip(df_venues[city]['Lat'], df_venues[city]['Lng'], df_venues[city]['Name']):
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            parse_html=False).add_to(maps[city])  
    print(f"Total number of pizza places in {city} = ", results[city]['response']['totalResults'])
    

Total number of pizza places in New York, NY =  269
Total number of pizza places in Chicago, IL =  211
Total number of pizza places in San Francisco, CA =  170
Total number of pizza places in Jersey City, NJ =  128
Total number of pizza places in Boston, MA =  185


**Displaying the locations of pizza places in maps of different cities.**

In [100]:
maps[cities[0]]

In [101]:
maps[cities[1]]

In [102]:
maps[cities[2]]

In [103]:
maps[cities[3]]

In [104]:
maps[cities[4]]

**We can see that New York and Jersey City are the most dense cities with Pizza places. And better than that, they are just one shore away.
However, Let's have a concrete measure of this density.
For this I will use some basic statistics. I will get the mean location of the pizza places which should be near to most of them if they are really dense or far if not.
Next I will take the average of the distance of the venues to the mean coordinates.**

In [105]:
maps = {}
for city in cities:
    city_lat = np.mean([results[city]['response']['geocode']['geometry']['bounds']['ne']['lat'],
                        results[city]['response']['geocode']['geometry']['bounds']['sw']['lat']])
    city_lng = np.mean([results[city]['response']['geocode']['geometry']['bounds']['ne']['lng'],
                        results[city]['response']['geocode']['geometry']['bounds']['sw']['lng']])
    maps[city] = folium.Map(location=[city_lat, city_lng], zoom_start=11)
    venues_mean_coor = [df_venues[city]['Lat'].mean(), df_venues[city]['Lng'].mean()] 


    print(city)
    print("Mean Distance from Mean coordinates")
    resto=(np.mean(np.apply_along_axis(lambda x: np.linalg.norm(x - venues_mean_coor),1,df_venues[city][['Lat','Lng']].values)))
    resto = resto.round(4)
    print(resto)

New York, NY
Mean Distance from Mean coordinates
0.0162
Chicago, IL
Mean Distance from Mean coordinates
0.0534
San Francisco, CA
Mean Distance from Mean coordinates
0.0262
Jersey City, NJ
Mean Distance from Mean coordinates
0.0165
Boston, MA
Mean Distance from Mean coordinates
0.03


In [106]:
df1 = {
     'City Name':['New York, NY', 'Chicago, IL', 'San Francisco, CA', 'Jersey City, NJ', 'Boston, MA'],
   'Mean Distance':[0.0162,0.0534,0.0262,0.0165,0.03]}
df1 = pd.DataFrame(df1,columns=['City Name','Mean Distance'])

**Displaying the Density level of Cities**

In [109]:
df1.loc[df1['Mean Distance'] < 0.017, 'Density Level Of Cities'] = 'High Density'
df1.loc[(df1['Mean Distance'] > 0.02) & (df1['Mean Distance'] < 0.03), 'Density Level Of Cities'] = 'Moderate Density'
df1.loc[df1['Mean Distance'] >= 0.03, 'Density Level Of Cities'] = 'Low Density'
df1.head()

Unnamed: 0,City Name,Mean Distance,Density Level Of Cities
0,"New York, NY",0.0162,High Density
1,"Chicago, IL",0.0534,Low Density
2,"San Francisco, CA",0.0262,Moderate Density
3,"Jersey City, NJ",0.0165,High Density
4,"Boston, MA",0.03,Low Density


**As we have calculated the mean distance from mean coordinators, we can see that New York & Jersey city are highly densed, but New york City is more dense than the Jersey city.**