In [6]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

### Exploring NY dataset

In [7]:
CLIENT_ID = 
CLIENT_SECRET = 
VERSION = '20180605' 
LIMIT=100
search_query='coffee'

In [8]:
with open('nyu_2451_34572-geojson.json') as json_data:
    newyork_data = json.load(json_data)


In [9]:
neighborhoods_data = newyork_data['features']

In [10]:
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)

In [11]:
for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [12]:
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [13]:
address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
loc = geolocator.geocode(address)
lat = loc.latitude
long = loc.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(lat, long))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


In [15]:
# create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[lat, long], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

### Explore each Borough's neighborhood and their coffee shops

##### Let's create a function to repeat the same process to all the neighborhoods in all boroughs

In [16]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [146]:
# function to get neccessary venues near location
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
    # create the API request URL
    url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(
        CLIENT_ID, 
        CLIENT_SECRET,  
        lat, 
        lng, 
        VERSION,
        search_query,
        radius, 
        LIMIT)

        # make the GET request
    results = requests.get(url).json()["response"]['venues']
        
    dataframe = json_normalize(results)
    # keep only columns that include venue name, latitude and longitude
    filtered_columns = ['name', 'categories'] +['location.lat','location.lng']+['id']
    dataframe_filtered = dataframe.loc[:, filtered_columns]

    # filter the category for each row
    dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)
    dataframe_filtered['Neighborhood']=name 
    dataframe_filtered['Neighborhood Latitude']=lat
    dataframe_filtered ['Neighborhood Longitude']=lng
    
    dataframe_filtered=dataframe_filtered.rename(columns={"name": "Venue", "categories": "Venue category",'location.lat': 'Venue Latitide', 'location.lng':'Venue Longitude' })              
    
    return(dataframe_filtered)

In [147]:
# Function to get venues rating
def get_rating(venue_id):
    url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)

    result = requests.get(url).json()
    try:
        return result['response']['venue']['rating']
    except:
        return(0)

### 1. Manhattan

In [18]:
manhattan_data = neighborhoods[neighborhoods['Borough'] == 'Manhattan'].reset_index(drop=True)
manhattan_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688


In [19]:
address = 'Manhattan, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Manhattan are 40.7896239, -73.9598939.


In [20]:
# create map of Manhattan using latitude and longitude values
map_manhattan = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(manhattan_data['Latitude'], manhattan_data['Longitude'], manhattan_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_manhattan)  
    
map_manhattan

In [148]:
# Applying function of getting venues with coffee to Manhattan
manhattan_venues = getNearbyVenues(names=manhattan_data['Neighborhood'],
                                   latitudes=manhattan_data['Latitude'],
                                   longitudes=manhattan_data['Longitude'])

Marble Hill
Chinatown
Washington Heights
Inwood
Hamilton Heights
Manhattanville
Central Harlem
East Harlem
Upper East Side
Yorkville
Lenox Hill
Roosevelt Island
Upper West Side
Lincoln Square
Clinton
Midtown
Murray Hill
Chelsea
Greenwich Village
East Village
Lower East Side
Tribeca
Little Italy
Soho
West Village
Manhattan Valley
Morningside Heights
Gramercy
Battery Park City
Financial District
Carnegie Hill
Noho
Civic Center
Midtown South
Sutton Place
Turtle Bay
Tudor City
Stuyvesant Town
Flatiron
Hudson Yards




In [150]:
ids=manhattan_venues['id'].tolist()
score=[]
for vid in ids:
    score.append(get_rating(vid))
manhattan_venues['rating']=score

In [151]:
manhattan_venues.head()

Unnamed: 0,Venue,Venue category,Venue Latitide,Venue Longitude,id,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,rating
0,Romeo and Juliet Coffee,Coffee Shop,40.760726,-73.997724,526d9114498ec8efda93fcce,Hudson Yards,40.756658,-74.000111,7.5
1,Oslo Coffee Roasters,Coffee Shop,40.760512,-73.998985,5c683caaf96b2c00397a8609,Hudson Yards,40.756658,-74.000111,8.3
2,Clovelly Coffee,Coffee Shop,40.753292,-73.996745,5d6165d5716af50008cde370,Hudson Yards,40.756658,-74.000111,7.7
3,R/GA Coffee Bar,Café,40.755948,-73.993659,4b9fe9eef964a520f84937e3,Hudson Yards,40.756658,-74.000111,0.0
4,Blue Bottle Coffee,Coffee Shop,40.753846,-74.00225,5c8d246c492822003a5a5a6e,Hudson Yards,40.756658,-74.000111,7.4


In [152]:
manhattan_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Venue,Venue category,Venue Latitide,Venue Longitude,id,Neighborhood Latitude,Neighborhood Longitude,rating
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Hudson Yards,21,21,21,21,21,21,21,21


In [153]:
manhattan_venues.groupby('Venue category').count()

Unnamed: 0_level_0,Venue,Venue Latitide,Venue Longitude,id,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,rating
Venue category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Breakfast Spot,2,2,2,2,2,2,2,2
Café,3,3,3,3,3,3,3,3
Coffee Shop,11,11,11,11,11,11,11,11
Deli / Bodega,1,1,1,1,1,1,1,1
Food,1,1,1,1,1,1,1,1
Food Truck,1,1,1,1,1,1,1,1
Latin American Restaurant,1,1,1,1,1,1,1,1
Restaurant,1,1,1,1,1,1,1,1


### 2. Bronx

In [28]:
bronx_data = neighborhoods[neighborhoods['Borough'] == 'Bronx'].reset_index(drop=True)
bronx_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [29]:
address = 'Bronx, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of bronx are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of bronx are 40.8466508, -73.8785937.


In [30]:

# create map of bronx using latitude and longitude values
map_bronx = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(bronx_data['Latitude'], bronx_data['Longitude'], bronx_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_bronx)  
    
map_bronx

In [161]:
search_query='coffee'
bronx_coffee = getNearbyVenues(names=bronx_data['Neighborhood'],
                                   latitudes=bronx_data['Latitude'],
                                   longitudes=bronx_data['Longitude'],radius=1000)


Wakefield
Co-op City
Eastchester
Fieldston
Riverdale
Kingsbridge
Woodlawn
Norwood
Williamsbridge
Baychester
Pelham Parkway
City Island
Bedford Park
University Heights
Morris Heights
Fordham
East Tremont
West Farms
High  Bridge
Melrose
Mott Haven
Port Morris
Longwood
Hunts Point
Morrisania
Soundview
Clason Point
Throgs Neck
Country Club
Parkchester
Westchester Square
Van Nest
Morris Park
Belmont
Spuyten Duyvil
North Riverdale
Pelham Bay
Schuylerville
Edgewater Park
Castle Hill
Olinville
Pelham Gardens
Concourse
Unionport
Edenwald
Claremont Village
Concourse Village
Mount Eden
Mount Hope
Bronxdale
Allerton
Kingsbridge Heights




In [164]:
ids=bronx_coffee['id'].tolist()
score=[]
for vid in ids:
    score.append(get_rating(vid))
bronx_coffee['rating']=score

In [165]:
bronx_coffee.head()

Unnamed: 0,Venue,Venue category,Venue Latitide,Venue Longitude,id,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,rating
0,National Restaurant & Coffee,Diner,40.873007,-73.889082,4b78a508f964a520b4db2ee3,Kingsbridge Heights,40.870392,-73.901523,8.1
1,Perista Coffee Shop,Coffee Shop,40.868314,-73.901298,5156f35be4b0dc046e1af723,Kingsbridge Heights,40.870392,-73.901523,0.0
2,Kingsbridge Coffee Shop & Deli,Coffee Shop,40.86721,-73.896699,4dad975dfa8cf64d804b25e9,Kingsbridge Heights,40.870392,-73.901523,5.9
3,Coffee Cart,Breakfast Spot,40.874555,-73.910358,50bf6001e4b068101192a373,Kingsbridge Heights,40.870392,-73.901523,0.0
4,Coffee Cart on Fordham Rd,Food Truck,40.861292,-73.890206,4c8637a847cc224b2343a79f,Kingsbridge Heights,40.870392,-73.901523,0.0


In [37]:
#bronx_places=pd.concat([bronx_venues,bronx_coffee],ignore_index=True)

In [166]:
bronx_coffee.groupby('Neighborhood').count()

Unnamed: 0_level_0,Venue,Venue category,Venue Latitide,Venue Longitude,id,Neighborhood Latitude,Neighborhood Longitude,rating
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Kingsbridge Heights,8,8,8,8,8,8,8,8


In [167]:
bronx_coffee.groupby('Venue category').count()

Unnamed: 0_level_0,Venue,Venue Latitide,Venue Longitude,id,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,rating
Venue category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Breakfast Spot,1,1,1,1,1,1,1,1
Coffee Shop,4,4,4,4,4,4,4,4
Deli / Bodega,1,1,1,1,1,1,1,1
Diner,1,1,1,1,1,1,1,1
Food Truck,1,1,1,1,1,1,1,1


### 3. Brooklyn

In [38]:
brooklyn_data = neighborhoods[neighborhoods['Borough'] == 'Brooklyn'].reset_index(drop=True)
brooklyn_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Brooklyn,Bay Ridge,40.625801,-74.030621
1,Brooklyn,Bensonhurst,40.611009,-73.99518
2,Brooklyn,Sunset Park,40.645103,-74.010316
3,Brooklyn,Greenpoint,40.730201,-73.954241
4,Brooklyn,Gravesend,40.59526,-73.973471


In [39]:
address = 'Brooklyn, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of brooklyn are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of brooklyn are 40.6501038, -73.9495823.


In [40]:
# create map of brooklyn using latitude and longitude values
map_brooklyn = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(brooklyn_data['Latitude'], brooklyn_data['Longitude'], brooklyn_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_brooklyn)  
    
map_brooklyn

In [168]:
search_query='coffee'
brooklyn_coffee = getNearbyVenues(names=brooklyn_data['Neighborhood'],
                                   latitudes=brooklyn_data['Latitude'],
                                   longitudes=brooklyn_data['Longitude'])

Bay Ridge
Bensonhurst
Sunset Park
Greenpoint
Gravesend
Brighton Beach
Sheepshead Bay
Manhattan Terrace
Flatbush
Crown Heights
East Flatbush
Kensington
Windsor Terrace
Prospect Heights
Brownsville
Williamsburg
Bushwick
Bedford Stuyvesant
Brooklyn Heights
Cobble Hill
Carroll Gardens
Red Hook
Gowanus
Fort Greene
Park Slope
Cypress Hills
East New York
Starrett City
Canarsie
Flatlands
Mill Island
Manhattan Beach
Coney Island
Bath Beach
Borough Park
Dyker Heights
Gerritsen Beach
Marine Park
Clinton Hill
Sea Gate
Downtown
Boerum Hill
Prospect Lefferts Gardens
Ocean Hill
City Line
Bergen Beach
Midwood
Prospect Park South
Georgetown
East Williamsburg
North Side
South Side
Ocean Parkway
Fort Hamilton
Ditmas Park
Wingate
Rugby
Remsen Village
New Lots
Paerdegat Basin
Mill Basin
Fulton Ferry
Vinegar Hill
Weeksville
Broadway Junction
Dumbo
Homecrest
Highland Park
Madison
Erasmus




In [169]:
ids=brooklyn_coffee['id'].tolist()
score=[]
for vid in ids:
    score.append(get_rating(vid))
brooklyn_coffee['rating']=score

In [170]:
brooklyn_coffee.head()

Unnamed: 0,Venue,Venue category,Venue Latitide,Venue Longitude,id,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,rating
0,Latoya's Coffee Shop,Coffee Shop,40.644966,-73.948478,4f32795419836c91c7db4d12,Erasmus,40.646926,-73.948177,0
1,First Class Daily Coffee Shop,Coffee Shop,40.651191,-73.94652,4f322af219836c91c7bcd71c,Erasmus,40.646926,-73.948177,0


In [173]:
brooklyn_coffee.groupby('Neighborhood').count()

Unnamed: 0_level_0,Venue,Venue category,Venue Latitide,Venue Longitude,id,Neighborhood Latitude,Neighborhood Longitude,rating
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Erasmus,2,2,2,2,2,2,2,2


In [174]:
brooklyn_coffee.groupby('Venue category').count()

Unnamed: 0_level_0,Venue,Venue Latitide,Venue Longitude,id,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,rating
Venue category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Coffee Shop,2,2,2,2,2,2,2,2


### 4.Queens 

In [51]:
queens_data = neighborhoods[neighborhoods['Borough'] == 'Queens'].reset_index(drop=True)
queens_data.head()


Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Queens,Astoria,40.768509,-73.915654
1,Queens,Woodside,40.746349,-73.901842
2,Queens,Jackson Heights,40.751981,-73.882821
3,Queens,Elmhurst,40.744049,-73.881656
4,Queens,Howard Beach,40.654225,-73.838138


In [52]:
address = 'Queens, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of queens are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of queens are 40.7498243, -73.7976337.


In [53]:
# create map of queens using latitude and longitude values
map_queens = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(queens_data['Latitude'], queens_data['Longitude'], queens_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_queens)  
    
map_queens


In [175]:
search_query='coffee'
queens_coffee = getNearbyVenues(names=queens_data['Neighborhood'],
                                   latitudes=queens_data['Latitude'],
                                   longitudes=queens_data['Longitude'])


Astoria
Woodside
Jackson Heights
Elmhurst
Howard Beach
Corona
Forest Hills
Kew Gardens
Richmond Hill
Flushing
Long Island City
Sunnyside
East Elmhurst
Maspeth
Ridgewood
Glendale
Rego Park
Woodhaven
Ozone Park
South Ozone Park
College Point
Whitestone
Bayside
Auburndale
Little Neck
Douglaston
Glen Oaks
Bellerose
Kew Gardens Hills
Fresh Meadows
Briarwood
Jamaica Center
Oakland Gardens
Queens Village
Hollis
South Jamaica
St. Albans
Rochdale
Springfield Gardens
Cambria Heights
Rosedale
Far Rockaway
Broad Channel
Breezy Point
Steinway
Beechhurst
Bay Terrace
Edgemere
Arverne
Rockaway Beach
Neponsit
Murray Hill
Floral Park
Holliswood
Jamaica Estates
Queensboro Hill
Hillcrest
Ravenswood
Lindenwood
Laurelton
Lefrak City
Belle Harbor
Rockaway Park
Somerville
Brookville
Bellaire
North Corona
Forest Hills Gardens
Jamaica Hills
Utopia
Pomonok
Astoria Heights
Hunters Point
Sunnyside Gardens
Blissville
Roxbury
Middle Village
Malba
Hammels
Bayswater
Queensbridge




In [176]:
ids=queens_coffee['id'].tolist()
score=[]
for vid in ids:
    score.append(get_rating(vid))
queens_coffee['rating']=score

In [177]:
queens_coffee.head()

Unnamed: 0,Venue,Venue category,Venue Latitide,Venue Longitude,id,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,rating
0,Joe Coffee Company,Café,40.753312,-73.940044,5db08b4de555080008a6ffe1,Queensbridge,40.756091,-73.945631,0
1,Carlos Coffee Shop,Coffee Shop,40.75392,-73.94226,4f32763019836c91c7da1c56,Queensbridge,40.756091,-73.945631,0
2,Mama’s Coffee Shop & Restaurant,Coffee Shop,40.755649,-73.94176,5d17683fca917c0023dfb1d7,Queensbridge,40.756091,-73.945631,0
3,Rosies Coffee Shop,Coffee Shop,40.751232,-73.944487,4de4efde1f6e3190cd4a0470,Queensbridge,40.756091,-73.945631,0
4,Birch Coffee,Coffee Shop,40.753138,-73.940526,56377108498ee642752d0355,Queensbridge,40.756091,-73.945631,0


In [179]:
queens_coffee.groupby('Neighborhood').count()

Unnamed: 0_level_0,Venue,Venue category,Venue Latitide,Venue Longitude,id,Neighborhood Latitude,Neighborhood Longitude,rating
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Queensbridge,5,5,5,5,5,5,5,5


In [180]:
queens_coffee.groupby('Venue category').count()

Unnamed: 0_level_0,Venue,Venue Latitide,Venue Longitude,id,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,rating
Venue category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Café,1,1,1,1,1,1,1,1
Coffee Shop,4,4,4,4,4,4,4,4


### 5. Staten Island

In [61]:
si_data = neighborhoods[neighborhoods['Borough'] == 'Staten Island'].reset_index(drop=True)
si_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Staten Island,St. George,40.644982,-74.079353
1,Staten Island,New Brighton,40.640615,-74.087017
2,Staten Island,Stapleton,40.626928,-74.077902
3,Staten Island,Rosebank,40.615305,-74.069805
4,Staten Island,West Brighton,40.631879,-74.107182


In [66]:
address = 'Staten Island, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of si are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of si are 40.5834557, -74.1496048.


In [67]:
# create map of si using latitude and longitude values
map_si = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(si_data['Latitude'], si_data['Longitude'], si_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_si)  
    
map_si

In [68]:
search_query='coffee'
si_venues = getNearbyVenues(names=si_data['Neighborhood'],
                                   latitudes=si_data['Latitude'],
                                   longitudes=si_data['Longitude'])


St. George
New Brighton
Stapleton
Rosebank
West Brighton
Grymes Hill
Todt Hill
South Beach
Port Richmond
Mariner's Harbor
Port Ivory
Castleton Corners
New Springville
Travis
New Dorp
Oakwood
Great Kills
Eltingville
Annadale
Woodrow
Tottenville
Tompkinsville
Silver Lake
Sunnyside
Park Hill
Westerleigh
Graniteville
Arlington
Arrochar
Grasmere
Old Town
Dongan Hills
Midland Beach
Grant City
New Dorp Beach
Bay Terrace
Huguenot
Pleasant Plains
Butler Manor
Charleston
Rossville
Arden Heights
Greenridge
Heartland Village
Chelsea
Bloomfield
Bulls Head
Richmond Town
Shore Acres
Clifton
Concord
Emerson Hill
Randall Manor
Howland Hook
Elm Park
Manor Heights
Willowbrook
Sandy Ground
Egbertville
Prince's Bay
Lighthouse Hill
Richmond Valley
Fox Hills




KeyError: "None of [Index(['name', 'categories', 'location.lat', 'location.lng'], dtype='object')] are in the [columns]"

#### It looks like there is no registered coffee shop in foursuare API  in this borough

In [183]:
NY_venues=pd.concat([manhattan_venues,bronx_coffee,brooklyn_coffee,queens_coffee],ignore_index=True)

In [184]:
NY_venues

Unnamed: 0,Venue,Venue category,Venue Latitide,Venue Longitude,id,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,rating
0,Romeo and Juliet Coffee,Coffee Shop,40.760726,-73.997724,526d9114498ec8efda93fcce,Hudson Yards,40.756658,-74.000111,7.5
1,Oslo Coffee Roasters,Coffee Shop,40.760512,-73.998985,5c683caaf96b2c00397a8609,Hudson Yards,40.756658,-74.000111,8.3
2,Clovelly Coffee,Coffee Shop,40.753292,-73.996745,5d6165d5716af50008cde370,Hudson Yards,40.756658,-74.000111,7.7
3,R/GA Coffee Bar,Café,40.755948,-73.993659,4b9fe9eef964a520f84937e3,Hudson Yards,40.756658,-74.000111,0.0
4,Blue Bottle Coffee,Coffee Shop,40.753846,-74.00225,5c8d246c492822003a5a5a6e,Hudson Yards,40.756658,-74.000111,7.4
5,Think Coffee,Coffee Shop,40.752244,-74.001707,55fc450a498ed76a0b227750,Hudson Yards,40.756658,-74.000111,7.8
6,Old Country Coffee,Café,40.754371,-73.998727,5758771d498ecbe6c8cc39c5,Hudson Yards,40.756658,-74.000111,7.7
7,Champion Coffee,Coffee Shop,40.754853,-73.993904,5b576a7b625a66002c930db5,Hudson Yards,40.756658,-74.000111,0.0
8,Jack’s Stir Brew Coffee,Coffee Shop,40.754054,-74.002213,5c8d4eb71f8ed6002c94f624,Hudson Yards,40.756658,-74.000111,6.1
9,Exclusive Coffee & Juice,Coffee Shop,40.756886,-73.99391,5e5e936636734600081ebbf1,Hudson Yards,40.756658,-74.000111,0.0


### Clustering

In [185]:
# one hot encoding
NY_onehot = pd.get_dummies(NY_venues[['Venue category']], prefix="", prefix_sep="")

# add Neighborhood column back to dataframe
NY_onehot['Venue Longitude'] = NY_venues['Venue Longitude'] 
NY_onehot['Venue Latitide'] = NY_venues['Venue Latitide'] 
NY_onehot['rating'] = NY_venues['rating'] 
NY_onehot['Neighborhood'] = NY_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [NY_onehot.columns[-1]] + list(NY_onehot.columns[:-1])
NY_onehot = NY_onehot[fixed_columns]

# Check returned one hot encoding data:
print('One hot encoding returned "{}" rows.'.format(NY_onehot.shape[0]))

# Regroup rows by Neighborhood and mean of frequency occurrence per category.
NY_grouped = NY_onehot.groupby('Neighborhood').mean().reset_index()

print('One hot encoding re-group returned "{}" rows.'.format(NY_grouped.shape[0]))
NY_grouped.head()

One hot encoding returned "36" rows.
One hot encoding re-group returned "4" rows.


Unnamed: 0,Neighborhood,Breakfast Spot,Café,Coffee Shop,Deli / Bodega,Diner,Food,Food Truck,Latin American Restaurant,Restaurant,Venue Longitude,Venue Latitide,rating
0,Erasmus,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-73.947499,40.648078,0.0
1,Hudson Yards,0.095238,0.142857,0.52381,0.047619,0.0,0.047619,0.047619,0.047619,0.047619,-73.996543,40.75602,3.452381
2,Kingsbridge Heights,0.125,0.0,0.5,0.125,0.125,0.0,0.125,0.0,0.0,-73.898844,40.871249,1.75
3,Queensbridge,0.0,0.2,0.8,0.0,0.0,0.0,0.0,0.0,0.0,-73.941815,40.75345,0.0


In [191]:
NY_grouped2=NY_grouped.drop(['Venue Longitude','Venue Latitide','rating'],axis=1)
for hood in NY_grouped2['Neighborhood']:
    print("----"+hood+"----")
    temp = NY_grouped2[NY_grouped2['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head())
    print('\n')

----Erasmus----
            venue  freq
0     Coffee Shop   1.0
1  Breakfast Spot   0.0
2            Café   0.0
3   Deli / Bodega   0.0
4           Diner   0.0


----Hudson Yards----
            venue  freq
0     Coffee Shop  0.52
1            Café  0.14
2  Breakfast Spot  0.10
3   Deli / Bodega  0.05
4            Food  0.05


----Kingsbridge Heights----
            venue  freq
0     Coffee Shop  0.50
1  Breakfast Spot  0.12
2   Deli / Bodega  0.12
3           Diner  0.12
4      Food Truck  0.12


----Queensbridge----
            venue  freq
0     Coffee Shop   0.8
1            Café   0.2
2  Breakfast Spot   0.0
3   Deli / Bodega   0.0
4           Diner   0.0




In [187]:
# Function to return most common venues
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [194]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = NY_grouped2['Neighborhood']

for ind in np.arange(NY_grouped2.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(NY_grouped2.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Erasmus,Coffee Shop,Restaurant,Latin American Restaurant,Food Truck,Food
1,Hudson Yards,Coffee Shop,Café,Breakfast Spot,Restaurant,Latin American Restaurant
2,Kingsbridge Heights,Coffee Shop,Food Truck,Diner,Deli / Bodega,Breakfast Spot
3,Queensbridge,Coffee Shop,Café,Restaurant,Latin American Restaurant,Food Truck


### Clustering

In [196]:
# set number of clusters
kclusters = 3

NY_grouped_clustering = NY_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0)
kmeans.fit(NY_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 1, 2, 0], dtype=int32)

In [197]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

NY_merged = NY_venues
NY_merged = NY_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

NY_merged.tail()

Unnamed: 0,Venue,Venue category,Venue Latitide,Venue Longitude,id,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,rating,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
31,Joe Coffee Company,Café,40.753312,-73.940044,5db08b4de555080008a6ffe1,Queensbridge,40.756091,-73.945631,0.0,0,Coffee Shop,Café,Restaurant,Latin American Restaurant,Food Truck
32,Carlos Coffee Shop,Coffee Shop,40.75392,-73.94226,4f32763019836c91c7da1c56,Queensbridge,40.756091,-73.945631,0.0,0,Coffee Shop,Café,Restaurant,Latin American Restaurant,Food Truck
33,Mama’s Coffee Shop & Restaurant,Coffee Shop,40.755649,-73.94176,5d17683fca917c0023dfb1d7,Queensbridge,40.756091,-73.945631,0.0,0,Coffee Shop,Café,Restaurant,Latin American Restaurant,Food Truck
34,Rosies Coffee Shop,Coffee Shop,40.751232,-73.944487,4de4efde1f6e3190cd4a0470,Queensbridge,40.756091,-73.945631,0.0,0,Coffee Shop,Café,Restaurant,Latin American Restaurant,Food Truck
35,Birch Coffee,Coffee Shop,40.753138,-73.940526,56377108498ee642752d0355,Queensbridge,40.756091,-73.945631,0.0,0,Coffee Shop,Café,Restaurant,Latin American Restaurant,Food Truck


In [198]:
map_clusters = folium.Map(location=[lat, long], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(NY_merged['Neighborhood Latitude'], NY_merged['Neighborhood Longitude'], NY_merged['Neighborhood'], NY_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine Clusters
***Cluster 0***

In [199]:
NY_merged.loc[NY_merged['Cluster Labels'] == 0, NY_merged.columns[[1] + list(range(3, NY_merged.shape[1]))]]

Unnamed: 0,Venue category,Venue Longitude,id,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,rating,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
29,Coffee Shop,-73.948478,4f32795419836c91c7db4d12,Erasmus,40.646926,-73.948177,0.0,0,Coffee Shop,Restaurant,Latin American Restaurant,Food Truck,Food
30,Coffee Shop,-73.94652,4f322af219836c91c7bcd71c,Erasmus,40.646926,-73.948177,0.0,0,Coffee Shop,Restaurant,Latin American Restaurant,Food Truck,Food
31,Café,-73.940044,5db08b4de555080008a6ffe1,Queensbridge,40.756091,-73.945631,0.0,0,Coffee Shop,Café,Restaurant,Latin American Restaurant,Food Truck
32,Coffee Shop,-73.94226,4f32763019836c91c7da1c56,Queensbridge,40.756091,-73.945631,0.0,0,Coffee Shop,Café,Restaurant,Latin American Restaurant,Food Truck
33,Coffee Shop,-73.94176,5d17683fca917c0023dfb1d7,Queensbridge,40.756091,-73.945631,0.0,0,Coffee Shop,Café,Restaurant,Latin American Restaurant,Food Truck
34,Coffee Shop,-73.944487,4de4efde1f6e3190cd4a0470,Queensbridge,40.756091,-73.945631,0.0,0,Coffee Shop,Café,Restaurant,Latin American Restaurant,Food Truck
35,Coffee Shop,-73.940526,56377108498ee642752d0355,Queensbridge,40.756091,-73.945631,0.0,0,Coffee Shop,Café,Restaurant,Latin American Restaurant,Food Truck


***It looks like all venues which are not rated are in this category***

### Cluster 1

In [200]:
NY_merged.loc[NY_merged['Cluster Labels'] == 1, NY_merged.columns[[1] + list(range(3, NY_merged.shape[1]))]]

Unnamed: 0,Venue category,Venue Longitude,id,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,rating,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Coffee Shop,-73.997724,526d9114498ec8efda93fcce,Hudson Yards,40.756658,-74.000111,7.5,1,Coffee Shop,Café,Breakfast Spot,Restaurant,Latin American Restaurant
1,Coffee Shop,-73.998985,5c683caaf96b2c00397a8609,Hudson Yards,40.756658,-74.000111,8.3,1,Coffee Shop,Café,Breakfast Spot,Restaurant,Latin American Restaurant
2,Coffee Shop,-73.996745,5d6165d5716af50008cde370,Hudson Yards,40.756658,-74.000111,7.7,1,Coffee Shop,Café,Breakfast Spot,Restaurant,Latin American Restaurant
3,Café,-73.993659,4b9fe9eef964a520f84937e3,Hudson Yards,40.756658,-74.000111,0.0,1,Coffee Shop,Café,Breakfast Spot,Restaurant,Latin American Restaurant
4,Coffee Shop,-74.00225,5c8d246c492822003a5a5a6e,Hudson Yards,40.756658,-74.000111,7.4,1,Coffee Shop,Café,Breakfast Spot,Restaurant,Latin American Restaurant
5,Coffee Shop,-74.001707,55fc450a498ed76a0b227750,Hudson Yards,40.756658,-74.000111,7.8,1,Coffee Shop,Café,Breakfast Spot,Restaurant,Latin American Restaurant
6,Café,-73.998727,5758771d498ecbe6c8cc39c5,Hudson Yards,40.756658,-74.000111,7.7,1,Coffee Shop,Café,Breakfast Spot,Restaurant,Latin American Restaurant
7,Coffee Shop,-73.993904,5b576a7b625a66002c930db5,Hudson Yards,40.756658,-74.000111,0.0,1,Coffee Shop,Café,Breakfast Spot,Restaurant,Latin American Restaurant
8,Coffee Shop,-74.002213,5c8d4eb71f8ed6002c94f624,Hudson Yards,40.756658,-74.000111,6.1,1,Coffee Shop,Café,Breakfast Spot,Restaurant,Latin American Restaurant
9,Coffee Shop,-73.99391,5e5e936636734600081ebbf1,Hudson Yards,40.756658,-74.000111,0.0,1,Coffee Shop,Café,Breakfast Spot,Restaurant,Latin American Restaurant


***As Hudson Yards the most popular place for coffee shops,it has created seperate cluster***

### Cluster 2

In [201]:
NY_merged.loc[NY_merged['Cluster Labels'] == 2, NY_merged.columns[[1] + list(range(3, NY_merged.shape[1]))]]

Unnamed: 0,Venue category,Venue Longitude,id,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,rating,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
21,Diner,-73.889082,4b78a508f964a520b4db2ee3,Kingsbridge Heights,40.870392,-73.901523,8.1,2,Coffee Shop,Food Truck,Diner,Deli / Bodega,Breakfast Spot
22,Coffee Shop,-73.901298,5156f35be4b0dc046e1af723,Kingsbridge Heights,40.870392,-73.901523,0.0,2,Coffee Shop,Food Truck,Diner,Deli / Bodega,Breakfast Spot
23,Coffee Shop,-73.896699,4dad975dfa8cf64d804b25e9,Kingsbridge Heights,40.870392,-73.901523,5.9,2,Coffee Shop,Food Truck,Diner,Deli / Bodega,Breakfast Spot
24,Breakfast Spot,-73.910358,50bf6001e4b068101192a373,Kingsbridge Heights,40.870392,-73.901523,0.0,2,Coffee Shop,Food Truck,Diner,Deli / Bodega,Breakfast Spot
25,Food Truck,-73.890206,4c8637a847cc224b2343a79f,Kingsbridge Heights,40.870392,-73.901523,0.0,2,Coffee Shop,Food Truck,Diner,Deli / Bodega,Breakfast Spot
26,Coffee Shop,-73.889053,4fe48299e4b06b1c063a2c33,Kingsbridge Heights,40.870392,-73.901523,0.0,2,Coffee Shop,Food Truck,Diner,Deli / Bodega,Breakfast Spot
27,Coffee Shop,-73.90873,4b292e73f964a5207b9a24e3,Kingsbridge Heights,40.870392,-73.901523,0.0,2,Coffee Shop,Food Truck,Diner,Deli / Bodega,Breakfast Spot
28,Deli / Bodega,-73.905327,4df96fe0b0fb0401c7f57c23,Kingsbridge Heights,40.870392,-73.901523,0.0,2,Coffee Shop,Food Truck,Diner,Deli / Bodega,Breakfast Spot


***The second most popular place is Kingsbridge Heights***

## Let's try to find coffee shops for 2 persons, which will be more or less equal distanced for both of them

In [119]:
# Address of first person
address = '292 Greenwich St, New York, NY 10007'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

40.71596 -74.0112857


In [120]:
# A little modification of this function 
def getNearbyVenues2(latitude,longitude,radius=500):
            
    # create the API request URL
    url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(
        CLIENT_ID, 
        CLIENT_SECRET,  
        latitude, 
        longitude, 
        VERSION,
        search_query,
        radius, 
        LIMIT)

        # make the GET request
    results = requests.get(url).json()["response"]['venues']
        
    dataframe = json_normalize(results)
    # keep only columns that include venue name, and anything that is associated with location
    filtered_columns = ['name', 'categories'] +['location.lat','location.lng', 'location.address']
    dataframe_filtered = dataframe.loc[:, filtered_columns]

    # filter the category for each row
    dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

    dataframe_filtered=dataframe_filtered.rename(columns={"name": "Venue", "categories": "Venue category",'location.lat': 'Venue Latitide', 'location.lng':'Venue Longitude','location.address':'Address' })              
    return(dataframe_filtered)

In [121]:
# Looking for coffee shops around
search_query='coffee'
dataframe_filtered= getNearbyVenues2(latitude,longitude)
dataframe_filtered



Unnamed: 0,Venue,Venue category,Venue Latitide,Venue Longitude,Address
0,For Five Coffee Roasters,Coffee Shop,40.71224,-74.014915,
1,Blue Spoon Coffee Co.,Coffee Shop,40.714428,-74.006859,76 Chambers St
2,Hudson/Chambers Coffee Cart,Food Truck,40.715638,-74.009613,Chambers St
3,Laughing Man Coffee & Tea,Coffee Shop,40.717394,-74.010103,184 Duane St
4,Jack’s Stir Brew Coffee,Coffee Shop,40.71645,-74.009755,139 Reade St
5,Blue Bottle Coffee,Coffee Shop,40.710589,-74.012371,150 Greenwich St
6,Coffee Cart,Coffee Shop,40.715331,-74.011562,
7,Sky Lobby Coffee Bar,Corporate Cafeteria,40.714708,-74.014404,200 West St
8,Joe Coffee,Coffee Shop,40.712526,-74.013137,Oculus Passage
9,3rd Floor Coffee Bar,Corporate Coffee Shop,40.714757,-74.014329,200 West St


### Showing these locations on map

In [122]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=13) 

# add a red circle marker to represent the 1st person location
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    popup='Your location',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(venues_map)

# add the coffee shops as blue circle markers
for lat, lng, label in zip(dataframe_filtered['Venue Latitide'], dataframe_filtered['Venue Longitude'], dataframe_filtered['Venue category']):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)

# display map
venues_map

## Address of second person

In [102]:
address2 = '70 Thomas St, New York, NY 10013, USA'

geolocator2 = Nominatim(user_agent="foursquare_agent")
location2 = geolocator.geocode(address2)
latitude2 = location2.latitude
longitude2 = location2.longitude
print(latitude2, longitude2)

40.71685275 -74.00771469641813


In [113]:
# Looking for coffee shops around
search_query='coffee'
dataframe_filtered2= getNearbyVenues2(latitude2,longitude2)
dataframe_filtered2



Unnamed: 0,Venue,Venue category,Venue Latitide,Venue Longitude,Address
0,Blue Spoon Coffee Co.,Coffee Shop,40.714428,-74.006859,76 Chambers St
1,Hudson/Chambers Coffee Cart,Food Truck,40.715638,-74.009613,Chambers St
2,Coffee Cart,Coffee Shop,40.714389,-74.002791,60 Centre St
3,West Side Coffee Shop,Latin American Restaurant,40.720174,-74.003915,323 Church St
4,Dunkin' Iced Coffee Lounge at Z100,Music Venue,40.72,-74.004561,
5,Laughing Man Coffee & Tea,Coffee Shop,40.717394,-74.010103,184 Duane St
6,Jack’s Stir Brew Coffee,Coffee Shop,40.71645,-74.009755,139 Reade St
7,Coffee Cart,Food Truck,40.717064,-74.006391,W. Broadway & Thomas Street
8,Blue Bottle Coffee,Coffee Shop,40.718355,-74.002215,396 Broadway
9,Coffee Shop,Café,40.718039,-74.008331,


In [106]:

venues_map = folium.Map(location=[latitude2, longitude2], zoom_start=13) 
# add a red circle marker to represent 2nd person's location
folium.features.CircleMarker(
    [latitude2, longitude2],
    radius=10,
    color='red',
    popup='Partners location',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(venues_map)

# add the coffee shops as blue circle markers
for lat, lng, label in zip(dataframe_filtered2['Venue Latitide'], dataframe_filtered2['Venue Longitude'], dataframe_filtered2['Venue category']):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)

# display map
venues_map

## NOW we are going to check common places for these 2 locations

In [123]:
res=pd.concat([dataframe_filtered,dataframe_filtered2],ignore_index=True)
res

Unnamed: 0,Venue,Venue category,Venue Latitide,Venue Longitude,Address
0,For Five Coffee Roasters,Coffee Shop,40.71224,-74.014915,
1,Blue Spoon Coffee Co.,Coffee Shop,40.714428,-74.006859,76 Chambers St
2,Hudson/Chambers Coffee Cart,Food Truck,40.715638,-74.009613,Chambers St
3,Laughing Man Coffee & Tea,Coffee Shop,40.717394,-74.010103,184 Duane St
4,Jack’s Stir Brew Coffee,Coffee Shop,40.71645,-74.009755,139 Reade St
5,Blue Bottle Coffee,Coffee Shop,40.710589,-74.012371,150 Greenwich St
6,Coffee Cart,Coffee Shop,40.715331,-74.011562,
7,Sky Lobby Coffee Bar,Corporate Cafeteria,40.714708,-74.014404,200 West St
8,Joe Coffee,Coffee Shop,40.712526,-74.013137,Oculus Passage
9,3rd Floor Coffee Bar,Corporate Coffee Shop,40.714757,-74.014329,200 West St


### Creating a dataframe with common places

In [124]:
duplicateRowsDF = res[res.duplicated(['Address'], keep='first')]
duplicateRowsDF=duplicateRowsDF.dropna(subset=['Address']).reset_index()
duplicateRowsDF

Unnamed: 0,index,Venue,Venue category,Venue Latitide,Venue Longitude,Address
0,9,3rd Floor Coffee Bar,Corporate Coffee Shop,40.714757,-74.014329,200 West St
1,40,Blue Spoon Coffee Co.,Coffee Shop,40.714428,-74.006859,76 Chambers St
2,41,Hudson/Chambers Coffee Cart,Food Truck,40.715638,-74.009613,Chambers St
3,45,Laughing Man Coffee & Tea,Coffee Shop,40.717394,-74.010103,184 Duane St
4,46,Jack’s Stir Brew Coffee,Coffee Shop,40.71645,-74.009755,139 Reade St
5,47,Coffee Cart,Food Truck,40.717064,-74.006391,W. Broadway & Thomas Street
6,51,Lafayette/Worth Coffee Cart,Food Truck,40.714751,-74.004314,125 Worth St
7,52,Coffee Cart,Food Court,40.719252,-74.008515,Hudson St and Franklin St
8,53,Sam's Coffee,Food Truck,40.716706,-74.00625,214 Church St.
9,54,Supreme Coffee,Food Truck,40.716884,-74.005879,60 Worth St


### Showing these locations on map

In [125]:
venues_map = folium.Map(location=[latitude2, longitude2], zoom_start=13) 
# add a red circle marker to represent both location
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    popup='Your location',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(venues_map)

folium.features.CircleMarker(
    [latitude2, longitude2],
    radius=10,
    color='red',
    popup='Partners location',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(venues_map)

# add the coffee shops as blue circle markers
for lat, lng, label in zip(duplicateRowsDF['Venue Latitide'], duplicateRowsDF['Venue Longitude'], duplicateRowsDF['Venue category']):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)

# display map
venues_map

### Making more universal function, to adapt for long distanced 2 locations

In [129]:
# Calculating distance between 2 endpoints
def haversine_array(lat1, lng1, lat2, lng2):
    lat1, lng1, lat2, lng2 = map(np.radians, (lat1, lng1, lat2, lng2))
    AVG_EARTH_RADIUS = 6371  # in km
    lat = lat2 - lat1
    lng = lng2 - lng1
    d = np.sin(lat * 0.5) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(lng * 0.5) ** 2
    h = 2 * AVG_EARTH_RADIUS * np.arcsin(np.sqrt(d))*1000
    return h

In [202]:
# A little modification of this function by adding venue id
def getNearbyVenues2(latitude,longitude,radius=500):
            
    # create the API request URL
    url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(
        CLIENT_ID, 
        CLIENT_SECRET,  
        latitude, 
        longitude, 
        VERSION,
        search_query,
        radius, 
        LIMIT)

        # make the GET request
    results = requests.get(url).json()["response"]['venues']
        
    dataframe = json_normalize(results)
    # keep only columns that include venue name, and anything that is associated with location
    filtered_columns = ['name', 'categories'] +['location.lat','location.lng', 'location.address']+['id']#+['neighborhood']
    dataframe_filtered = dataframe.loc[:, filtered_columns]

    # filter the category for each row
    dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

    dataframe_filtered=dataframe_filtered.rename(columns={"name": "Venue", "categories": "Venue category",'location.lat': 'Venue Latitide', 'location.lng':'Venue Longitude','location.address':'Address' })              
    return(dataframe_filtered)

### Radius for search will be the half of distance between 2 endpoints

In [130]:
r=haversine_array(latitude,longitude,latitude2,longitude2)/2

#### Let's try to use with better function previous 2 locations

In [137]:
search_query = 'coffee'
#I decided to add extra 100m for distance
radius = r+100
dataframe_filtered_new= getNearbyVenues2(latitude,longitude,radius)
dataframe_filtered_new



Unnamed: 0,Venue,Venue category,Venue Latitide,Venue Longitude,Address,id
0,Hudson/Chambers Coffee Cart,Food Truck,40.715638,-74.009613,Chambers St,50a38463e4b0d1371722335f
1,Laughing Man Coffee & Tea,Coffee Shop,40.717394,-74.010103,184 Duane St,4ea0afbf9adf1e334e4cc0e6
2,Jack’s Stir Brew Coffee,Coffee Shop,40.71645,-74.009755,139 Reade St,597a11ccb2958f056b27c60d
3,Coffee Cart,Coffee Shop,40.715331,-74.011562,,4e6ded5e45dd293273a36c79
4,MBJ Coffee Kiosk @ BMCC,Café,40.71699,-74.012021,"199 Chambers St, New York, NY 10013",4f7c472de4b09b8aeaed0826
5,I m a coffee,Café,40.714289,-74.010661,9 Saint Marks Pl,52a3a47e11d2b5d5943d33dc
6,38th floor coffee cart,Coffee Shop,40.714011,-74.013645,200 Vesey St,5214cdc411d2f67e0de859f4
7,Starbucks,Coffee Shop,40.715534,-74.00903,125 Chambers St,4a718a5af964a52041d91fe3


In [138]:
search_query = 'coffee'
radius = r+100
dataframe_filtered2new= getNearbyVenues2(latitude2,longitude2,radius)
dataframe_filtered2new




Unnamed: 0,Venue,Venue category,Venue Latitide,Venue Longitude,Address,id
0,Blue Spoon Coffee Co.,Coffee Shop,40.714428,-74.006859,76 Chambers St,49c79540f964a520af571fe3
1,Hudson/Chambers Coffee Cart,Food Truck,40.715638,-74.009613,Chambers St,50a38463e4b0d1371722335f
2,Laughing Man Coffee & Tea,Coffee Shop,40.717394,-74.010103,184 Duane St,4ea0afbf9adf1e334e4cc0e6
3,Jack’s Stir Brew Coffee,Coffee Shop,40.71645,-74.009755,139 Reade St,597a11ccb2958f056b27c60d
4,Coffee Cart,Food Truck,40.717064,-74.006391,W. Broadway & Thomas Street,50781223e4b047ff2af4c231
5,Coffee Shop,Café,40.718039,-74.008331,,4e9d9fa06c251c381e0f68da
6,Sam's Coffee,Food Truck,40.716706,-74.00625,214 Church St.,4cab17e6f47ea14340588821
7,Supreme Coffee,Food Truck,40.716884,-74.005879,60 Worth St,4bc710c68b7c9c7450b435cf
8,george howell coffee,Coffee Shop,40.718582,-74.005577,102 Franklin St,539212f3498e10da32c38e98
9,Mr. Akram's Coffee Cart,Food Truck,40.718953,-74.006673,,4c97590f6b35a143141b35dc


In [139]:
res=pd.concat([dataframe_filtered_new,dataframe_filtered2new],ignore_index=True)
duplicateRowsDF = res[res.duplicated(['Address'], keep='first')]
duplicateRowsDF=duplicateRowsDF.dropna(subset=['Address']).reset_index()
duplicateRowsDF

Unnamed: 0,index,Venue,Venue category,Venue Latitide,Venue Longitude,Address,id
0,9,Hudson/Chambers Coffee Cart,Food Truck,40.715638,-74.009613,Chambers St,50a38463e4b0d1371722335f
1,10,Laughing Man Coffee & Tea,Coffee Shop,40.717394,-74.010103,184 Duane St,4ea0afbf9adf1e334e4cc0e6
2,11,Jack’s Stir Brew Coffee,Coffee Shop,40.71645,-74.009755,139 Reade St,597a11ccb2958f056b27c60d
3,18,Starbucks,Coffee Shop,40.715534,-74.00903,125 Chambers St,4a718a5af964a52041d91fe3


We can see how number of common venues has dropped

### Visualization of common and equal distanced locations

In [140]:
venues_map = folium.Map(location=[latitude2, longitude2], zoom_start=13) 
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    popup='Your location',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(venues_map)

folium.features.CircleMarker(
    [latitude2, longitude2],
    radius=10,
    color='red',
    popup='Partners location',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(venues_map)

# add the coffee shops as blue circle markers
for lat, lng, label in zip(duplicateRowsDF['Venue Latitide'], duplicateRowsDF['Venue Longitude'], duplicateRowsDF['Venue category']):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)

# display map
venues_map

##### Now it looks much better!

In [141]:
# Saving ids for getting their ratings
ids=duplicateRowsDF['id'].tolist()

In [142]:
# Function to get venues rating
def get_rating(venue_id):
    url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)

    result = requests.get(url).json()
    try:
        return result['response']['venue']['rating']
    except:
        return(0)

In [143]:
score=[]
for vid in ids:
    score.append(get_rating(vid))
duplicateRowsDF['rating']=score

In [145]:
duplicateRowsDF.sort_values(['rating'], ascending = False)

Unnamed: 0,index,Venue,Venue category,Venue Latitide,Venue Longitude,Address,id,rating
1,10,Laughing Man Coffee & Tea,Coffee Shop,40.717394,-74.010103,184 Duane St,4ea0afbf9adf1e334e4cc0e6,8.8
2,11,Jack’s Stir Brew Coffee,Coffee Shop,40.71645,-74.009755,139 Reade St,597a11ccb2958f056b27c60d,7.5
3,18,Starbucks,Coffee Shop,40.715534,-74.00903,125 Chambers St,4a718a5af964a52041d91fe3,6.5
0,9,Hudson/Chambers Coffee Cart,Food Truck,40.715638,-74.009613,Chambers St,50a38463e4b0d1371722335f,0.0
