THIRD PART: EXPLORE DATA

Import libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the\
#Foursquare API lab
import folium # map rendering library

print('Libraries imported')

Libraries imported


Read csv created in SECOND PART

In [4]:
df_toronto = pd.read_csv('toronto.csv')

Create a map

In [6]:
address = 'Toronto,Canada'
geolocator = Nominatim()
location = geolocator.geocode(address)
latitude_x = location.latitude
longitude_y = location.longitude
tormap = folium.Map(location=[latitude_x, longitude_y], zoom_start=10)
for lat, lng, bor, nei in zip(df_toronto['Latitude'], df_toronto['Longitude'], \
                              df_toronto['Borough'], df_toronto['Neighborhood']):
    
    label = '{}, {}'.format(nei, bor)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(tormap)  
    
tormap

Set Foursquare Personal Info

In [7]:
#Code deleted for including personal Foursquare info

Create Scaborough df and map (same as before)

In [9]:
df_scar = df_toronto[df_toronto['Borough'] == 'Scarborough'].reset_index(drop=True)
df_scar.head()

Unnamed: 0.1,Unnamed: 0,Postalcode,Borough,Neighborhood,Longitude,Latitude
0,0,M1B,Scarborough,"Rouge, Malvern",-79.195561,43.81165
1,1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",-79.158701,43.785605
2,2,M1E,Scarborough,"Guildwood, Morningside, West Hill",-79.175299,43.76569
3,3,M1G,Scarborough,Woburn,-79.21761,43.768216
4,4,M1H,Scarborough,Cedarbrae,-79.23944,43.769608


In [10]:
add_scar = 'Scarborough,Toronto'
lat_scar = 43.773077
long_scar = -79.257774
scarmap = folium.Map(location=[lat_scar, long_scar], zoom_start=12)
for lat, lng, label in zip(df_scar['Latitude'], df_scar['Longitude'],\
                           df_scar['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(scarmap)  
    
scarmap

Explore the second neighborhood in the df

In [31]:
neighborhood_latitude = df_scar.loc[1, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df_scar.loc[1, 'Longitude'] # neighborhood longitude value

neighborhood_name = df_scar.loc[1, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Highland Creek, Rouge Hill, Port Union are 43.785605000000025, -79.15870110299994.


Get the top 50 venues that are in Highland within a radius of 500 meters

In [32]:
LIMIT = 50 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}\
&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.\
format(CLIENT_ID, CLIENT_SECRET, lat_scar, long_scar, VERSION, radius, LIMIT)

In [33]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5c5a4e364c1f6744ecacfef0'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Scarborough City Centre',
  'headerFullLocation': 'Scarborough City Centre, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 41,
  'suggestedBounds': {'ne': {'lat': 43.7775770045, 'lng': -79.25155367954714},
   'sw': {'lat': 43.7685769955, 'lng': -79.26399432045285}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5085ec39e4b0b1ead2eb0818',
       'name': 'Disney Store',
       'location': {'address': '300 Borough Drive',
        'crossStreet': 'in Scarborough Town Centre',
        'lat': 43.775537,
        'lng': -79.256833,
        'labeledLa

In [34]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [35]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Disney Store,Toy / Game Store,43.775537,-79.256833
1,Tommy Hilfiger Company Store,Clothing Store,43.776015,-79.257369
2,DAVIDsTEA,Tea Room,43.776613,-79.258516
3,American Eagle Outfitters,Clothing Store,43.775908,-79.258352
4,SEPHORA,Cosmetics Shop,43.775592,-79.258242


In [36]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

41 venues were returned by Foursquare.


Define a f(x) for all neighborhoods

In [37]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret\
        ={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()['response']['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

Retrieve venues for all neighborhoods

In [38]:
scar_venues = getNearbyVenues(names=df_scar['Neighborhood'],
                                   latitudes=df_scar['Latitude'],
                                   longitudes=df_scar['Longitude']
                                  )

Rouge, Malvern


KeyError: 'groups'

In [None]:
GOT THE KEYERROR GROUPS ERROR, NOT ABLE TO FIX IT