In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [2]:
url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(url, 'lxml')

In [3]:
A = []
B = []
C = []
table = soup.find('table', class_="wikitable sortable")

In [4]:
for row in table.findAll('tr'):
    cells = row.findAll('td')
    if len(cells) > 0: #this excludes the column titles which have no td values
        A.append(cells[0].find(text=True))
        B.append(cells[1].find(text=True))
        C.append(cells[2].find(text=True))

In [5]:
df = pd.DataFrame(A , columns = ['PostalCode'])
df['Borough'] = B
df['Neighborhood'] = C
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [6]:
df2 = df[df['Borough'] != 'Not assigned']
df2.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


In [7]:
df3 = df2.groupby(['PostalCode', 'Borough']).agg(lambda col: ','.join(col))
df3.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighborhood
PostalCode,Borough,Unnamed: 2_level_1
M1B,Scarborough,"Rouge,Malvern"
M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
M1E,Scarborough,"Guildwood\n,Morningside,West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


In [8]:
df3["Neighborhood"]= df3["Neighborhood"].replace('\n', '', regex=True)

In [9]:
for index, row in df3.iterrows():
    if row['Neighborhood'] == 'Not assigned':
        row['Neighborhood'] = index[1]

In [10]:
df4 = df3.reset_index()
df4
df4.shape

(103, 3)

In [11]:
geo_url="http://cocl.us/Geospatial_data"
geo_data=pd.read_csv(geo_url)
geo_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [12]:
lat = []
long = []
for index, row in df4.iterrows():
    value = str(row['PostalCode'])
    geo_data1 = geo_data[geo_data['Postal Code'] == value]
    Latitude = float(geo_data1['Latitude'])
    Longitude = float(geo_data1['Longitude'])
    
    lat.append(Latitude)
    long.append(Longitude)

In [13]:
df4['Latitude'] = lat
df4['Longitude'] = long
df4.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [14]:
#!conda install -c conda-forge folium=0.5.0  
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np

In [15]:
map_Toronto = folium.Map(location=[43.6532,-79.3832], zoom_start=10)
for lat, lng, borough, neighborhood in zip(df4['Latitude'], df4['Longitude'], df4['Borough'], df4['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_Toronto)  
map_Toronto

In [16]:
df5 = df4['Borough'].unique()
df5
df5_list = df5.tolist()

In [17]:
Borough_Code = []
for borough in df4['Borough']:
    Borough_Code.append(df5_list.index(borough))

In [18]:
df4['Borough Code'] = Borough_Code
df4.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Borough Code
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,0
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,0
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711,0
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0


In [19]:
colors_array = cm.rainbow(np.linspace(0, len(df5) - 1))
rainbow = [colors.rgb2hex(i) for i in colors_array]

In [20]:
map_Toronto2 = folium.Map(location=[43.6532,-79.3832], zoom_start=10)
for lat, lng, borough, neighborhood, borough_code in zip(df4['Latitude'], df4['Longitude'], df4['Borough'], df4['Neighborhood'], df4['Borough Code']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=rainbow[borough_code-1],
        fill=True,
        fill_color=rainbow[borough_code-1],
        fill_opacity=0.7).add_to(map_Toronto2)  
map_Toronto2

### NYC neighborhoods

In [22]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [23]:
with open('nyu-2451-34572-geojson.json') as json_data:
    newyork_data = json.load(json_data)

In [24]:
newyork_data

{'type': 'FeatureCollection',
 'totalFeatures': 306,
 'features': [{'type': 'Feature',
   'id': 'nyu_2451_34572.1',
   'geometry': {'type': 'Point',
    'coordinates': [-73.84720052054902, 40.89470517661]},
   'geometry_name': 'geom',
   'properties': {'name': 'Wakefield',
    'stacked': 1,
    'annoline1': 'Wakefield',
    'annoline2': None,
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.84720052054902,
     40.89470517661,
     -73.84720052054902,
     40.89470517661]}},
  {'type': 'Feature',
   'id': 'nyu_2451_34572.2',
   'geometry': {'type': 'Point',
    'coordinates': [-73.82993910812398, 40.87429419303012]},
   'geometry_name': 'geom',
   'properties': {'name': 'Co-op City',
    'stacked': 2,
    'annoline1': 'Co-op',
    'annoline2': 'City',
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.82993910812398,
     40.87429419303012,
     -73.82993910812398,
     40.87429419303012]}},
  {'type': 'Feature',
 

In [25]:
neighborhoods_data = newyork_data['features']

In [26]:
neighborhoods_data[0]

{'type': 'Feature',
 'id': 'nyu_2451_34572.1',
 'geometry': {'type': 'Point',
  'coordinates': [-73.84720052054902, 40.89470517661]},
 'geometry_name': 'geom',
 'properties': {'name': 'Wakefield',
  'stacked': 1,
  'annoline1': 'Wakefield',
  'annoline2': None,
  'annoline3': None,
  'annoangle': 0.0,
  'borough': 'Bronx',
  'bbox': [-73.84720052054902,
   40.89470517661,
   -73.84720052054902,
   40.89470517661]}}

In [27]:
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)

In [28]:
neighborhoods

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude


In [29]:
for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [30]:
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [31]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighborhoods['Borough'].unique()),
        neighborhoods.shape[0]
    )
)

The dataframe has 5 boroughs and 306 neighborhoods.


In [33]:
address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
nylatitude = location.latitude
nylongitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(nylatitude, nylongitude))

The geograpical coordinate of New York City are 40.7308619, -73.9871558.


In [35]:
# create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[nylatitude, nylongitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

In [52]:
CLIENT_ID = 'XUDICXZVCI5MBPTAR5U1KHNLBVGVKS3ZOOZ5S2223MZEQIZH' # your Foursquare ID
CLIENT_SECRET = 'KUTHVVT30CLQ3XCXSJGFWGHMTBHTSX2HPKJD1RQDF42YGKXE' # your Foursquare Secret
VERSION = '20190311' # Foursquare API version

print('My credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

My credentails:
CLIENT_ID: XUDICXZVCI5MBPTAR5U1KHNLBVGVKS3ZOOZ5S2223MZEQIZH
CLIENT_SECRET:KUTHVVT30CLQ3XCXSJGFWGHMTBHTSX2HPKJD1RQDF42YGKXE


In [53]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [54]:
LIMIT = 100
RADIUS = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&radius={}&v={}&ll={},{}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    RADIUS,
    nylatitude, 
    nylongitude,  
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=XUDICXZVCI5MBPTAR5U1KHNLBVGVKS3ZOOZ5S2223MZEQIZH&client_secret=KUTHVVT30CLQ3XCXSJGFWGHMTBHTSX2HPKJD1RQDF42YGKXE&radius=20190311&v=500&ll=40.7308619,-73.9871558&limit=100'

In [55]:
results = requests.get(url).json()
results

{'meta': {'code': 410,
  'errorType': 'param_error',
  'errorDetail': 'The Foursquare API no longer supports requests that pass in a version v <= 20120609. For more details see https://developer.foursquare.com/overview/versioning',
  'requestId': '5c86c1d5351e3d13a6986892'},
 'response': {}}

In [46]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

KeyError: 'groups'

In [41]:
print(nearby_venues)

NameError: name 'nearby_venues' is not defined

In [41]:
# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
nearby_venues.tail()

Unnamed: 0,name,categories,lat,lng
95,Stuffed Ice Cream,Ice Cream Shop,40.727959,-73.985162
96,Liquiteria,Juice Bar,40.733606,-73.989922
97,Video Games New York,Video Game Store,40.728304,-73.990502
98,Zadie's Oyster Room,Seafood Restaurant,40.72975,-73.98285
99,Abraço,Coffee Shop,40.727243,-73.986099


In [42]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

100 venues were returned by Foursquare.


In [43]:
print('There are {} uniques categories.'.format(len(nearby_venues['categories'].unique())))

There are 65 uniques categories.


In [47]:
nearby_venues[nearby_venues['categories'].str.contains("Restaurant")==True]

Unnamed: 0,name,categories,lat,lng
1,Momofuku Ssäm Bar,Asian Restaurant,40.731711,-73.985571
2,Han Dynasty,Chinese Restaurant,40.73213,-73.98809
9,Veselka,Ukrainian Restaurant,40.729124,-73.986993
15,The Smith,American Restaurant,40.731185,-73.988672
19,Shabu-Tatsu,Shabu-Shabu Restaurant,40.72941,-73.985858
21,Curry-Ya,Japanese Curry Restaurant,40.729445,-73.985973
26,Kanoyama,Sushi Restaurant,40.730476,-73.986326
27,Sobaya,Soba Restaurant,40.729575,-73.987977
28,Beyond Sushi,Vegetarian / Vegan Restaurant,40.73281,-73.986002
29,Hasaki,Sushi Restaurant,40.729888,-73.988761
