# Neighborhoods of New York City and Toronto

In [1]:
# Load primary libraries
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
import json # library to handle JSON files

# library for transforming a JSON file into a pandas dataframe
from pandas.io.json import json_normalize

!conda install -c conda-forge beautifulsoup4 --yes # get BeautifulSoup package
from bs4 import BeautifulSoup # to parse html file

!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

Solving environment: done


  current version: 4.5.11
  latest version: 4.7.11

Please update conda by running

    $ conda update -n base -c defaults conda



## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          90 KB

The following NEW packages will be INSTALLED:

    geographiclib: 1.49-py_0   conda-forge
    geopy:         1.20.0-py_0 conda-forge


Downloading and Extracting Packages
geopy-1.20.0         | 57 KB     | ##################################### | 100% 
geographiclib-1.49   | 32 KB     | ##

# D1. Define coordinates for New York City and Toronto

In [2]:
# Nominatim client used for both city lookups; the user agent identifies this notebook.
geolocator = Nominatim(user_agent="on_explorer")


def _geocode_or_fail(address):
    """Geocode `address` with the shared `geolocator`.

    Returns the location object produced by `geolocator.geocode`.
    Raises ValueError when the lookup yields no result, so the failure is
    reported here instead of as an AttributeError on `.latitude` later.
    """
    location = geolocator.geocode(address)
    if location is None:
        raise ValueError("Could not geocode address: {!r}".format(address))
    return location


# Coordinates of New York City
n_address = 'New York City, NY'
n_location = _geocode_or_fail(n_address)
n_latitude = n_location.latitude
n_longitude = n_location.longitude

# Coordinates of Toronto
t_address = 'Toronto, ON'
t_location = _geocode_or_fail(t_address)
t_latitude = t_location.latitude
t_longitude = t_location.longitude

# create map of New York city using latitude and longitude values
# (the map is only built here; it is not displayed by this cell)
map_ny = folium.Map(location=[n_latitude, n_longitude], zoom_start=10)

# create map of Toronto using latitude and longitude values
# (the map is only built here; it is not displayed by this cell)
map_toronto = folium.Map(location=[t_latitude, t_longitude], zoom_start=10)


# D2a. Get borough and neighborhood data for New York City

In [11]:
# Get from online : New york data
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset

# Get data from json file
with open('newyork_data.json') as json_data:
    ny_data = json.load(json_data)

# Get list of neighborhood from feature tag
ny_neighborhoods_data = ny_data['features']

# define the dataframe columns
ny_column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
ny_neighborhoods = pd.DataFrame(columns=ny_column_names)

for data in ny_neighborhoods_data:
    ny_borough = ny_neighborhood_name = data['properties']['borough'] 
    ny_neighborhood_name = data['properties']['name']
        
    ny_neighborhood_latlon = data['geometry']['coordinates']
    ny_neighborhood_lat = ny_neighborhood_latlon[1]
    ny_neighborhood_lon = ny_neighborhood_latlon[0]
    
    ny_neighborhoods = ny_neighborhoods.append({'Borough': ny_borough,
                                          'Neighborhood': ny_neighborhood_name,
                                          'Latitude': ny_neighborhood_lat,
                                          'Longitude': ny_neighborhood_lon}, ignore_index=True)
ny_neighborhoods.head()


Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


# D2b. Get Borough and neighborhood data for Toronto

In [21]:
# Download the Wikipedia page that lists the postal codes starting with "M".
# The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() fails loudly on an HTTP error instead of parsing an error page.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
source = response.text

# Parse the HTML with BeautifulSoup
soup = BeautifulSoup(source, 'html.parser')

def find_in_list(lst, value):
    """Return the index of the first occurrence of `value` in `lst`, or -1.

    Parameters
    ----------
    lst : list
        The list to search.
    value : object
        The element to look for.

    Returns
    -------
    int
        Index of the first match, or -1 when `value` is not in `lst`.
    """
    try:
        return lst.index(value)
    except ValueError:
        # list.index signals "not found" with ValueError; any other error
        # (e.g. a wrong argument type) is a real bug and must not be hidden.
        return -1

# find the table in the page
table = soup.find('table', class_="wikitable sortable")
if table is None:
    # soup.find returns None when nothing matches; fail with a clear message
    raise ValueError("No table with class 'wikitable sortable' found in the downloaded page")

# define lists to store values in each table column
pc=[] # list for postcode
bh=[] # list for borough
nd=[] # list for neighborhood

# loop through each row in the wiki table
for row in table.find_all("tr"):
    # get the column values in each row
    col = row.find_all("td")

    # Rows with fewer than three <td> cells carry no postcode/borough/neighborhood
    # triple (presumably the header row) and are skipped explicitly, instead of
    # relying on a blanket `except: pass` that would also hide real errors.
    if len(col) < 3:
        continue

    # strip surrounding whitespace/newlines from every cell value
    postcode = col[0].text.strip()
    borough = col[1].text.strip()
    neighborhood = col[2].text.strip()

    # rule #1: Ignore cells with a borough that is Not assigned.
    if borough == "Not assigned":
        continue

    # rule #2: when multiple neighborhoods exist in one postal code area,
    #          combine the neighborhoods with commas
    idx = find_in_list(pc, postcode)
    if idx >= 0:  # index 0 is a valid match; only -1 means "not seen yet"
        nd[idx] = nd[idx] + "," + neighborhood
    else:
        pc.append(postcode)
        bh.append(borough)
        # rule #3: If a cell has a borough but a 'Not assigned' neighborhood,
        #          then the neighborhood will be the same as the borough.
        if neighborhood == "Not assigned":
            nd.append(borough)
        else:
            nd.append(neighborhood)

# define column names
column=['PostalCode', 'Borough', 'Neighborhood']

# create a dataframe from the data
tto=pd.DataFrame({column[0]:pc,column[1]:bh, column[2]:nd})

# save dataframe to csv file
tto.to_csv('temp_toronto_postcodes_borough_neighborhood.csv',index=None)

# coordinates dataframe (read from a local CSV file)
coordDf = pd.read_csv('Toronto_Geospatial_Coordinates.csv')

# Rename the key column so it matches `tto`; reassigning instead of mutating
# in place keeps this cell safe to re-run.
coordDf = coordDf.rename(columns={"Postal Code": "PostalCode"})

# Merge the two dataframes on the postal code (default inner join: postal codes
# missing from either frame are dropped), then discard the join key.
mg = pd.merge(tto, coordDf, on='PostalCode').drop(['PostalCode'], axis=1)
mg.head()


Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,North York,Parkwoods,43.753259,-79.329656
1,North York,Victoria Village,43.725882,-79.315572
2,Downtown Toronto,"Harbourfront,Regent Park",43.65426,-79.360636
3,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
4,Queen's Park,Queen's Park,43.662301,-79.389494


In [None]:
# D3. Use Foursquare to acquire data for the following features:
#   a. Pizza Place (Food)
#   b. Coffee Place
#   c. Parks / Trails
#   d. Nightlife
#   e. Monument / Landmark

# Create a map of New York with neighborhoods superimposed on top.

In [None]:

# Add one circle marker per New York neighborhood to the New York map.
# The map object is `map_ny` (created alongside the city coordinates);
# the previously referenced `map_newyork` is never defined in this notebook.
for lat, lng, borough, neighborhood in zip(ny_neighborhoods['Latitude'], ny_neighborhoods['Longitude'], ny_neighborhoods['Borough'], ny_neighborhoods['Neighborhood']):
    # popup text shown when a marker is clicked: "<neighborhood>, <borough>"
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_ny)
    



In [None]:
from folium import plugins

# start again with a clean copy of the map of New York City
ny_map = folium.Map(location = [n_latitude, n_longitude], zoom_start = 12)

# instantiate a marker cluster object for the neighborhoods in the dataframe
# (variable name kept unchanged so any other cell referring to it keeps working)
incidents = plugins.MarkerCluster().add_to(ny_map)

# Loop through the New York neighborhoods and add each one to the marker cluster.
# NOTE(review): the original looped over `df_incidents` (columns Y, X, Category),
# which is not defined anywhere in the visible notebook; `ny_neighborhoods` is
# assumed to be the intended data source — confirm.
for lat, lng, borough, neighborhood in zip(ny_neighborhoods['Latitude'], ny_neighborhoods['Longitude'], ny_neighborhoods['Borough'], ny_neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    folium.Marker(
        location=[lat, lng],
        icon=None,
        popup=label,
    ).add_to(incidents)

# display map
ny_map