#### The Battle of Neighborhoods:

## New York vs Toronto

### New York

In [1]:
# Import packages
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [2]:
# Loading data
with open('Downloads/nyu_2451_34572-geojson.json') as json_data:
    newyork_data = json.load(json_data)

In [3]:
# Showing data 
newyork_data

{'type': 'FeatureCollection',
 'totalFeatures': 306,
 'features': [{'type': 'Feature',
   'id': 'nyu_2451_34572.1',
   'geometry': {'type': 'Point',
    'coordinates': [-73.84720052054902, 40.89470517661]},
   'geometry_name': 'geom',
   'properties': {'name': 'Wakefield',
    'stacked': 1,
    'annoline1': 'Wakefield',
    'annoline2': None,
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.84720052054902,
     40.89470517661,
     -73.84720052054902,
     40.89470517661]}},
  {'type': 'Feature',
   'id': 'nyu_2451_34572.2',
   'geometry': {'type': 'Point',
    'coordinates': [-73.82993910812398, 40.87429419303012]},
   'geometry_name': 'geom',
   'properties': {'name': 'Co-op City',
    'stacked': 2,
    'annoline1': 'Co-op',
    'annoline2': 'City',
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.82993910812398,
     40.87429419303012,
     -73.82993910812398,
     40.87429419303012]}},
  {'type': 'Feature',
 

In [4]:
# Define a new variable that includes features
neigh_ny_data = newyork_data['features']

In [5]:
# First item in this list 
neigh_ny_data[0]

{'type': 'Feature',
 'id': 'nyu_2451_34572.1',
 'geometry': {'type': 'Point',
  'coordinates': [-73.84720052054902, 40.89470517661]},
 'geometry_name': 'geom',
 'properties': {'name': 'Wakefield',
  'stacked': 1,
  'annoline1': 'Wakefield',
  'annoline2': None,
  'annoline3': None,
  'annoangle': 0.0,
  'borough': 'Bronx',
  'bbox': [-73.84720052054902,
   40.89470517661,
   -73.84720052054902,
   40.89470517661]}}

#### Tranform the data into a pandas dataframe

In [6]:
# Define the dataframe columns
column_names = ['BoroughNY', 'NeighborhoodNY', 'LatitudeNY', 'LongitudeNY'] 

# Instantiate the dataframe
neigh_newyork = pd.DataFrame(columns=column_names)


# Loop through the data and fill the dataframe one row at a time
for data in neigh_ny_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neigh_newyork = neigh_newyork.append({'BoroughNY': borough,
                                          'NeighborhoodNY': neighborhood_name,
                                          'LatitudeNY': neighborhood_lat,
                                          'LongitudeNY': neighborhood_lon}, ignore_index=True)
    
# Showing the result
neigh_newyork.head()

Unnamed: 0,BoroughNY,NeighborhoodNY,LatitudeNY,LongitudeNY
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


##### Use geopy library to get the latitude and longitude values of New York City

In [8]:
# Define a user_agent called ny_explorer 
address_NY = 'New York City, NY'
geolocator_NY = Nominatim(user_agent="ny_explorer")

# Define an instance of the geocoder.
location_NY = geolocator_NY.geocode(address_NY)
latitude_NY = location_NY.latitude
longitude_NY = location_NY.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude_NY, longitude_NY))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


##### Create a map of New York with neighborhoods superimposed on top

In [9]:
# Create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[latitude_NY, longitude_NY], zoom_start=10)

# Add markers to map
for lat, lng, borough, neighborhood in zip(neigh_newyork['LatitudeNY'], neigh_newyork['LongitudeNY'], neigh_newyork['BoroughNY'], neigh_newyork['NeighborhoodNY']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

### Toronto

In [28]:
# Toronto 
!pip install beautifulsoup4
from bs4 import BeautifulSoup

# Url
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Sending the request
source = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M").text
soup = BeautifulSoup(source, 'lxml')

# Setting up the table
table = soup.find("table")
table_rows = table.find_all("tr")

# Filling the table by loop
data = []
for tr in table_rows:
    td = tr.find_all("td")
    row = [tr.text for tr in td]
    
    # Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
    if row != [] and row[1] != "Not assigned\n":
        # If a cell has a borough but a "Not assigned" neighborhood, then the neighborhood will be the same as the borough.
        if "Not assigned\n" in row[2]: 
            row[2] = row[1]
        data.append(row)

# -Dataframe with 3 columns
df_1 = pd.DataFrame(data, columns = ["PostalCodeTO", "BoroughTO", "NeighborhoodTO"])
df_1.head()



Unnamed: 0,PostalCodeTO,BoroughTO,NeighborhoodTO
0,M3A\n,North York\n,Parkwoods\n
1,M4A\n,North York\n,Victoria Village\n
2,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"
3,M6A\n,North York\n,"Lawrence Manor, Lawrence Heights\n"
4,M7A\n,Downtown Toronto\n,"Queen's Park, Ontario Provincial Government\n"


##### Cleaning data

In [29]:
# Removing "\n" from column Neighborhood
df_1["NeighborhoodTO"] = df_1["NeighborhoodTO"].str.replace("\n","")
df_1.head()

Unnamed: 0,PostalCodeTO,BoroughTO,NeighborhoodTO
0,M3A\n,North York\n,Parkwoods
1,M4A\n,North York\n,Victoria Village
2,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront"
3,M6A\n,North York\n,"Lawrence Manor, Lawrence Heights"
4,M7A\n,Downtown Toronto\n,"Queen's Park, Ontario Provincial Government"


In [30]:
# Removing "\n" from column PostalCode
df_1["PostalCodeTO"] = df_1["PostalCodeTO"].str.replace("\n","")
df_1.head()

Unnamed: 0,PostalCodeTO,BoroughTO,NeighborhoodTO
0,M3A,North York\n,Parkwoods
1,M4A,North York\n,Victoria Village
2,M5A,Downtown Toronto\n,"Regent Park, Harbourfront"
3,M6A,North York\n,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto\n,"Queen's Park, Ontario Provincial Government"


In [31]:
# Removing "\n" from column Borough
df_1["BoroughTO"] = df_1["BoroughTO"].str.replace("\n","")
df_1.head() 

Unnamed: 0,PostalCodeTO,BoroughTO,NeighborhoodTO
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


##### Loading  a second dataset with Latitude & Longitude for Toronto

In [32]:
# Load data 
df_2 = pd.read_csv("Downloads/Geospatial_Coordinates.csv")
df_2.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


##### Merge the first dataset with the second one, creating a new dataframe

In [45]:
# Creating a new dataframe joing the first two
df_toronto = pd.merge(df_1, df_2, how='left', left_on = "PostalCodeTO", right_on = "Postal Code")
# Remove the "Postal Code" column
df_toronto.drop("Postal Code", axis=1, inplace=True)
df_toronto.head()

Unnamed: 0,PostalCodeTO,BoroughTO,NeighborhoodTO,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


#### Getting coordinates of Toronto for creating a map

In [46]:
# Get the latitude & longitude of Toronto
address_TO = 'Toronto, ON'

# Define a user_agent called my_explorer
geolocator_TO = Nominatim(user_agent="to_explorer")
# Define an instance of the decoder
location_TO = geolocator_TO.geocode(address_TO)
latitude_TO = location_TO.latitude
longitude_TO = location_TO.longitude
print("The geographical coordinates of Toronto are {}, {}.".format(latitude_TO, longitude_TO))

The geographical coordinates of Toronto are 43.6534817, -79.3839347.


In [48]:
# Creating a map of Toronto
map_toronto = folium.Map(location=[latitude_TO,longitude_TO], zoom_start=10)
map_toronto

# Defining the values
neigh_toronto_data = df_toronto
column_names = ['BoroughTO', 'NeighborhoodTO', 'LatitudeTO', 'LongitudeTO']
neigh_toronto = pd.DataFrame(columns=column_names)

# Add markers to map
for lat, lng, Borough, Neighborhood in zip(neigh_toronto_data["Latitude"], neigh_toronto_data["Longitude"], neigh_toronto_data["BoroughTO"], neigh_toronto_data["NeighborhoodTO"]):
    label = '{}', '{}'.format(Neighborhood, Borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
    [lat, lng],
    radius=5,
    popup=label,
    color='green',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False).add_to(map_toronto)
    
map_toronto

In [49]:
# Shape df New York 
neigh_newyork.shape

(306, 4)

In [50]:
# Shape df Toronto 
df_toronto.shape

(103, 5)

In [51]:
neigh_newyork["NeighborhoodNY"].unique

<bound method Series.unique of 0                      Wakefield
1                     Co-op City
2                    Eastchester
3                      Fieldston
4                      Riverdale
5                    Kingsbridge
6                    Marble Hill
7                       Woodlawn
8                        Norwood
9                 Williamsbridge
10                    Baychester
11                Pelham Parkway
12                   City Island
13                  Bedford Park
14            University Heights
15                Morris Heights
16                       Fordham
17                  East Tremont
18                    West Farms
19                  High  Bridge
20                       Melrose
21                    Mott Haven
22                   Port Morris
23                      Longwood
24                   Hunts Point
25                    Morrisania
26                     Soundview
27                  Clason Point
28                   Throgs Neck
29          

In [52]:
df_toronto["NeighborhoodTO"].unique

<bound method Series.unique of 0                                              Parkwoods
1                                       Victoria Village
2                              Regent Park, Harbourfront
3                       Lawrence Manor, Lawrence Heights
4            Queen's Park, Ontario Provincial Government
5                Islington Avenue, Humber Valley Village
6                                         Malvern, Rouge
7                                              Don Mills
8                        Parkview Hill, Woodbine Gardens
9                               Garden District, Ryerson
10                                             Glencairn
11     West Deane Park, Princess Gardens, Martin Grov...
12                Rouge Hill, Port Union, Highland Creek
13                                             Don Mills
14                                      Woodbine Heights
15                                        St. James Town
16                                    Humewood-Cedarvale


In [53]:
# Mixing two dataset
df_mix = df_toronto[["NeighborhoodTO"]], neigh_newyork[["NeighborhoodNY"]]
df_mix

(                                        NeighborhoodTO
 0                                            Parkwoods
 1                                     Victoria Village
 2                            Regent Park, Harbourfront
 3                     Lawrence Manor, Lawrence Heights
 4          Queen's Park, Ontario Provincial Government
 5              Islington Avenue, Humber Valley Village
 6                                       Malvern, Rouge
 7                                            Don Mills
 8                      Parkview Hill, Woodbine Gardens
 9                             Garden District, Ryerson
 10                                           Glencairn
 11   West Deane Park, Princess Gardens, Martin Grov...
 12              Rouge Hill, Port Union, Highland Creek
 13                                           Don Mills
 14                                    Woodbine Heights
 15                                      St. James Town
 16                                  Humewood-Ce

In [54]:
df_newyork_cut = neigh_newyork[["NeighborhoodNY"]].head(100)
df_newyork_cut

Unnamed: 0,NeighborhoodNY
0,Wakefield
1,Co-op City
2,Eastchester
3,Fieldston
4,Riverdale
5,Kingsbridge
6,Marble Hill
7,Woodlawn
8,Norwood
9,Williamsbridge


In [55]:
df_toronto_cut = df_toronto[["NeighborhoodTO"]].head(100)
df_toronto_cut

Unnamed: 0,NeighborhoodTO
0,Parkwoods
1,Victoria Village
2,"Regent Park, Harbourfront"
3,"Lawrence Manor, Lawrence Heights"
4,"Queen's Park, Ontario Provincial Government"
5,"Islington Avenue, Humber Valley Village"
6,"Malvern, Rouge"
7,Don Mills
8,"Parkview Hill, Woodbine Gardens"
9,"Garden District, Ryerson"


In [56]:
df_mix_cut = df_toronto_cut, df_newyork_cut
df_mix_cut

(                                       NeighborhoodTO
 0                                           Parkwoods
 1                                    Victoria Village
 2                           Regent Park, Harbourfront
 3                    Lawrence Manor, Lawrence Heights
 4         Queen's Park, Ontario Provincial Government
 5             Islington Avenue, Humber Valley Village
 6                                      Malvern, Rouge
 7                                           Don Mills
 8                     Parkview Hill, Woodbine Gardens
 9                            Garden District, Ryerson
 10                                          Glencairn
 11  West Deane Park, Princess Gardens, Martin Grov...
 12             Rouge Hill, Port Union, Highland Creek
 13                                          Don Mills
 14                                   Woodbine Heights
 15                                     St. James Town
 16                                 Humewood-Cedarvale
 17  Ering

In [57]:
print("New York columns:", neigh_newyork.columns)

print("Toronto columns:", df_toronto.columns)

New York columns: Index(['BoroughNY', 'NeighborhoodNY', 'LatitudeNY', 'LongitudeNY'], dtype='object')
Toronto columns: Index(['PostalCodeTO', 'BoroughTO', 'NeighborhoodTO', 'Latitude', 'Longitude'], dtype='object')


In [59]:
# Creating a new dataframe joing the first two
df_mix2 = neigh_newyork[['BoroughNY']], df_toronto[["BoroughTO"]], neigh_newyork[['NeighborhoodNY']], df_toronto[["NeighborhoodTO"]], neigh_newyork[['LatitudeNY']], df_toronto[["Latitude"]], neigh_newyork[['LongitudeNY']], df_toronto[["Longitude"]]

In [60]:
# Showing the new dataset 
df_mix2

(         BoroughNY
 0            Bronx
 1            Bronx
 2            Bronx
 3            Bronx
 4            Bronx
 5            Bronx
 6        Manhattan
 7            Bronx
 8            Bronx
 9            Bronx
 10           Bronx
 11           Bronx
 12           Bronx
 13           Bronx
 14           Bronx
 15           Bronx
 16           Bronx
 17           Bronx
 18           Bronx
 19           Bronx
 20           Bronx
 21           Bronx
 22           Bronx
 23           Bronx
 24           Bronx
 25           Bronx
 26           Bronx
 27           Bronx
 28           Bronx
 29           Bronx
 30           Bronx
 31           Bronx
 32           Bronx
 33           Bronx
 34           Bronx
 35           Bronx
 36           Bronx
 37           Bronx
 38           Bronx
 39           Bronx
 40           Bronx
 41           Bronx
 42           Bronx
 43           Bronx
 44           Bronx
 45           Bronx
 46        Brooklyn
 47        Brooklyn
 48        Brooklyn


In [61]:
pd.concat([neigh_newyork, df_toronto], axis=0, ignore_index=True).head(100)

Unnamed: 0,BoroughNY,NeighborhoodNY,LatitudeNY,LongitudeNY,PostalCodeTO,BoroughTO,NeighborhoodTO,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201,,,,,
1,Bronx,Co-op City,40.874294,-73.829939,,,,,
2,Bronx,Eastchester,40.887556,-73.827806,,,,,
3,Bronx,Fieldston,40.895437,-73.905643,,,,,
4,Bronx,Riverdale,40.890834,-73.912585,,,,,
5,Bronx,Kingsbridge,40.881687,-73.902818,,,,,
6,Manhattan,Marble Hill,40.876551,-73.91066,,,,,
7,Bronx,Woodlawn,40.898273,-73.867315,,,,,
8,Bronx,Norwood,40.877224,-73.879391,,,,,
9,Bronx,Williamsbridge,40.881039,-73.857446,,,,,


In [62]:
# Creating a new dataframe with New York & Toronto data
df_mix3 = pd.concat([neigh_newyork, df_toronto], axis=1, ignore_index=True)
df_mix3

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,Bronx,Wakefield,40.894705,-73.847201,M3A,North York,Parkwoods,43.753259,-79.329656
1,Bronx,Co-op City,40.874294,-73.829939,M4A,North York,Victoria Village,43.725882,-79.315572
2,Bronx,Eastchester,40.887556,-73.827806,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,Bronx,Fieldston,40.895437,-73.905643,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,Bronx,Riverdale,40.890834,-73.912585,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,Bronx,Kingsbridge,40.881687,-73.902818,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,Manhattan,Marble Hill,40.876551,-73.91066,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,Bronx,Woodlawn,40.898273,-73.867315,M3B,North York,Don Mills,43.745906,-79.352188
8,Bronx,Norwood,40.877224,-73.879391,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,Bronx,Williamsbridge,40.881039,-73.857446,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [63]:
# Getting column names from the old datasets
print("New York columns:", neigh_newyork.columns)
print("Toronto columns:", df_toronto.columns)

New York columns: Index(['BoroughNY', 'NeighborhoodNY', 'LatitudeNY', 'LongitudeNY'], dtype='object')
Toronto columns: Index(['PostalCodeTO', 'BoroughTO', 'NeighborhoodTO', 'Latitude', 'Longitude'], dtype='object')


In [64]:
# Setting column names for the new dataframe just created
df_mix3.columns = ['BoroughNY', 'NeighborhoodNY', 'LatitudeNY', 'LongitudeNY', 'PostalCodeTO', 'BoroughTO', 'NeighborhoodTO', 'LatitudeTO', 'LongitudeTO']
df_mix3

Unnamed: 0,BoroughNY,NeighborhoodNY,LatitudeNY,LongitudeNY,PostalCodeTO,BoroughTO,NeighborhoodTO,LatitudeTO,LongitudeTO
0,Bronx,Wakefield,40.894705,-73.847201,M3A,North York,Parkwoods,43.753259,-79.329656
1,Bronx,Co-op City,40.874294,-73.829939,M4A,North York,Victoria Village,43.725882,-79.315572
2,Bronx,Eastchester,40.887556,-73.827806,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,Bronx,Fieldston,40.895437,-73.905643,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,Bronx,Riverdale,40.890834,-73.912585,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,Bronx,Kingsbridge,40.881687,-73.902818,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,Manhattan,Marble Hill,40.876551,-73.91066,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,Bronx,Woodlawn,40.898273,-73.867315,M3B,North York,Don Mills,43.745906,-79.352188
8,Bronx,Norwood,40.877224,-73.879391,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,Bronx,Williamsbridge,40.881039,-73.857446,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [65]:
# Removing the column PostalCodeTo
df_mix3.drop('PostalCodeTO', axis=1, inplace=True)
df_mix3

Unnamed: 0,BoroughNY,NeighborhoodNY,LatitudeNY,LongitudeNY,BoroughTO,NeighborhoodTO,LatitudeTO,LongitudeTO
0,Bronx,Wakefield,40.894705,-73.847201,North York,Parkwoods,43.753259,-79.329656
1,Bronx,Co-op City,40.874294,-73.829939,North York,Victoria Village,43.725882,-79.315572
2,Bronx,Eastchester,40.887556,-73.827806,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,Bronx,Fieldston,40.895437,-73.905643,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,Bronx,Riverdale,40.890834,-73.912585,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,Bronx,Kingsbridge,40.881687,-73.902818,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,Manhattan,Marble Hill,40.876551,-73.91066,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,Bronx,Woodlawn,40.898273,-73.867315,North York,Don Mills,43.745906,-79.352188
8,Bronx,Norwood,40.877224,-73.879391,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,Bronx,Williamsbridge,40.881039,-73.857446,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [66]:
df_mix3['BoroughNY'].count()

306

In [67]:
df_mix3['NeighborhoodNY'].count()

306

In [68]:
df_mix3['BoroughTO'].count()

103

In [69]:
df_mix3['NeighborhoodTO'].count()

103

In [70]:
df_mix3['BoroughNY'].unique()

array(['Bronx', 'Manhattan', 'Brooklyn', 'Queens', 'Staten Island'],
      dtype=object)

In [71]:
df_mix3['NeighborhoodNY'].unique()

array(['Wakefield', 'Co-op City', 'Eastchester', 'Fieldston', 'Riverdale',
       'Kingsbridge', 'Marble Hill', 'Woodlawn', 'Norwood',
       'Williamsbridge', 'Baychester', 'Pelham Parkway', 'City Island',
       'Bedford Park', 'University Heights', 'Morris Heights', 'Fordham',
       'East Tremont', 'West Farms', 'High  Bridge', 'Melrose',
       'Mott Haven', 'Port Morris', 'Longwood', 'Hunts Point',
       'Morrisania', 'Soundview', 'Clason Point', 'Throgs Neck',
       'Country Club', 'Parkchester', 'Westchester Square', 'Van Nest',
       'Morris Park', 'Belmont', 'Spuyten Duyvil', 'North Riverdale',
       'Pelham Bay', 'Schuylerville', 'Edgewater Park', 'Castle Hill',
       'Olinville', 'Pelham Gardens', 'Concourse', 'Unionport',
       'Edenwald', 'Bay Ridge', 'Bensonhurst', 'Sunset Park',
       'Greenpoint', 'Gravesend', 'Brighton Beach', 'Sheepshead Bay',
       'Manhattan Terrace', 'Flatbush', 'Crown Heights', 'East Flatbush',
       'Kensington', 'Windsor Terrace', 'Pro

In [73]:
df_mix3['BoroughTO'].unique()

array(['North York', 'Downtown Toronto', 'Etobicoke', 'Scarborough',
       'East York', 'York', 'East Toronto', 'West Toronto',
       'Central Toronto', 'Mississauga', nan], dtype=object)

In [74]:
df_mix3['NeighborhoodTO'].unique()

array(['Parkwoods', 'Victoria Village', 'Regent Park, Harbourfront',
       'Lawrence Manor, Lawrence Heights',
       "Queen's Park, Ontario Provincial Government",
       'Islington Avenue, Humber Valley Village', 'Malvern, Rouge',
       'Don Mills', 'Parkview Hill, Woodbine Gardens',
       'Garden District, Ryerson', 'Glencairn',
       'West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale',
       'Rouge Hill, Port Union, Highland Creek', 'Woodbine Heights',
       'St. James Town', 'Humewood-Cedarvale',
       'Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood',
       'Guildwood, Morningside, West Hill', 'The Beaches', 'Berczy Park',
       'Caledonia-Fairbanks', 'Woburn', 'Leaside', 'Central Bay Street',
       'Christie', 'Cedarbrae', 'Hillcrest Village',
       'Bathurst Manor, Wilson Heights, Downsview North',
       'Thorncliffe Park', 'Richmond, Adelaide, King',
       'Dufferin, Dovercourt Village', 'Scarborough Village',
       'Fairview, H

In [75]:
df_mix3.groupby(["BoroughNY", "BoroughTO"]).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,NeighborhoodNY,LatitudeNY,LongitudeNY,NeighborhoodTO,LatitudeTO,LongitudeTO
BoroughNY,BoroughTO,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Bronx,Downtown Toronto,10,10,10,10,10,10
Bronx,East Toronto,2,2,2,2,2,2
Bronx,East York,5,5,5,5,5,5
Bronx,Etobicoke,3,3,3,3,3,3
Bronx,North York,13,13,13,13,13,13
Bronx,Scarborough,7,7,7,7,7,7
Bronx,West Toronto,3,3,3,3,3,3
Bronx,York,2,2,2,2,2,2
Brooklyn,Central Toronto,9,9,9,9,9,9
Brooklyn,Downtown Toronto,9,9,9,9,9,9


In [76]:
df_mix3.groupby(["BoroughNY", "NeighborhoodNY", "LatitudeNY", "LongitudeNY"]).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,BoroughTO,NeighborhoodTO,LatitudeTO,LongitudeTO
BoroughNY,NeighborhoodNY,LatitudeNY,LongitudeNY,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Bronx,Allerton,40.865788,-73.859319,0,0,0,0
Bronx,Baychester,40.866858,-73.835798,1,1,1,1
Bronx,Bedford Park,40.870185,-73.885512,1,1,1,1
Bronx,Belmont,40.857277,-73.888452,1,1,1,1
Bronx,Bronxdale,40.852723,-73.861726,0,0,0,0
Bronx,Castle Hill,40.819014,-73.848027,1,1,1,1
Bronx,City Island,40.847247,-73.786488,1,1,1,1
Bronx,Claremont Village,40.831428,-73.901199,0,0,0,0
Bronx,Clason Point,40.806551,-73.854144,1,1,1,1
Bronx,Co-op City,40.874294,-73.829939,1,1,1,1


In [77]:
df_mix3.sort_values(["BoroughNY", "NeighborhoodNY", "LatitudeNY", "LongitudeNY"])

Unnamed: 0,BoroughNY,NeighborhoodNY,LatitudeNY,LongitudeNY,BoroughTO,NeighborhoodTO,LatitudeTO,LongitudeTO
298,Bronx,Allerton,40.865788,-73.859319,,,,
10,Bronx,Baychester,40.866858,-73.835798,North York,Glencairn,43.709577,-79.445073
13,Bronx,Bedford Park,40.870185,-73.885512,North York,Don Mills,43.7259,-79.340923
34,Bronx,Belmont,40.857277,-73.888452,North York,"Northwood Park, York University",43.76798,-79.487262
297,Bronx,Bronxdale,40.852723,-73.861726,,,,
40,Bronx,Castle Hill,40.819014,-73.848027,North York,Downsview,43.737473,-79.464763
12,Bronx,City Island,40.847247,-73.786488,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
267,Bronx,Claremont Village,40.831428,-73.901199,,,,
27,Bronx,Clason Point,40.806551,-73.854144,North York,Hillcrest Village,43.803762,-79.363452
1,Bronx,Co-op City,40.874294,-73.829939,North York,Victoria Village,43.725882,-79.315572


In [78]:
# Creating a new df for New York radians
neigh_newyork_radians = df_mix3[['BoroughNY', 'NeighborhoodNY', 'LatitudeNY', 'LongitudeNY']]
neigh_newyork_radians 

Unnamed: 0,BoroughNY,NeighborhoodNY,LatitudeNY,LongitudeNY
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585
5,Bronx,Kingsbridge,40.881687,-73.902818
6,Manhattan,Marble Hill,40.876551,-73.91066
7,Bronx,Woodlawn,40.898273,-73.867315
8,Bronx,Norwood,40.877224,-73.879391
9,Bronx,Williamsbridge,40.881039,-73.857446


In [81]:
# Radians New York
neigh_newyork_radians['RadLatNY'] = np.radians(neigh_newyork_radians['LatitudeNY'])
neigh_newyork_radians['RadLngNY'] = np.radians(neigh_newyork_radians['LongitudeNY'])
neigh_newyork_radians

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,BoroughNY,NeighborhoodNY,LatitudeNY,LongitudeNY,RadLatNY,RadLngNY
0,Bronx,Wakefield,40.894705,-73.847201,0.713747,-1.288877
1,Bronx,Co-op City,40.874294,-73.829939,0.713391,-1.288576
2,Bronx,Eastchester,40.887556,-73.827806,0.713622,-1.288538
3,Bronx,Fieldston,40.895437,-73.905643,0.71376,-1.289897
4,Bronx,Riverdale,40.890834,-73.912585,0.71368,-1.290018
5,Bronx,Kingsbridge,40.881687,-73.902818,0.71352,-1.289848
6,Manhattan,Marble Hill,40.876551,-73.91066,0.71343,-1.289984
7,Bronx,Woodlawn,40.898273,-73.867315,0.71381,-1.289228
8,Bronx,Norwood,40.877224,-73.879391,0.713442,-1.289439
9,Bronx,Williamsbridge,40.881039,-73.857446,0.713509,-1.289056
