## Purchasing a Property in Alberta

In this project, we will determine the best neighborhood in Alberta to purchase a rental property based on a amenties within a 1-kilometer proximity. 

#### Install/import all packages required for this project

In [1]:
#Use beautiful soup method
!pip install bs4
from bs4 import BeautifulSoup # this module helps in web scrapping.
import requests  # this module helps us to download a web page

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import numpy as np # library to handle data in a vectorized manner

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.






#### Begin collecting data from the Wikipedia Article for Neighborhoods in Alberta

In [2]:
#Url containing data regarding Alberta postal codes, boroughs, neighborhoods, latitiude and longitude.
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_T"

In [3]:
#Obtain contents of the webpage in text format
data  = requests.get(url).text

#Start converting into a readable html format
soup = BeautifulSoup(data,"html5lib")

#Find all HTML tables in the web page
tables = soup.find_all('table')


In [4]:
#Turn the data into a dataframe
alberta_data = pd.DataFrame(columns=["Postal Code", "Borough", "Neighbourhood", "Latitude", "Longitude"])

for row in tables[1].tbody.find_all("tr"):
    col = row.find_all("td")
    if (col != []):
        postal_code = col[0].text.strip()
        borough = col[1].text.strip()
        neighbourhood = col[2].text.strip()
        latitude = col[3].text.strip()
        longitude = col[4].text.strip()
        alberta_data = alberta_data.append({"Postal Code":postal_code, "Borough":borough, "Neighbourhood":neighbourhood, "Latitude":latitude, "Longitude":longitude}, ignore_index=True)

alberta_data.tail()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
175,T5Z,Edmonton,West Lake District,53.5966,-113.4882
176,T6Z,Not assigned,Not assigned,Not assigned,Not assigned
177,T7Z,Stony Plain,Not assigned,53.5202,-114.0135
178,T8Z,Not assigned,Not assigned,Not assigned,Not assigned
179,T9Z,Not assigned,Not assigned,Not assigned,Not assigned


In [5]:
# Remove all rows that have "Not assigned"
alberta_data = alberta_data[alberta_data.Borough != 'Not assigned']
alberta_data = alberta_data[alberta_data.Neighbourhood != 'Not assigned']
alberta_data = alberta_data[alberta_data.Latitude != 'Not assigned']
alberta_data.tail()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
162,T1Y,Calgary,"Rundle, Whitehorn, Monterey Park",51.0759,-114.0015
163,T2Y,Calgary,"Millrise, Somerset, Bridlewood, Evergreen",50.9093,-114.0721
166,T5Y,Edmonton,"Horse Hill, East Lake District",53.6026,-113.3837
172,T2Z,Calgary,"Douglas Glen, McKenzie Lake, Copperfield, East...",50.9023,-113.9873
175,T5Z,Edmonton,West Lake District,53.5966,-113.4882


In [6]:
#Check dtypes
print(alberta_data.dtypes)

Postal Code      object
Borough          object
Neighbourhood    object
Latitude         object
Longitude        object
dtype: object


In [7]:
#Clean up the latitude and longitude colums so they display a "float" value
alberta_data['Latitude'] = pd.to_numeric(alberta_data['Latitude'])
alberta_data['Longitude'] = pd.to_numeric(alberta_data['Longitude'])
alberta_data.tail()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
162,T1Y,Calgary,"Rundle, Whitehorn, Monterey Park",51.0759,-114.0015
163,T2Y,Calgary,"Millrise, Somerset, Bridlewood, Evergreen",50.9093,-114.0721
166,T5Y,Edmonton,"Horse Hill, East Lake District",53.6026,-113.3837
172,T2Z,Calgary,"Douglas Glen, McKenzie Lake, Copperfield, East...",50.9023,-113.9873
175,T5Z,Edmonton,West Lake District,53.5966,-113.4882


In [8]:
#Re-Check dtypes
print(alberta_data.dtypes)

Postal Code       object
Borough           object
Neighbourhood     object
Latitude         float64
Longitude        float64
dtype: object


In [9]:
alberta_data.groupby('Borough').count()

Unnamed: 0_level_0,Postal Code,Neighbourhood,Latitude,Longitude
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Airdrie,2,2,2,2
Calgary,34,34,34,34
Edmonton,38,38,38,38
Fort McMurray,3,3,3,3
Grande Prairie,3,3,3,3
Leduc,1,1,1,1
Lethbridge,3,3,3,3
Medicine Hat,3,3,3,3
Red Deer,3,3,3,3
Sherwood Park,6,6,6,6


In [10]:
#alberta_data_yeg = alberta_data[alberta_data.Borough == 'Edmonton']
#alberta_data_yyc = alberta_data[alberta_data.Borough == 'Calgary']

In [11]:
#alberta_data_yeg.head()

In [12]:
#alberta_data_yyc.head()

In [13]:
#alberta_data = pd.concat([alberta_data_yeg, alberta_data_yyc], ignore_index=True)
#alberta_data = alberta_data_yeg
#alberta_data.tail()

In [14]:
#alberta_data.groupby('Borough').count()

#### Looking at the data we have currently collected in a map form

In [15]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(alberta_data['Borough'].unique()),
        alberta_data.shape[0]
    )
)

The dataframe has 11 boroughs and 97 neighborhoods.


In [16]:
address = 'Alberta, Canada'

geolocator = Nominatim(user_agent="alberta_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Alberta are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Alberta are 55.001251, -115.002136.


In [17]:
# create map of Alberta using latitude and longitude values
map_alberta = folium.Map(location=[latitude, longitude], zoom_start=5.5)

# add markers to map
for lat, lng, borough, neighbourhood in zip(alberta_data['Latitude'], alberta_data['Longitude'], alberta_data['Borough'], alberta_data['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_alberta)  
    
map_alberta

#### Get data from Foursquare

In [29]:
#Foursquare credentials

CLIENT_ID = 'CPABSE2VIDC2M0ATA2EWJFC1OMY4DP0SFTUIFIHFUTDEUEIH' # your Foursquare ID
CLIENT_SECRET = 'U503LR42EI0KE3GEJVSJO1WYQCH42ZF114RHNMNUHXJZBXES' # your Foursquare Secret
VERSION = '20210101' # Foursquare API version
LIMIT = 500 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: CPABSE2VIDC2M0ATA2EWJFC1OMY4DP0SFTUIFIHFUTDEUEIH
CLIENT_SECRET:U503LR42EI0KE3GEJVSJO1WYQCH42ZF114RHNMNUHXJZBXES


In [30]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            latitude, 
            longitude, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [31]:
alberta_venues = getNearbyVenues(names=alberta_data['Neighbourhood'],
                                   latitudes=alberta_data['Latitude'],
                                   longitudes=alberta_data['Longitude']
                                  )

Central Medicine Hat
Penbrooke Meadows, Marlborough
Dalhousie, Edgemont, Hamptons, Hidden Valley
East Airdrie
West Clareview, East Londonderry
North Capilano
West Sherwood Park
South Medicine Hat
Forest Lawn, Dover, Erin Woods
Montgomery, Bowness, Silver Springs, Greenwood
West Airdrie
East North Central, West Beverly
SE Capilano, West Southeast Industrial, East Bonnie Doon
Outer Southwest
North Medicine Hat
Lynnwood Ridge, Ogden, Foothills Industrial, Great Plains
Rosscarrock, Westgate, Wildwood, Shaganappi, Sunalta
Central Londonderry
Central Bonnie Doon
Inner Southwest Sherwood Park
Bridgeland, Greenview, Zoo, YYC
Lakeview, Glendale, Killarney, Glamorgan
West Londonderry, East Calder
South Bonnie Doon, East University
Central Sherwood Park (Ardrossan)
Leduc (Includes YEG)
Inglewood, Burnsland, Chinatown, East Victoria Park, Saddledome
Hawkwood, Arbour Lake, Citadel, Ranchlands, Royal Oak, Rocky Ridge
North Central, Queen Mary Park, Blatchford
West University, Strathcona Place
East S

ValueError: Length mismatch: Expected axis has 0 elements, new values have 7 elements