In [3]:
import pandas as pd
import os
from datetime import datetime
import geopandas as gpd
from shapely.geometry import Point
from shapely import wkt
import matplotlib.pyplot as plt
from shapely.geometry import Polygon
import folium
import math

# Configure Notebook
import warnings
warnings.filterwarnings('ignore')


First to import all Vancouver facilities' csv files

In [6]:
# Load other CSV files directly into DataFrames
pubs_and_restuarent = pd.read_csv('vancouver_Pubs and restuarents.csv')
railway_station_and_stop = pd.read_csv('vancouver_Railway Stations and  Railway Stops.csv')
supermarket = pd.read_csv('vancouver_Supermarkets.csv')
bus_station_and_stop = pd.read_csv('vancouver_Bus Stations and Bus Stops.csv')
hospital = pd.read_csv('vancouver_Hospitals.csv')
school_university_colleges = pd.read_csv('vancouver_Schools_university_colleges.csv')
cafe = pd.read_csv('van-cafe.csv')
cinema = pd.read_csv('van-cinema.csv')
dentist = pd.read_csv('van-dentist.csv')
fast_food = pd.read_csv('van-fast_food.csv')
kindergartens = pd.read_csv('van-kindergartens.csv')
malls = pd.read_csv('van-malls.csv')
play_ground = pd.read_csv('van-paly_grounds.csv')
parks = pd.read_csv('van-parks.csv')
post_office = pd.read_csv('van-post_office.csv')

# Dictionary containing DataFrames directly for iteration
csv_dataframes = {
    "hospital": hospital,
    "pubs_and_restuarent": pubs_and_restuarent,
    "railway_station_and_stop": railway_station_and_stop,
    "school_university_colleges": school_university_colleges,
    "supermarket": supermarket,
    "bus_station_and_stop": bus_station_and_stop,
    "cafe": cafe,
    "cinema": cinema,
    "dentist": dentist,
    "fast_food": fast_food,
    "kindergartens": kindergartens,
    "malls": malls,
    "play_ground": play_ground,
    "parks": parks,
    "post_office": post_office,
}

Then we will filter out the data we don't need and add all other data together

In [9]:
# Selected relevant columns to extract
extract_data = ["fclass", "name", "geometry", "Latitude", "Longitude"]

# Extract relevant columns and concatenate all data into a single DataFrame
dfs = []
for name, df in csv_dataframes.items():
    # Check if required columns exist
    missing_cols = [col for col in extract_data if col not in df.columns]
    if missing_cols:
        print(f"Warning: {name} is missing columns: {missing_cols}")
    else:
        dfs.append(df[extract_data])

# Concatenate all dataframes
facilities = pd.concat(dfs, ignore_index=True)
# Rename 'fclass' to 'type of facilities'
facilities.rename(columns={"fclass": "type of facilities"}, inplace=True)

# Display the first few rows of the concatenated DataFrame
facilities.head()

Unnamed: 0,type of facilities,name,geometry,Latitude,Longitude
0,hospital,British Columbia Children's Emergency,POINT (-123.123599 49.2455446),49.245545,-123.123599
1,hospital,Emergency,POINT (-123.124844 49.2621467),49.262147,-123.124844
2,hospital,UBC Hospital,POINT (-123.245747 49.2641866),49.264187,-123.245747
3,hospital,Emergency,POINT (-123.0957019 49.2582638),49.258264,-123.095702
4,hospital,CRC Health Centre,POINT (-123.1339341 49.1853789),49.185379,-123.133934


We will remove any missing value

In [11]:
# Remove missing date values from dataframe
# Drop rows with missing values in geometry, latitude, and longitude columns, missing name is not important
final_facilities = facilities.dropna(subset=['geometry', 'Latitude', 'Longitude'])

# Check if there are any remaining missing values
has_missing_values = final_facilities.isna().any().any()

# Output result
if has_missing_values:
    print("The final DataFrame contains NaN or NaT values in names.")
else:
    print("The final DataFrame does not contain NaN or NaT values.")

final_facilities.head()

The final DataFrame contains NaN or NaT values in names.


Unnamed: 0,type of facilities,name,geometry,Latitude,Longitude
0,hospital,British Columbia Children's Emergency,POINT (-123.123599 49.2455446),49.245545,-123.123599
1,hospital,Emergency,POINT (-123.124844 49.2621467),49.262147,-123.124844
2,hospital,UBC Hospital,POINT (-123.245747 49.2641866),49.264187,-123.245747
3,hospital,Emergency,POINT (-123.0957019 49.2582638),49.258264,-123.095702
4,hospital,CRC Health Centre,POINT (-123.1339341 49.1853789),49.185379,-123.133934


In [13]:
# Count the size of the DataFrame
num_rows, num_columns = final_facilities.shape
print(f"Number of rows: {num_rows}")
print(f"Number of columns: {num_columns}")

Number of rows: 9626
Number of columns: 5


In [15]:
# Ensure `final_facilities` is a GeoDataFrame
final_facilities = gpd.GeoDataFrame(
    final_facilities, 
    geometry=gpd.points_from_xy(final_facilities['Longitude'], final_facilities['Latitude'])
)
final_facilities.set_crs(epsg=4326, inplace=True)
final_facilities.head()

Unnamed: 0,type of facilities,name,geometry,Latitude,Longitude
0,hospital,British Columbia Children's Emergency,POINT (-123.12 49.246),49.245545,-123.123599
1,hospital,Emergency,POINT (-123.12 49.262),49.262147,-123.124844
2,hospital,UBC Hospital,POINT (-123.25 49.264),49.264187,-123.245747
3,hospital,Emergency,POINT (-123.1 49.258),49.258264,-123.095702
4,hospital,CRC Health Centre,POINT (-123.13 49.185),49.185379,-123.133934


Import Vancouver housing data

In [19]:
# Importing the houses CSV
vancouver_housing_1 = pd.read_csv('Vancouver_House_data_Final_cleaned.csv')
vancouver_housing_2 = pd.read_csv('Vancouver_Rew_houses_Price_Address_HouseInfo.csv')

vancouver_housing = pd.concat([vancouver_housing_1, vancouver_housing_2], ignore_index=True)

# Price of 1 dollar, means this house needs to bid. We will drop this special
# Remove the "C$" symbol and commas, then convert to numeric
vancouver_housing['Price'] = vancouver_housing['Price'].str.replace(r'[^\d.]', '', regex=True).astype(float)
# Filter out rows where the price is exactly 1
vancouver_housing = vancouver_housing[vancouver_housing['Price'] != 1]

# Drop duplicate
vancouver_housing.drop_duplicates(subset=['Street address'],inplace =True)
print(len(vancouver_housing))


# Create a GeoDataFrame with longitude and latitude as geometry
vancouver_housing = gpd.GeoDataFrame(
    vancouver_housing,
    geometry=gpd.points_from_xy(vancouver_housing['Longitude'], vancouver_housing['Latitude'])
)

# Set the coordinate reference system to EPSG 4326 (WGS84)
vancouver_housing.set_crs(epsg=4326, inplace=True)

# Display the first few rows
vancouver_housing.head()

1219


Unnamed: 0,Street address,City,State,Zip/Postal Code,Country,Latitude,Longitude,Postal Code,Postal code,Neighbourhood,Region,Price,Bedrooms,Bathrooms,Square Footage,Home Type,geometry
0,1525 Coal Harbour Cay #B17,Vancouver,BC,V6G3E7,Canada,49.291479,-123.128047,V6G,V6G,NW West End / Stanley Park,Vancouver,375000.0,1 bd,1 ba,484 sqft,,POINT (-123.13 49.291)
1,2106 SW Marine Dr,Vancouver,BC,V6P6B5,Canada,49.214789,-123.15583,V6P,V6P,SE Kerrisdale / SW Oakridge / West Marpole,Vancouver,27500000.0,6 bds,9 ba,"10,709 sqft",,POINT (-123.16 49.215)
2,3440 Broadway W #126,Vancouver,BC,V6R4R2,Canada,49.264012,-123.180766,V6R,V6R,West Kitsilano / West Point Grey / Jericho,Vancouver,989988.0,3 bds,2 ba,"1,266 sqft",,POINT (-123.18 49.264)
3,8481 Portside Ct,Vancouver,BC,V5P4V4,Canada,49.207213,-123.061855,V5P,V5P,SE Kensington-Cedar Cottage / Victoria- Fraser...,Vancouver,899900.0,3 bds,3 ba,"1,404 sqft",,POINT (-123.06 49.207)
4,2870 SW Marine Dr,Vancouver,BC,V6N3X9,Canada,49.225986,-123.170995,V6N,V6N,West Kerrisdale / South Dunbar- Southlands / M...,Vancouver,16900000.0,7 bds,11 ba,"11,271 sqft",,POINT (-123.17 49.226)


Buffer 0.5Km around each house adress to see how many facilities around the house

In [22]:
# Create buffer around each house point

def create_circle(point, radius_meters=500):
    # Convert radius from meters to degrees (approximate)
    radius_degrees = radius_meters / (40008000 / 360)

    # Create a buffer (circle) around the point
    circle_area= point.buffer(radius_degrees)

    return circle_area

vancouver_housing_gdf = gpd.GeoDataFrame(vancouver_housing, geometry='geometry')
vancouver_housing_gdf['circle'] = vancouver_housing_gdf['geometry'].apply(create_circle)
vancouver_housing_gdf.head()

Unnamed: 0,Street address,City,State,Zip/Postal Code,Country,Latitude,Longitude,Postal Code,Postal code,Neighbourhood,Region,Price,Bedrooms,Bathrooms,Square Footage,Home Type,geometry,circle
0,1525 Coal Harbour Cay #B17,Vancouver,BC,V6G3E7,Canada,49.291479,-123.128047,V6G,V6G,NW West End / Stanley Park,Vancouver,375000.0,1 bd,1 ba,484 sqft,,POINT (-123.13 49.291),"POLYGON ((-123.12 49.291, -123.12 49.291, -123..."
1,2106 SW Marine Dr,Vancouver,BC,V6P6B5,Canada,49.214789,-123.15583,V6P,V6P,SE Kerrisdale / SW Oakridge / West Marpole,Vancouver,27500000.0,6 bds,9 ba,"10,709 sqft",,POINT (-123.16 49.215),"POLYGON ((-123.15 49.215, -123.15 49.214, -123..."
2,3440 Broadway W #126,Vancouver,BC,V6R4R2,Canada,49.264012,-123.180766,V6R,V6R,West Kitsilano / West Point Grey / Jericho,Vancouver,989988.0,3 bds,2 ba,"1,266 sqft",,POINT (-123.18 49.264),"POLYGON ((-123.18 49.264, -123.18 49.264, -123..."
3,8481 Portside Ct,Vancouver,BC,V5P4V4,Canada,49.207213,-123.061855,V5P,V5P,SE Kensington-Cedar Cottage / Victoria- Fraser...,Vancouver,899900.0,3 bds,3 ba,"1,404 sqft",,POINT (-123.06 49.207),"POLYGON ((-123.06 49.207, -123.06 49.207, -123..."
4,2870 SW Marine Dr,Vancouver,BC,V6N3X9,Canada,49.225986,-123.170995,V6N,V6N,West Kerrisdale / South Dunbar- Southlands / M...,Vancouver,16900000.0,7 bds,11 ba,"11,271 sqft",,POINT (-123.17 49.226),"POLYGON ((-123.17 49.226, -123.17 49.226, -123..."


In [24]:
# Rename columns to have `geometry` as polygons and points as `point`
vancouver_housing_gdf = vancouver_housing_gdf.rename(columns={'geometry': 'point'})
vancouver_housing_gdf = vancouver_housing_gdf.rename(columns={'circle': 'geometry'})

vancouver_housing_gdf.geometry.head()

0    POLYGON ((-123.12 49.291, -123.12 49.291, -123...
1    POLYGON ((-123.15 49.215, -123.15 49.214, -123...
2    POLYGON ((-123.18 49.264, -123.18 49.264, -123...
3    POLYGON ((-123.06 49.207, -123.06 49.207, -123...
4    POLYGON ((-123.17 49.226, -123.17 49.226, -123...
Name: geometry, dtype: geometry

In [26]:
# Display the first few rows to verify the changes
vancouver_housing_gdf.head()

Unnamed: 0,Street address,City,State,Zip/Postal Code,Country,Latitude,Longitude,Postal Code,Postal code,Neighbourhood,Region,Price,Bedrooms,Bathrooms,Square Footage,Home Type,point,geometry
0,1525 Coal Harbour Cay #B17,Vancouver,BC,V6G3E7,Canada,49.291479,-123.128047,V6G,V6G,NW West End / Stanley Park,Vancouver,375000.0,1 bd,1 ba,484 sqft,,POINT (-123.13 49.291),"POLYGON ((-123.12 49.291, -123.12 49.291, -123..."
1,2106 SW Marine Dr,Vancouver,BC,V6P6B5,Canada,49.214789,-123.15583,V6P,V6P,SE Kerrisdale / SW Oakridge / West Marpole,Vancouver,27500000.0,6 bds,9 ba,"10,709 sqft",,POINT (-123.16 49.215),"POLYGON ((-123.15 49.215, -123.15 49.214, -123..."
2,3440 Broadway W #126,Vancouver,BC,V6R4R2,Canada,49.264012,-123.180766,V6R,V6R,West Kitsilano / West Point Grey / Jericho,Vancouver,989988.0,3 bds,2 ba,"1,266 sqft",,POINT (-123.18 49.264),"POLYGON ((-123.18 49.264, -123.18 49.264, -123..."
3,8481 Portside Ct,Vancouver,BC,V5P4V4,Canada,49.207213,-123.061855,V5P,V5P,SE Kensington-Cedar Cottage / Victoria- Fraser...,Vancouver,899900.0,3 bds,3 ba,"1,404 sqft",,POINT (-123.06 49.207),"POLYGON ((-123.06 49.207, -123.06 49.207, -123..."
4,2870 SW Marine Dr,Vancouver,BC,V6N3X9,Canada,49.225986,-123.170995,V6N,V6N,West Kerrisdale / South Dunbar- Southlands / M...,Vancouver,16900000.0,7 bds,11 ba,"11,271 sqft",,POINT (-123.17 49.226),"POLYGON ((-123.17 49.226, -123.17 49.226, -123..."


At the final step, we will count the number of each type of facility within the 1500-meter buffer around each house

In [28]:

facility_types = {
    "hospital": "Hospital in area",
    "railway_station": "Railway Station in area",
    "school": "School in area",
    "university": "University in area",
    "college": "College in area",
    "supermarket": "Supermarket in area",
    "bus_stop": "Bus Stop in area",
    "bus_station": "Bus Station in area",
    "pub": "Pub in area",
    "restaurant": "Restaurant in area",
    "cafe": "Cafe in area",
    "cinema": "Cinema in area",
    "dentist": "Dentist in area",
    "fast_food": "Fast Food in area",
    "kindergartens": "Kindergartens in area",
    "malls": "Malls in area",
    "play_ground": "Play Ground in area",
    "parks": "Parks in area",
    "post_office": "Post Office in area"
}

# Add columns to toronto_housing_gdf DataFrame for each facility count, initializing them to 0 if they don’t already exist
for column in facility_types.values():
    if column not in vancouver_housing_gdf.columns:
        vancouver_housing_gdf[column] = 0

# Ensure CRS consistency between the housing and facilities data
if final_facilities.crs != vancouver_housing_gdf.crs:
    final_facilities = final_facilities.to_crs(vancouver_housing_gdf.crs)


# Combine bus_stop and bus_station into one facility type for analysis
final_facilities['type of facilities'] = final_facilities['type of facilities'].replace({
    'bus_stop': 'bus_stop',
    'bus_station': 'bus_stop'
})

# Combine university, college, and school into one facility type for analysis
final_facilities['type of facilities'] = final_facilities['type of facilities'].replace({
    'university': 'school',
    'college': 'school',
    'school': 'school'
})

# Count facilities within 1500m for each type and update `toronto_housing_gdf`
for facility_key, column_name in facility_types.items():
    # Filter facilities by type
    facilities_subset = final_facilities[final_facilities['type of facilities'] == facility_key]

     # Perform spatial join to find facilities within each buffer
    joined = gpd.sjoin(facilities_subset, vancouver_housing_gdf[['geometry']], how='left', predicate='within')
    
    # Count facilities for each housing location and update counts
    counts = joined.groupby('index_right').size()
    vancouver_housing_gdf[column_name] = vancouver_housing_gdf.index.map(counts).fillna(0).astype(int)

    
# Display the updated DataFrame
vancouver_housing_gdf.head()





Unnamed: 0,Street address,City,State,Zip/Postal Code,Country,Latitude,Longitude,Postal Code,Postal code,Neighbourhood,...,Restaurant in area,Cafe in area,Cinema in area,Dentist in area,Fast Food in area,Kindergartens in area,Malls in area,Play Ground in area,Parks in area,Post Office in area
0,1525 Coal Harbour Cay #B17,Vancouver,BC,V6G3E7,Canada,49.291479,-123.128047,V6G,V6G,NW West End / Stanley Park,...,15,6,0,0,2,0,0,0,0,1
1,2106 SW Marine Dr,Vancouver,BC,V6P6B5,Canada,49.214789,-123.15583,V6P,V6P,SE Kerrisdale / SW Oakridge / West Marpole,...,0,0,0,0,0,0,0,0,0,0
2,3440 Broadway W #126,Vancouver,BC,V6R4R2,Canada,49.264012,-123.180766,V6R,V6R,West Kitsilano / West Point Grey / Jericho,...,10,3,0,0,1,0,0,0,0,0
3,8481 Portside Ct,Vancouver,BC,V5P4V4,Canada,49.207213,-123.061855,V5P,V5P,SE Kensington-Cedar Cottage / Victoria- Fraser...,...,0,0,0,0,0,0,0,0,0,0
4,2870 SW Marine Dr,Vancouver,BC,V6N3X9,Canada,49.225986,-123.170995,V6N,V6N,West Kerrisdale / South Dunbar- Southlands / M...,...,0,0,0,0,0,0,0,0,0,0


In [30]:
vancouver_housing_gdf = vancouver_housing_gdf[['Street address', 'Zip/Postal Code','Latitude','Longitude', 'Neighbourhood','Region','Price','Bedrooms', 'Bathrooms','Square Footage','Hospital in area', 
                        'Railway Station in area', 'School in area','Supermarket in area','Bus Stop in area','Pub in area','Restaurant in area',  'Cafe in area','Dentist in area',
                        'Fast Food in area','Kindergartens in area', 'Malls in area',  'Play Ground in area',  'Parks in area',  'Post Office in area'   ]]
vancouver_housing_gdf.to_csv('Vancouver_Houses_Facilities_0.5km.csv')