# Geospatial Visualisation

In [6]:
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
import contextily as ctx
import folium

import sys, os
sys.path.append(os.path.abspath('../'))
from scripts.utils import create_dir, get_runtime
import time
start_time = time.time()

## Visualise Predictions

In [118]:
# Plot one column of a GeoDataFrame as an interactive folium choropleth and save it as HTML
def plot_geospatial(gdf, column, title, cmap, property_type, joining_column='suburb', custom_bins=None, output_dir='../plots'):
    """Draw an interactive folium choropleth of a single column and save it to HTML.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Polygons to draw; must contain `joining_column` and `column`.
        It is reprojected to EPSG:4326 when its CRS differs.
    column : str
        Column whose values drive the fill colour.
    title : str
        Human-readable title used in the layer name and the legend.
    cmap : str
        Colour scheme name handed to folium as `fill_color` (e.g. 'RdYlGn').
    property_type : str
        Label used in the layer name; its lower-cased form is the output file name.
    joining_column : str, default 'suburb'
        Column that links each value to its polygon.
    custom_bins : sequence of float or int, optional
        Bin edges (or a bin count) for the colour scale. When omitted or empty,
        4 bins are used.
    output_dir : str, default '../plots'
        Directory the HTML file is written to; created if it does not exist.

    Returns
    -------
    folium.Map
        The map object, so it renders when it is the last expression of a cell.
    """
    # Convert GeoDataFrame to WGS84 (lat/lon) if not already in that CRS
    if gdf.crs != 'epsg:4326':
        gdf = gdf.to_crs(epsg=4326)

    # Create a folium map centered around Victoria, Australia, on a light basemap
    m = folium.Map(location=[-37.8136, 144.9631], zoom_start=7, tiles='CartoDB positron')

    # Decide the bins without relying on truthiness: `if custom_bins` raises
    # ValueError for a NumPy array of edges.
    if custom_bins is None:
        bins = 4
    elif hasattr(custom_bins, '__len__') and len(custom_bins) == 0:
        bins = 4
    else:
        bins = custom_bins

    column_label = column.replace("_", " ").title()

    folium.Choropleth(
        geo_data=gdf,  # Geometry source
        name=f'{property_type} {title} - {column_label}',
        data=gdf,
        columns=[joining_column, column],  # [join key, value to colour by]
        key_on=f'feature.properties.{joining_column}',  # Same join key on the GeoJSON side
        fill_color=cmap,  # Color map
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name=f'{title} (Scale) - {column_label}',
        bins=bins,
    ).add_to(m)

    # Make sure the target directory exists, then save the map to an HTML file
    os.makedirs(output_dir, exist_ok=True)
    m.save(os.path.join(output_dir, f"{property_type.lower()}.html"))

    # Return the map object for display in Jupyter Notebook
    return m


In [None]:
# Load the rental-price predictions; the `rented_price` column is relabelled `rented_price_2018`
price_rename = {'rented_price': 'rented_price_2018'}
house_preds_df = pd.read_csv('../data/curated/predicted_growth_house.csv').rename(columns=price_rename)
unit_preds_df = pd.read_csv('../data/curated/predicted_growth_unit.csv').rename(columns=price_rename)

# Suburb polygons: keep the Victorian rows and expose the name column as `suburb`
suburb_gdf = (
    gpd.read_file('../data/landing/suburb-shapefile/')
    .loc[lambda shp: shp['STE_NAME21'] == 'Victoria']
    .rename(columns={'SAL_NAME21': 'suburb'})
)

# Postcode-level tables; postcode is cast to str
# (presumably to match the shapefile's code column used as the merge key - confirm)
aff_df = pd.read_csv('../data/curated/affordability.csv')
aff_df = aff_df.assign(postcode=aff_df['postcode'].astype(str))
live_df = pd.read_csv('../data/curated/liveability.csv')
live_df = live_df.assign(postcode=live_df['postcode'].astype(str))
postcode_gdf = gpd.read_file('../data/landing/postcode-shapefile/').rename(columns={'POA_CODE21': 'postcode'})

# Report the size of each loaded table
for label, frame in (
    ('House Predictions:', house_preds_df),
    ('Unit Predictions:', unit_preds_df),
    ('Affordability:', aff_df),
    ('Liveability:', live_df),
):
    print(label, frame.shape)

In [107]:
# Attach polygons to each table (inner join, so rows without a matching polygon are dropped).
# Any `geometry` column left by an earlier run of this cell is removed first, so re-running
# the cell does not produce geometry_x / geometry_y and break the GeoDataFrame step.
suburb_shapes = suburb_gdf[['suburb', 'geometry']]
postcode_shapes = postcode_gdf[['postcode', 'geometry']]

# merge with suburb shapefile
house_preds_df = house_preds_df.drop(columns='geometry', errors='ignore').merge(suburb_shapes, on='suburb', how='inner')
unit_preds_df = unit_preds_df.drop(columns='geometry', errors='ignore').merge(suburb_shapes, on='suburb', how='inner')

# merge with postcode shapefile
aff_df = aff_df.drop(columns='geometry', errors='ignore').merge(postcode_shapes, on='postcode', how='inner')
live_df = live_df.drop(columns='geometry', errors='ignore').merge(postcode_shapes, on='postcode', how='inner')

# Growth rate for house and unit: baseline is the row-wise mean rented price over 2021-2023
# (missing years are skipped by `mean`), target is pred_2027.
baseline_years = ['rented_price_2021', 'rented_price_2022', 'rented_price_2023']
for preds_df in (house_preds_df, unit_preds_df):
    preds_df['baseline_rented_price'] = preds_df[baseline_years].mean(axis=1)
    preds_df['growth_rate'] = (preds_df['pred_2027'] - preds_df['baseline_rented_price']) / preds_df['baseline_rented_price']

# Turn liveability rank into a score: the largest rank maps to 0 and rank r maps to
# 1 - r / max_rank, so the top of the scale is just under 1.
# NOTE(review): assumes a lower rank means more liveable - confirm.
live_df['liveability'] = 1 - (live_df['rank'] / live_df['rank'].max())

In [None]:
# Wrap each merged table in a GeoDataFrame built on its `geometry` column.
# NOTE(review): the CRS is declared as EPSG:4326 here rather than taken from the
# shapefiles - confirm that matches the source data.
house_preds_gdf, unit_preds_gdf, aff_gdf, live_gdf = (
    gpd.GeoDataFrame(table, crs='EPSG:4326', geometry=table['geometry'])
    for table in (house_preds_df, unit_preds_df, aff_df, live_df)
)

# Encode confidence as 1 (high) / 0 (low); any other value becomes NaN
confidence_flag = {'high': 1, 'low': 0}
house_preds_gdf['is_high_confidence'] = house_preds_gdf['confidence'].map(confidence_flag)
unit_preds_gdf['is_high_confidence'] = unit_preds_gdf['confidence'].map(confidence_flag)

# Drop rows missing the prediction or the 2023 price, then keep high-confidence rows only
required_cols = ['pred_2027', 'rented_price_2023']
house_preds_gdf = (
    house_preds_gdf
    .dropna(subset=required_cols)
    .loc[lambda g: g['is_high_confidence'] == 1]
)
unit_preds_gdf = (
    unit_preds_gdf
    .dropna(subset=required_cols)
    .loc[lambda g: g['is_high_confidence'] == 1]
)

# Report the size of each geo dataframe
for label, frame in (
    ('House Predictions:', house_preds_gdf),
    ('Unit Predictions:', unit_preds_gdf),
    ('Affordability:', aff_gdf),
    ('Liveability:', live_gdf),
):
    print(label, frame.shape)


In [None]:
# Choropleth of house rental growth by suburb, using only rows where model == 'rf'
house_rf_gdf = house_preds_gdf[house_preds_gdf['model'] == 'rf']
plot_geospatial(
    gdf=house_rf_gdf,
    column='growth_rate',
    title='Suburb Growth Rate (House Rental Price)',
    cmap='RdYlGn',
    property_type='House',
    joining_column='suburb',
    custom_bins=[-1, -0.5, 0, 0.5, 1],
)

In [None]:
# Choropleth of unit rental growth by suburb, using only rows where model == 'rf'
unit_rf_gdf = unit_preds_gdf[unit_preds_gdf['model'] == 'rf']
plot_geospatial(
    gdf=unit_rf_gdf,
    column='growth_rate',
    title='Suburb Growth Rate (Unit Rental Price)',
    cmap='RdYlGn',
    property_type='Unit_Apt',
    joining_column='suburb',
    custom_bins=[-1, -0.5, 0, 0.5, 1],
)

In [None]:
# plot geospatial heatmap for affordability
# Choropleth of the price/income ratio, joined to polygons by postcode (default binning)
plot_geospatial(
    gdf=aff_gdf,
    column='price/income',
    title='Suburb Affordability',
    cmap='Greens',
    property_type='Affordability',
    joining_column='postcode',
)

In [None]:
# plot liveability
# Choropleth of the liveability score, joined to polygons by postcode (default binning)
plot_geospatial(
    gdf=live_gdf,
    column='liveability',
    title='Suburb Liveability',
    cmap='Blues',
    property_type='Liveability',
    joining_column='postcode',
)