In [11]:
import json
import os
import re
from datetime import datetime

import geopandas as gpd
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import seaborn as sns

from database import connect_to_db

# Data

In [46]:
def load_df(db, name):
    """Load every document of collection ``name`` into a DataFrame.

    Parameters
    ----------
    db : database handle
        Indexed as ``db[name]``; the resulting collection must expose
        ``find``.
    name : str
        Name of the collection to read.

    Returns
    -------
    pandas.DataFrame
        One row per document, with the ``_id`` column removed.
    """
    documents = db[name].find({})
    # errors="ignore": an empty collection produces a frame with no columns,
    # and a plain drop would raise KeyError on the missing "_id".
    return pd.DataFrame(documents).drop(columns="_id", errors="ignore")

def filter_df(df, condition):
    """Filter ``df`` by a single ``{column: value}`` condition.

    Only the first key of ``condition`` is applied:

    * ``"Date"`` keeps rows whose ``Date`` equals ``pd.to_datetime(value)``
      and drops the ``Date`` column.
    * ``"name"`` or ``"Suburb"`` keeps rows whose ``name`` equals ``value``
      and drops the ``name`` column. ``"Suburb"`` is accepted as an alias;
      the comparison is always made against the ``name`` column.

    Any other key, or an empty/``None`` condition, returns ``df`` unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter; it is not modified.
    condition : dict
        Mapping whose first item selects the filter to apply.

    Returns
    -------
    pandas.DataFrame
        The filtered frame without the column that was filtered on, or
        ``df`` itself when no filter applies.
    """
    if not condition:
        # Nothing to filter on; avoids an IndexError on an empty mapping.
        return df

    key, value = next(iter(condition.items()))

    if key == "Date":
        return df[df["Date"] == pd.to_datetime(value)].drop(columns="Date")
    if key in ("name", "Suburb"):
        return df[df["name"] == value].drop(columns="name")
    return df

def load_filter_df(db, name, condition):
    """Load collection ``name`` from ``db`` and narrow it with ``condition``.

    Thin composition of ``load_df`` followed by ``filter_df``; see those
    functions for the accepted arguments.
    """
    return filter_df(load_df(db, name), condition)

In [48]:
# Database handle passed to load_df / load_filter_df below.
# connect_to_db comes from the project-local `database` module.
db = connect_to_db()

Connected to db.


In [50]:
# Load the suburb table restricted to a single date; filter_df drops the
# Date column after filtering. `df` is read as a global by the helpers below.
# Alternative (disabled): select all rows for one suburb instead.
#df = load_filter_df(db, "data_suburbs", {"Suburb": "Vermont South"})
df = load_filter_df(db, "data_suburbs", {"Date": "2025-04-01"})
df

Unnamed: 0,name,vacancy_rate,rental_stock,population,rental_pop,region,state
2761,Bombala,0.39,3,1387.0,20.00,Snowy Monaro Regional,New South Wales
2762,Narooma,0.30,6,2607.0,20.12,Eurobodalla,New South Wales
2763,Bermagui,0.29,4,1542.0,17.57,Bega Valley,New South Wales
2764,Goulburn,0.79,94,22418.0,28.62,Goulburn Mulwaree,New South Wales
2765,Yass,0.61,19,6506.0,21.05,Yass Valley,New South Wales
...,...,...,...,...,...,...,...
5517,East Moonta,,0,151.0,10.53,Copper Coast,South Australia
5518,South Kalgoorlie,0.68,14,4914.0,30.56,Kalgoorlie/Boulder,Western Australia
5519,West Kalgoorlie,,0,,,Kalgoorlie/Boulder,Western Australia
5520,South Greenough,,0,94.0,,Greater Geraldton,Western Australia


In [51]:
# State labels used to group the distribution plots.
# NOTE(review): [:-1] discards whichever unique value appears last, which
# depends on row order — presumably a NaN/placeholder entry; confirm and
# filter it out explicitly if so.
states = list(df["state"].unique()[:-1])

def remove_outliers(df, whisker=1.5):
    """Drop high-end outliers using the IQR rule.

    Values greater than or equal to ``Q3 + whisker * IQR`` are removed.
    Only the upper tail is trimmed; low values are always kept.

    Parameters
    ----------
    df : pandas.Series
        Numeric values to filter. (The notebook passes a single column;
        with a DataFrame the boolean mask would blank cells to NaN rather
        than drop rows.)
    whisker : float, default 1.5
        Multiple of the inter-quartile range added to the third quartile
        to obtain the cut-off.

    Returns
    -------
    pandas.Series
        The entries of ``df`` strictly below the cut-off.
    """
    q1 = df.quantile(0.25)
    q3 = df.quantile(0.75)
    upper_thresh = q3 + whisker * (q3 - q1)

    return df[df < upper_thresh]

def create_distplot_by_state(stat):
    """Show one density curve per state for the suburb column ``stat``.

    Reads the notebook-level ``df`` and ``states``. For every state the
    column is taken for that state's rows, missing values are dropped and
    the result is passed through ``remove_outliers`` before plotting.
    The figure is displayed; nothing is returned.
    """
    series_per_state = {}
    for state_name in states:
        in_state = df["state"] == state_name
        series_per_state[state_name] = remove_outliers(df[stat][in_state].dropna())

    # Parallel lists in the shape create_distplot expects.
    group_labels = list(series_per_state.keys())
    hist_data = list(series_per_state.values())

    pretty_stat = stat.replace('_', ' ').title()

    # Curves only (histogram bars hidden); bin_size kept as in the original call.
    fig = ff.create_distplot(hist_data, group_labels, show_hist=False, bin_size=0.1)
    fig.update_layout(
        title=f"Suburb {pretty_stat} Distribution by State",
        template="seaborn",
    )
    fig.show()

In [52]:
# Numeric suburb-level columns of `df`; any of them can be passed as `stat`
# to create_distplot_by_state.
stats = ["vacancy_rate", "rental_stock", "population", "rental_pop"]

In [53]:
# Per-state distribution of the rental_pop column (upper outliers trimmed).
create_distplot_by_state("rental_pop")

In [20]:
# Vacancy rate against rental stock for every Victorian suburb.
# To look at a single council area instead, filter on df["region"]
# (e.g. "Monash") rather than on df["state"].
df_filtered = df[df["state"] == "Victoria"]

fig = px.scatter(
    df_filtered,
    x="vacancy_rate",
    y="rental_stock",
    color="state",
    hover_data=['name'],
    template="seaborn",
)
# Fixed axis windows; points outside these ranges are not shown.
# The returned figure is the cell's last expression, so it is displayed.
fig.update_layout(
    xaxis_range=[-0.5, 10],
    yaxis_range=[-2, 100],
)

# Table

In [13]:
# Per-suburb market tables, one CSV per dwelling type, read from the
# notebook's working directory.
df_houses = pd.read_csv("df_tables_houses.csv")
df_town_houses = pd.read_csv("df_tables_town_houses.csv")
df_units = pd.read_csv("df_tables_units.csv")

In [16]:
def merge_suburb(df_houses, state, df_suburbs=None):
    """Join a market table with the suburb statistics of one state.

    Parameters
    ----------
    df_houses : pandas.DataFrame
        Market table with a ``Suburb`` column (houses, town houses or units).
    state : str
        Value of the ``state`` column to keep, e.g. ``"Victoria"``.
    df_suburbs : pandas.DataFrame, optional
        Suburb statistics with ``name`` and ``state`` columns. Defaults to
        the notebook-level ``df``.

    Returns
    -------
    pandas.DataFrame
        Rows of ``df_houses`` whose ``Suburb`` matches a ``name`` in the
        requested state, with that suburb's statistics appended.
    """
    if df_suburbs is None:
        df_suburbs = df

    # Merge against the state's rows only. Merging against every state would
    # attach rows from other states to any suburb name that is not unique
    # nationwide.
    suburbs_in_state = df_suburbs[df_suburbs["state"] == state]
    return df_houses.merge(suburbs_in_state, left_on="Suburb", right_on="name")

In [23]:
# Preview the houses table read from df_tables_houses.csv.
df_houses

Unnamed: 0,Suburb,Median listing price,Median price change - last quarter (%),Median price change - 1 year (%),Median price change - 2 years (%),Median weekly rent,Median yield %,Median rent change - 1 year (%),Previous month sales,Stock on market previous month,Stock variance vs. last year (%),Average days on market
0,Braidwood,789000.0,-1.26,-1.26,13.52,520.0,3.42,4.00,1.0,34.0,30.77,169.0
1,Karabar,750000.0,-1.19,-1.64,-5.90,640.0,4.43,-1.54,4.0,27.0,3.85,67.0
2,Queanbeyan,837000.0,3.65,11.60,2.19,600.0,3.72,3.44,6.0,27.0,17.39,89.0
3,Queanbeyan West,,,,,675.0,,6.29,1.0,10.0,0.00,46.0
4,Jerrabomberra,1185000.0,3.39,19.39,8.56,770.0,3.37,1.31,5.0,41.0,0.00,88.0
...,...,...,...,...,...,...,...,...,...,...,...,...
2716,South Burnie,450000.0,0.00,4.89,,,,,0.0,4.0,-33.33,
2717,South Spreyton,,,,,,,,0.0,1.0,-66.67,
2718,East Palmerston,,,,,,,,0.0,1.0,100.00,
2719,East Russell,,,,,,,,0.0,2.0,-50.00,


In [29]:
# List the columns of the merged table.
# NOTE(review): in the cells shown here, houses_vic is first assigned in a
# later cell (merge_suburb(df_units, "Victoria")), so on a fresh kernel this
# cell raises NameError unless that cell has been run first.
houses_vic.columns

Index(['Suburb', 'Median listing price',
       'Median price change - last quarter (%)',
       'Median price change - 1 year (%)', 'Median price change - 2 years (%)',
       'Median weekly rent', 'Median yield %',
       'Median rent change - 1 year (%)', 'Previous month sales',
       'Stock on market previous month', 'Stock variance vs. last year (%)',
       'Average days on market', 'name', 'vacancy_rate', 'rental_stock',
       'population', 'rental_pop', 'region', 'state'],
      dtype='object')

In [22]:
# Listing price against yield for Victorian suburbs.
# NOTE(review): the variable is named houses_vic but is built from df_units;
# confirm which dwelling type is intended (df_houses vs df_units).
houses_vic = merge_suburb(df_units, "Victoria")
px.scatter(
    houses_vic, 
    x="Median listing price", 
    # Alternative y-axis (disabled): weekly rent instead of yield.
    #y="Median weekly rent",
    y="Median yield %",
    hover_data={"Suburb": True},
    template="seaborn"
)

# Choropleth

In [None]:
# Location of the SA2 boundary shapefile (SA2_2021_AUST_GDA2020).
# Set the SA2_SHAPEFILE environment variable to point at a copy elsewhere;
# the default is the original author's local path.
SA2_SHAPEFILE = os.environ.get(
    "SA2_SHAPEFILE",
    "C:/Users/yeh/Documents/property_market_analysis/data/SA2_SHP/SA2_2021_AUST_GDA2020.shp",
)

# Keep only the area code, area name and geometry columns.
gdf = gpd.read_file(SA2_SHAPEFILE)[["SA2_CODE21", "SA2_NAME21", "geometry"]]

In [None]:
# Reproject to EPSG:4326 (longitude/latitude) before exporting to GeoJSON.
geojson_data = gdf.to_crs(4326)
geojson_data.to_file('geojson_data.geojson', driver='GeoJSON') #SHP to GeoJSON

# Read the exported file back as a plain dict. The context manager closes
# the file handle, which a bare open() inside json.load() would leave open.
with open('geojson_data.geojson', 'r') as geojson_file:
    geojson = json.load(geojson_file)

In [None]:
# Attach SA2 geometry to the merged table by matching Suburb to SA2_NAME21.
# reset_index() turns gdf's row index into a regular column so it survives
# the merge.
# NOTE(review): merge defaults to an inner join, so only suburbs whose name
# exactly equals an SA2 name are kept — confirm that this coverage is acceptable.
gdf_houses_vic = houses_vic.merge(gdf.reset_index(), left_on="Suburb", right_on="SA2_NAME21")

In [None]:
# NOTE(review): this whole cell is commented out (disabled choropleth draft).
# If it is re-enabled, locations="index" refers to the column created by
# gdf.reset_index() in the merge cell; presumably it has to line up with the
# feature ids in `geojson` — confirm before relying on the map.
# # Create the choropleth map
# fig = px.choropleth(
#     gdf_houses_vic, 
#     geojson=geojson, 
#     locations="index", 
#     color='Median listing price',
#     hover_name='SA2_NAME21', 
#     hover_data=['Median listing price']
# )

# # Update map layout to make it more presentable
# #fig.update_geos(fitbounds="locations", visible=False)
# fig.update_layout(title="Victoria")

# # Show the plot
# fig.show()