In [None]:
import json
import os
import re

import geopandas as gpd
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import seaborn as sns

# Data

In [None]:
# Load the suburb-level dataset used by every later cell, then preview it.
df = pd.read_csv(filepath_or_buffer="df_data_suburbs.csv")
df.head(n=5)

In [None]:
# Distinct values of the "state" column in order of first appearance, minus
# the last one.
# NOTE(review): presumably the trailing value is a placeholder/NaN entry that
# should not be plotted -- this depends on row order, confirm against the data.
states = list(df["state"].unique())[:-1]

def remove_outliers(df):
    """Drop high-side outliers using the upper Tukey fence.

    Only the upper fence (``Q3 + 1.5 * IQR``) is applied; unusually low values
    are never removed.

    Parameters
    ----------
    df : pandas.Series
        Numeric values to filter. Despite its name, this parameter receives a
        single column (a Series) from ``create_distplot_by_state``.

    Returns
    -------
    pandas.Series
        The elements of ``df`` that are less than or equal to the upper fence,
        with their original index labels.
    """
    q1 = df.quantile(0.25)
    q3 = df.quantile(0.75)
    iqr = q3 - q1
    upper_thresh = q3 + 1.5 * iqr

    # Inclusive comparison: a value sitting exactly on the fence is not an
    # outlier. With a strict "<", a zero IQR (constant data, or more than half
    # of the values identical) makes the fence equal to Q3 and every typical
    # value would be discarded.
    return df[df <= upper_thresh]

def create_distplot_by_state(stat):
    """Show one density curve per state for the suburb column ``stat``.

    Reads the notebook-level ``df`` and ``states``. For every state the
    non-null values of ``stat`` are passed through ``remove_outliers`` and the
    resulting series are drawn together with ``ff.create_distplot`` (histogram
    bars hidden). The figure is displayed; nothing is returned.
    """
    group_labels = []
    hist_data = []
    for state_name in states:
        in_state = df["state"] == state_name
        values = df.loc[in_state, stat].dropna()
        group_labels.append(state_name)
        hist_data.append(remove_outliers(values))

    # e.g. "vacancy_rate" -> "Vacancy Rate"
    pretty_stat = stat.replace('_', ' ').title()

    fig = ff.create_distplot(hist_data, group_labels, show_hist=False, bin_size=0.1)
    fig.update_layout(
        title=f"Suburb {pretty_stat} Distribution by State",
        template="seaborn",
    )
    fig.show()

In [None]:
# Suburb-level columns available for the per-state distribution plots.
stats = [
    "vacancy_rate",
    "rental_stock",
    "population",
    "rental_pop",
]

In [None]:
# Per-state distribution of the "rental_pop" column.
create_distplot_by_state(stat="rental_pop")

In [None]:
# Vacancy rate vs. rental stock for Victorian suburbs.
# Only the state filter is applied here. A preceding `region == "Monash"`
# filter was unconditionally overwritten by the state filter and never
# affected the plot, so that dead assignment has been dropped.
df_filtered = df[df["state"] == "Victoria"]

fig = px.scatter(
    df_filtered,
    x="vacancy_rate",
    y="rental_stock",
    color="state",
    hover_data=['name'],
    template="seaborn"
)
# Fixed viewing window: points outside these ranges are not visible.
# The slightly negative lower bounds presumably keep points at 0 off the axis
# line -- confirm if the ranges are changed.
fig.update_layout(
    xaxis_range=[-0.5, 10],
    yaxis_range=[-2, 100],
)

# Table

In [None]:
# Per-suburb house table (one of its columns is "Suburb", used for joins below).
df_houses = pd.read_csv(filepath_or_buffer="df_tables_houses.csv")

In [None]:
# Distinct suburb names recorded against Victoria in the suburb dataset.
is_victoria = df["state"] == "Victoria"
suburbs = df.loc[is_victoria, "name"].unique()

In [None]:
# Join the house table to the suburb statistics for Victoria only.
# The right-hand side is restricted to Victorian rows: joining against the
# full `df` would also match same-named suburbs from other states, attaching
# their statistics to the house row and duplicating it.
houses_vic = df_houses[df_houses["Suburb"].isin(suburbs)].merge(
    df[df["state"] == "Victoria"],
    left_on="Suburb",
    right_on="name",
)
houses_vic.head()

In [None]:
# Median yield against median listing price, one point per row of houses_vic.
# (Swap y for "Median weekly rent" to plot rent instead.)
px.scatter(
    data_frame=houses_vic,
    x="Median listing price",
    y="Median yield %",
    hover_data=dict(Suburb=True),
    template="seaborn",
)

# Choropleth

In [None]:
# Path to the SA2 2021 (GDA2020) boundary shapefile. Set the SA2_SHAPEFILE
# environment variable to point at a local copy; the default is the original
# author's location and will not exist on other machines.
SA2_SHAPEFILE = os.environ.get(
    "SA2_SHAPEFILE",
    "C:/Users/yeh/Documents/property_market_analysis/data/SA2_SHP/SA2_2021_AUST_GDA2020.shp",
)

# Keep only the area code, area name and geometry columns.
gdf = gpd.read_file(SA2_SHAPEFILE)[[
    "SA2_CODE21", "SA2_NAME21", "geometry"
]]

In [None]:
# Reproject to EPSG:4326 (longitude/latitude), the coordinate system GeoJSON
# uses, then round-trip through a file to obtain a plain GeoJSON dict.
geojson_data = gdf.to_crs(4326)
geojson_data.to_file('geojson_data.geojson', driver='GeoJSON')  # SHP to GeoJSON

# Read the file back with an explicit UTF-8 encoding (the platform default
# differs on Windows) and close the handle deterministically.
with open('geojson_data.geojson', 'r', encoding='utf-8') as geojson_file:
    geojson = json.load(geojson_file)

In [None]:
# Attach SA2 geometry to each house row by matching suburb name to SA2 name.
# reset_index() exposes gdf's row index as a regular column (named "index"
# when the index itself is unnamed) so it survives the join.
gdf_houses_vic = pd.merge(
    left=houses_vic,
    right=gdf.reset_index(),
    left_on="Suburb",
    right_on="SA2_NAME21",
)

In [None]:
# # Create the choropleth map
# fig = px.choropleth(
#     gdf_houses_vic, 
#     geojson=geojson, 
#     locations="index", 
#     color='Median listing price',
#     hover_name='SA2_NAME21', 
#     hover_data=['Median listing price']
# )

# # Update map layout to make it more presentable
# #fig.update_geos(fitbounds="locations", visible=False)
# fig.update_layout(title="Victoria")

# # Show the plot
# fig.show()