In [43]:
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import re
import seaborn as sns

In [23]:
df = pd.read_csv("df_data_suburbs.csv")
df.head()

Unnamed: 0,name,vacancy_rate,rental_stock,population,rental_pop,region,state
0,Braidwood,0.55,5,1647.0,21.54,Queanbeyan-Palerang Regional,New South Wales
1,Karabar,0.83,29,8241.0,26.85,Queanbeyan-Palerang Regional,New South Wales
2,Queanbeyan,1.41,48,6236.0,38.85,Queanbeyan-Palerang Regional,New South Wales
3,Queanbeyan West,1.24,17,3136.0,29.31,Queanbeyan-Palerang Regional,New South Wales
4,Jerrabomberra,0.63,21,9505.0,19.9,Queanbeyan-Palerang Regional,New South Wales


In [98]:
states = list(df["state"].unique()[:-1])

def remove_outliers(df):
    """Filter outliers."""
    q1 = df.quantile(0.25)
    q3 = df.quantile(0.75)
    iqr = q3 - q1
    upper_thresh = q3 + 1.5*iqr
    
    return df[df < upper_thresh]

def create_distplot_by_state(stat):
    stat_by_state = {state: remove_outliers(df[stat][df["state"] == state].dropna()) for state in states}
    
    # group data together
    hist_data = list(stat_by_state.values())
    group_labels = list(stat_by_state.keys())

    # create distplot with custom bin_size
    fig = ff.create_distplot(hist_data, group_labels, show_hist=False, bin_size=0.1)
    fig.update_layout(
        title=f"Suburb {stat.replace('_', ' ').title()} Distribution by State", 
        template="seaborn"
    )
    fig.show()

In [96]:
stats = ["vacancy_rate", "rental_stock", "population", "rental_pop"]

In [99]:
create_distplot_by_state("rental_pop")

In [28]:
df_filtered = df[df["region"] == "Monash"]
df_filtered = df[df["state"] == "Victoria"]

fig = px.scatter(
    df_filtered, 
    x="vacancy_rate", 
    y="rental_stock",
    color="state",
    hover_data=['name'],
    template="seaborn"
)
fig.update_layout(
    xaxis_range=[-0.5, 10],
    yaxis_range=[-2, 100],
)