In [12]:
import warnings
import pandas as pd
import geopandas as gpd

# Suppress all warnings for the remainder of the session so library
# deprecation notices do not clutter the cell outputs.
warnings.filterwarnings('ignore')

# Curated input tables, addressed relative to the notebook's location.
income_csv = "../data/curated/income_forecasted.csv"
population_csv = '../data/curated/final_population.csv'

df = pd.read_csv(income_csv)
population = pd.read_csv(population_csv)

In [13]:
# Remove suburbs where the counts for income are 0 for all years (nobody lives there)
empty_suburbs = ['industrial', 'lake king', 'migratory', 'moorabbin airport',
    'no usual address', 'offshore', 'royal botanic gardens victoria',
    'shipping', 'wilsons promontory']

df = df[~df['suburb'].isin(empty_suburbs)]

# Keep only the columns needed for this analysis
df = df[['year', 'suburb', 'hi_4000_more_tot']]

# Attach the population figures. This is an inner join (the pd.merge default),
# so only (year, suburb) pairs present in both tables survive.
combined_df = pd.merge(df, population, left_on=['year', 'suburb'], right_on=['year', 'sa2_name'])
combined_df = combined_df.drop('sa2_name', axis = 1)

# Proportion of people earning more than 4000 a week, per suburb and year.
# A zero 'erp' would make the division yield inf (or NaN for 0/0), and inf
# would sort to the very top of the descending ranking below. Mask
# non-positive 'erp' values so those rows get NaN instead; sort_values puts
# NaN last by default.
valid_erp = combined_df['erp'].where(combined_df['erp'] > 0)
combined_df['proportion'] = combined_df['hi_4000_more_tot'] / valid_erp

# Restrict to 2021 and rank suburbs from highest to lowest proportion
combined_2021 = combined_df[combined_df['year'] == 2021]
combined_2021 = combined_2021.sort_values(by='proportion', ascending=False)

In [42]:
# Show the 2021 table, ordered by 'proportion' from highest to lowest.
combined_2021

Unnamed: 0,year,suburb,hi_4000_more_tot,erp,proportion
5441,2021,toorak,1851,13096,0.141341
1829,2021,east melbourne,685,4959,0.138133
725,2021,brighton,3098,23044,0.134438
1517,2021,cremorne,2309,17977,0.128442
1805,2021,eaglemont,985,7799,0.126298
...,...,...,...,...,...
4853,2021,sebastopol,112,13062,0.008574
4049,2021,norlane,69,9040,0.007633
101,2021,alps,0,23,0.000000
5549,2021,upper yarra valley,0,240,0.000000


In [43]:
## Visualisations (Bar plot + Map)



In [44]:
# Read the Victorian locality polygons and reproject them to EPSG:4326.
locality_shapefile = '../data/map/Vic_Localities/gda2020_vicgrid/esrishape/whole_of_dataset/victoria/VMADMIN/LOCALITY_POLYGON.shp'
vic_suburbs_gdf = gpd.read_file(locality_shapefile).to_crs(epsg=4326)

# Lower-case the locality name so it can be matched against the 'suburb'
# key used by the 2021 table.
vic_suburbs_gdf['suburb'] = vic_suburbs_gdf['GAZLOC'].str.lower()

# Join the polygons to the 2021 proportions, then discard bookkeeping
# columns (silently skipped when a column is absent).
unused_columns = ['PFI_CR', 'UFI_OLD', 'UFI_CR']
merged = (
    vic_suburbs_gdf
    .merge(combined_2021, on='suburb')
    .drop(columns=unused_columns, errors='ignore')
)