# Pre-processing

In [1]:
import pandas as pd

In [2]:
# Load the three SA2-level source tables, keep only the columns used later,
# and restrict every table to the SA2 areas that the population table places
# in Victoria.
#
# Names defined by this cell: income_df, population_df, income_support_df
# (the cleaned, Victoria-only tables), plus suburb_info, estimated_population
# and suburb_code.


def _keep_sa2_codes(frame, code_column, codes):
    """Return the rows of ``frame`` whose ``code_column`` value appears in ``codes``.

    This is a plain membership filter. Unlike an inner ``merge`` against a
    key table, it can never repeat a row of ``frame`` when ``codes`` contains
    the same value more than once, and it adds no extra key column that has
    to be dropped afterwards.

    Parameters
    ----------
    frame : pd.DataFrame
        Table to filter.
    code_column : str
        Name of the column in ``frame`` holding the area code.
    codes : pd.Series or list-like
        Area codes to keep.

    Returns
    -------
    pd.DataFrame
        The matching rows in their original order, with all columns of
        ``frame`` and a fresh 0..n-1 index.
    """
    is_wanted = frame[code_column].isin(codes)
    # A fresh RangeIndex keeps the result shaped like the output of the
    # inner merge this helper replaces.
    return frame[is_wanted].reset_index(drop=True)


# Read in datasets
income_raw = pd.read_csv("datasource-AU_Govt_ABS-UoM_AURIN_DB_3_abs_personal_income_total_income_sa2_2011_2018.csv")
population_raw = pd.read_csv("datasource-AU_Govt_ABS-UoM_AURIN_DB_3_abs_regional_population_sa2_2001_2021.csv")
income_support_raw = pd.read_csv("datasource-TUA_PHIDU-UoM_AURIN_DB_sa2_incomesupport.csv")

# Drop irrelevant attributes for income dataset:
# keep every column from "sum_aud_2011_12" to the last one, then re-attach
# the two identifier columns (joined on the row index).
suburb_info = income_raw[["sa2_code", "sa2_name"]]
income_all = income_raw.loc[:, "sum_aud_2011_12":].join(suburb_info)

# Drop irrelevant attributes for population dataset:
# the listed descriptive columns followed by the erp_2001..erp_2021 block.
estimated_population = population_raw.loc[:, "erp_2001":"erp_2021"]
population_all = population_raw[["state_name_2016", "sa2_maincode_2016", "sa2_name_2016", "area_km2",
                                 "pop_density_2021_people_per_km2", "births_2016_17", "deaths_2016_17", "births_2017_18",
                                 "deaths_2017_18", "births_2018_19", "deaths_2018_19", "births_2019_20", "deaths_2019_20",
                                 "births_2020_21", "deaths_2020_21", "geom"]].join(estimated_population)

# Drop irrelevant attributes for income support dataset
income_support_all = income_support_raw[["area_code", "wkb_geometry", "area_name", "age_pens_3_percent_6_13_6_13",
                                         "child_welfm_3_percent_6_13_6_13", "disab_pens_3_percent_6_13_6_13",
                                         "hlth_card_3_percent_6_13_6_13", "pens_cd_hlr_3_percent_6_13_6_13",
                                         "snr_cd_hldr_3_percent_6_13_6_13", "unemply_ben_3_percent_6_13_6_13",
                                         "welf_dep_fm_3_percent_6_13_6_13"]]

# Filter instances in Victoria state for population dataset.
# The original row labels are kept here (no index reset).
population_df = population_all[population_all["state_name_2016"] == "Victoria"]
suburb_code = population_df[["sa2_maincode_2016"]]
victoria_codes = suburb_code["sa2_maincode_2016"]

# Filter instances in Victoria state for income dataset
income_df = _keep_sa2_codes(income_all, "sa2_code", victoria_codes)

# Filter instances in Victoria state for income support dataset
income_support_df = _keep_sa2_codes(income_support_all, "area_code", victoria_codes)

# A membership test does not raise when the code columns have different
# dtypes (e.g. int vs. str); it just matches nothing. Fail loudly instead.
assert not income_df.empty, "no income rows matched a Victorian SA2 code"
assert not income_support_df.empty, "no income-support rows matched a Victorian SA2 code"


In [3]:
# Preview the first five rows of the cleaned income table.
income_df.head(n=5)

Unnamed: 0,sum_aud_2011_12,sum_aud_2012_13,sum_aud_2013_14,sum_aud_2014_15,sum_aud_2015_16,sum_aud_2016_17,sum_aud_2017_18,median_aud_2011_12,median_aud_2012_13,median_aud_2013_14,...,mean_aud_2011_12,mean_aud_2012_13,mean_aud_2013_14,mean_aud_2014_15,mean_aud_2015_16,mean_aud_2016_17,mean_aud_2017_18,wkb_geometry,sa2_code,sa2_name
0,,,,,,92393.0,67978.0,,,,...,,,,,,23098.0,22659.0,MULTIPOLYGON (((-36.94796718499998 147.9638400...,205021080,Alps - East
1,138757575.0,150479849.0,156897279.0,170304545.0,173633854.0,190521220.0,204771158.0,39358.0,41002.0,41286.0,...,45825.0,48620.0,49888.0,52710.0,54516.0,56551.0,57960.0,MULTIPOLYGON (((-37.57381199199994 143.9440940...,201021012,Gordon (Vicnp)
2,55374077.0,56120678.0,60011749.0,66072540.0,63308746.0,69100808.0,74282185.0,27224.0,27952.0,30334.0,...,33238.0,34430.0,36570.0,39755.0,39128.0,41427.0,43798.0,MULTIPOLYGON (((-37.10084052999997 143.1010622...,201031013,Avoca
3,74997002.0,74230465.0,79296794.0,84398395.0,100042104.0,96877550.0,105093840.0,32754.0,33339.0,34009.0,...,37839.0,37320.0,39510.0,41927.0,48944.0,45355.0,48542.0,MULTIPOLYGON (((-37.42711137699996 143.2086688...,201031014,Beaufort
4,463697666.0,477396122.0,509890212.0,541772711.0,573273287.0,610576684.0,659836541.0,38542.0,39385.0,40903.0,...,48585.0,49059.0,50933.0,52661.0,53105.0,54477.0,56425.0,MULTIPOLYGON (((-38.14423999299998 145.8933609...,205011079,Warragul


In [4]:
# Preview the first five rows of the cleaned population table.
population_df.head(n=5)

Unnamed: 0,state_name_2016,sa2_maincode_2016,sa2_name_2016,area_km2,pop_density_2021_people_per_km2,births_2016_17,deaths_2016_17,births_2017_18,deaths_2017_18,births_2018_19,...,erp_2012,erp_2013,erp_2014,erp_2015,erp_2016,erp_2017,erp_2018,erp_2019,erp_2020,erp_2021
217,Victoria,201021012,Gordon (Vic.),850.3137,7.1903,46.0,29.0,67.0,30.0,49.0,...,5529,5582,5631,5692,5761,5824,5899,5999,6075,6114
222,Victoria,201031013,Avoca,1714.2399,1.9886,36.0,33.0,28.0,23.0,32.0,...,3342,3337,3322,3317,3323,3330,3352,3364,3370,3409
229,Victoria,201031014,Beaufort,1863.0431,2.4889,26.0,43.0,33.0,33.0,42.0,...,3955,4047,4172,4317,4454,4485,4450,4547,4615,4637
308,Victoria,201031015,Golden Plains - North,922.0812,5.4312,49.0,25.0,46.0,14.0,52.0,...,4195,4284,4363,4444,4540,4635,4742,4862,4938,5008
311,Victoria,204031075,Yackandandah,708.198,6.8145,39.0,27.0,50.0,26.0,27.0,...,4461,4489,4511,4536,4578,4586,4627,4683,4756,4826


In [6]:
# Preview the first five rows of the cleaned income-support table.
income_support_df.head(n=5)

Unnamed: 0,area_code,wkb_geometry,area_name,age_pens_3_percent_6_13_6_13,child_welfm_3_percent_6_13_6_13,disab_pens_3_percent_6_13_6_13,hlth_card_3_percent_6_13_6_13,pens_cd_hlr_3_percent_6_13_6_13,snr_cd_hldr_3_percent_6_13_6_13,unemply_ben_3_percent_6_13_6_13,welf_dep_fm_3_percent_6_13_6_13
0,208031187,MULTIPOLYGON (((-38.052374136 145.133050495999...,Chelsea Heights,78.9206,15.9356,5.193,6.9695,22.7224,5.6508,4.4852,7.3529
1,212041313,MULTIPOLYGON (((-37.988333000500035 145.123272...,Dingley Village,70.4383,29.1909,2.8918,5.9308,19.5754,11.315,2.4099,10.8228
2,202011025,"MULTIPOLYGON (((-36.742110119 144.296664128, -...",White Hills - Ascot,68.4005,26.3104,6.4475,11.9321,22.781,8.2668,5.1819,11.6288
3,202021026,MULTIPOLYGON (((-36.88143102900002 144.4951580...,Bendigo Region - South,61.9289,14.5328,4.2828,6.8249,15.6006,11.1675,3.2474,6.7416
4,206051130,"MULTIPOLYGON (((-37.8419793155 144.92918016, -...",Port Melbourne,53.9257,14.4582,4.5555,4.2327,13.4527,12.976,2.8989,5.2826
