<h1>Affordability</h1>

Measuring affordability of postcode based on median income, median mortage, median rental price and average facilities

In [13]:
# importing necessary libraries
import pandas as pd
import numpy as np

In [4]:
# Reading in median rental price per postcode
median_rent_df = pd.read_csv("../data/curated/median_rental_postcode.csv")
#median_rent_df.head()

Unnamed: 0,Postcode,Cost
0,3000,510.0
1,3002,642.5
2,3003,520.0
3,3004,550.0
4,3006,551.5


In [3]:
# Reading in preprocessed property data
property_df = pd.read_csv("../data/curated/properties_processed.csv")

# Remove unecessary column "Unnamed: 0"
property_df = property_df.drop(columns = ["Unnamed: 0"])
#property_df.head()

Unnamed: 0,Name,Cost,Coordinates,Bed,Bath,Parking,Property_Type,Agency,Postcode
0,5408/500 Elizabeth Street Melbourne VIC 3000,440.0,"[-37.8072443, 144.9602814]",1,1,0,Apartment / Unit / Flat,BRADY residential,3000
1,502/118 Russell Street Melbourne VIC 3000,620.0,"[-37.8135864, 144.9687232]",1,1,0,Apartment / Unit / Flat,Dingle Partners,3000
2,202A/441 Lonsdale Street Melbourne VIC 3000,300.0,"[-37.8134292, 144.9594445]",1,1,0,Apartment / Unit / Flat,Biggin & Scott Stonnington,3000
3,57/243 Collins Street Melbourne VIC 3000,400.0,"[-37.8159969, 144.9657956]",1,1,0,Apartment / Unit / Flat,Harcourts Melbourne City,3000
4,2311/601 Little Lonsdale Street Melbourne VIC ...,625.0,"[-37.8137564, 144.9537143]",2,2,1,Apartment / Unit / Flat,Harcourts Melbourne City,3000


In [5]:
# Calculate the average number of facilities per postcode
avg_bed = property_df.groupby(by = "Postcode")["Bed"].mean()
#avg_bed.head()

Postcode
3000    1.579310
3002    1.909091
3003    1.796875
3004    1.763441
3006    1.785714
Name: Bed, dtype: float64

In [6]:
avg_bath = property_df.groupby(by = "Postcode")["Bath"].mean()
#avg_bath.head()

Postcode
3000    1.296552
3002    1.318182
3003    1.343750
3004    1.494624
3006    1.469388
Name: Bath, dtype: float64

In [7]:
avg_parking = property_df.groupby(by = "Postcode")["Parking"].mean()
#avg_parking.head()

Postcode
3000    0.365517
3002    1.000000
3003    0.609375
3004    1.096774
3006    0.647959
Name: Parking, dtype: float64

In [8]:
# Create dataframe of average facilities per postcode
facilities_df = pd.DataFrame()
facilities_df["Postcode"] = property_df["Postcode"].unique()
facilities_df["Average # Beds"] = avg_bed.to_list()
facilities_df["Average # Baths"] = avg_bath.to_list()
facilities_df["Average # Parking"] = avg_parking.to_list()
#facilities_df.head()

Unnamed: 0,Postcode,Average # Beds,Average # Baths,Average # Parking
0,3000,1.57931,1.296552,0.365517
1,3002,1.909091,1.318182,1.0
2,3003,1.796875,1.34375,0.609375
3,3004,1.763441,1.494624,1.096774
4,3006,1.785714,1.469388,0.647959


In [9]:
# Joining median rental price per suburb with average facilities
facilities_df = facilities_df.set_index("Postcode")
price_by_facility_df = median_rent_df.join(facilities_df, on = "Postcode")
#price_by_facility_df.head()

Unnamed: 0,Postcode,Cost,Average # Beds,Average # Baths,Average # Parking
0,3000,510.0,1.57931,1.296552,0.365517
1,3002,642.5,1.909091,1.318182,1.0
2,3003,520.0,1.796875,1.34375,0.609375
3,3004,550.0,1.763441,1.494624,1.096774
4,3006,551.5,1.785714,1.469388,0.647959


In [34]:
# Reading in csv files for obtaining census data
sa2_postcode_map = pd.read_csv("../data/curated/sa2_postcode_mapping_2021.csv")
sa2_postcode_map.set_index("sa2_2021", inplace = True)
census_df = pd.read_csv("../data/curated/census_data.csv")

In [35]:
# Function for converting sa2 census data to postcode data
def convert_census_to_postcode(census_df, sa2_postcode_map, agg_func):
    ''' Inputs census data as indexed by SA2 and converts it to postcode through aggregation
    '''

    census_df_postcode = sa2_postcode_map.merge(census_df, on='sa2_2021').drop('sa2_2021', axis=1)
    census_df_postcode = census_df_postcode[census_df_postcode['postcode_2021'] >= 3000]

    census_df_postcode_agg = census_df_postcode.groupby('postcode_2021').agg(
        tot_population_11 = pd.NamedAgg(column='Tot_persons_C11_P', aggfunc=sum),
        tot_population_16 = pd.NamedAgg(column='Tot_persons_C16_P', aggfunc=sum),
        tot_population_21 = pd.NamedAgg(column='Tot_persons_C21_P', aggfunc=sum),
        avg_med_mortg_rep_11 = pd.NamedAgg(column='Med_mortg_rep_mon_C2011', aggfunc=agg_func),
        avg_med_mortg_rep_16 = pd.NamedAgg(column='Med_mortg_rep_mon_C2016', aggfunc=agg_func),
        avg_med_mortg_rep_21 = pd.NamedAgg(column='Med_mortg_rep_mon_C2021', aggfunc=agg_func),
        avg_med_person_inc_11 = pd.NamedAgg(column='Med_person_inc_we_C2011', aggfunc=agg_func),
        avg_med_person_inc_16 = pd.NamedAgg(column='Med_person_inc_we_C2016', aggfunc=agg_func),
        avg_med_person_inc_21 = pd.NamedAgg(column='Med_person_inc_we_C2021', aggfunc=agg_func),
        avg_med_rent_16 = pd.NamedAgg(column='Med_rent_weekly_C2011', aggfunc=agg_func),
        avg_med_rent_11 = pd.NamedAgg(column='Med_rent_weekly_C2016', aggfunc=agg_func),
        avg_med_rent_21 = pd.NamedAgg(column='Med_rent_weekly_C2021', aggfunc=agg_func),
        avg_med_hh_inc_16 = pd.NamedAgg(column='Med_tot_hh_inc_wee_C2011', aggfunc=agg_func),
        avg_med_hh_inc_11 = pd.NamedAgg(column='Med_tot_hh_inc_wee_C2016', aggfunc=agg_func),
        avg_med_hh_inc_21 = pd.NamedAgg(column='Med_tot_hh_inc_wee_C2021', aggfunc=agg_func),
        tot_avg_hh_size_16 = pd.NamedAgg(column='Average_hh_size_C2011', aggfunc=agg_func),
        tot_avg_hh_size_11 = pd.NamedAgg(column='Average_hh_size_C2016', aggfunc=agg_func),
        tot_avg_hh_size_21 = pd.NamedAgg(column='Average_hh_size_C2021', aggfunc=agg_func),
    ).reset_index()

    return census_df_postcode_agg

In [55]:
mean_no_zero = lambda lst: round(np.mean([x for x in lst if x > 0]), 2)
census_by_postcode_df = convert_census_to_postcode(census_df, sa2_postcode_map, mean_no_zero)
#census_by_postcode_df.head()

Unnamed: 0,postcode_2021,tot_population_11,tot_population_16,tot_population_21,avg_med_mortg_rep_11,avg_med_mortg_rep_16,avg_med_mortg_rep_21,avg_med_person_inc_11,avg_med_person_inc_16,avg_med_person_inc_21,avg_med_rent_16,avg_med_rent_11,avg_med_rent_21,avg_med_hh_inc_16,avg_med_hh_inc_11,avg_med_hh_inc_21,tot_avg_hh_size_16,tot_avg_hh_size_11,tot_avg_hh_size_21
0,3000,124551,167166,178424,2213.38,2040.38,2040.19,862.18,5483.82,6467.76,395.76,447.06,418.19,1482.53,1896.76,2159.41,1.88,1.97,1.86
1,3002,68729,82804,89023,2357.78,2173.67,2155.22,1091.8,8969.6,10432.9,398.0,460.33,449.67,1709.4,2415.0,2598.8,1.82,1.91,1.87
2,3003,15496,20633,23083,2200.0,2050.0,2085.0,701.5,716.0,1000.0,395.0,418.5,385.5,1466.0,1493.5,1751.0,2.15,2.15,1.95
3,3004,100879,123254,129273,2331.58,2155.67,2149.75,1066.08,7152.46,8339.46,391.15,446.83,440.75,1688.85,2270.46,2471.46,1.83,1.89,1.84
4,3006,21150,30239,36164,2477.25,2217.75,2079.0,1132.4,16783.0,19507.0,406.8,501.0,461.0,1637.2,2883.2,3088.8,1.8,1.92,1.92


In [56]:
# Remove unnecessary columns from census data (only need columns containing data from 2021)
census_by_postcode_df = census_by_postcode_df[["postcode_2021", "tot_population_21", "avg_med_mortg_rep_21", "avg_med_person_inc_21", "avg_med_rent_21", 
                            "avg_med_hh_inc_21", "tot_avg_hh_size_21"]]

# Select columns required for assessing affordability
census_income_df = census_by_postcode_df[["postcode_2021", "avg_med_mortg_rep_21", "avg_med_person_inc_21"]]
#census_income_df.head()

Unnamed: 0,postcode_2021,avg_med_mortg_rep_21,avg_med_person_inc_21
0,3000,2040.19,6467.76
1,3002,2155.22,10432.9
2,3003,2085.0,1000.0
3,3004,2149.75,8339.46
4,3006,2079.0,19507.0


In [57]:
# Joining census income dataframe to price by facilities dataframe
census_income_df = census_income_df.rename({"postcode_2021": "Postcode"}, axis = 1)
#census_income_df.head()
df = census_income_df.join(price_by_facility_df)

Unnamed: 0,Postcode,avg_med_mortg_rep_21,avg_med_person_inc_21
0,3000,2040.19,6467.76
1,3002,2155.22,10432.9
2,3003,2085.0,1000.0
3,3004,2149.75,8339.46
4,3006,2079.0,19507.0
