## Aggregates an SA2-level dataframe into rental suburb groups

In [4]:
import pandas as pd
import ast

In [5]:
# don't edit this (except for regions_df location)

# Lookup table of rental suburb groups; the 'code' column holds the SA2 codes of each group.
regions_df = pd.read_csv('../../data/raw/location/sa2_to_rental_suburb_groups.csv')

# 'code' comes out of the CSV as text, so parse each cell with ast.literal_eval
# before exploding it into rows.
regions_df_list = regions_df.copy()
regions_df_list['code'] = regions_df_list['code'].map(ast.literal_eval)

# One row per (suburb group, SA2 code) pair. The codes are coerced to numbers in a
# single vectorised call; anything that cannot be parsed becomes NaN.
exploded_regions = regions_df_list.explode('code')
exploded_regions['code'] = pd.to_numeric(exploded_regions['code'], errors='coerce')

def df_to_regions(df, sa2_col_name, aggregation_functions, year_col_name = None, quarter_col_name = None, regions = None):
    """Aggregate SA2-level rows of ``df`` up to suburb groups.

    The regions lookup is left-merged with ``df`` on ``code == sa2_col_name``,
    the columns named in ``aggregation_functions`` are coerced to numeric, and
    the result is grouped by ``'suburbs'`` plus any year / quarter column given.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with one SA2 code column and the columns to aggregate.
        It is not modified; the function works on a copy.
    sa2_col_name : str
        Name of the column in ``df`` holding the SA2 code.
    aggregation_functions : dict
        Mapping ``{column name: aggregation}`` passed to ``GroupBy.agg``.
        Its key order determines the order of the aggregated output columns.
    year_col_name : str, optional
        Extra grouping column (year), if the data is yearly.
    quarter_col_name : str, optional
        Extra grouping column (quarter). May be given with or without
        ``year_col_name``.
    regions : pandas.DataFrame, optional
        Lookup with at least ``'suburbs'`` and ``'code'`` columns. Defaults to
        the module-level ``exploded_regions``.

    Returns
    -------
    pandas.DataFrame
        One row per group, with the grouping columns restored as ordinary
        columns followed by the aggregated columns.

    Notes
    -----
    Because the merge is a left join, suburb groups without a match in ``df``
    have NaN in the year / quarter columns; ``groupby`` drops NaN keys by
    default, so such groups are absent from the result when those columns are
    used for grouping.
    """
    if regions is None:
        regions = exploded_regions

    # Copy first: coercing the SA2 column must not leak back into the caller's frame.
    sa2_stats = df.copy()
    sa2_stats[sa2_col_name] = pd.to_numeric(sa2_stats[sa2_col_name], errors='coerce')
    regions_with_stats = pd.merge(regions, sa2_stats, left_on='code', right_on=sa2_col_name, how='left')

    # Unparseable values become NaN so they are skipped by the aggregations.
    for col in aggregation_functions:
        regions_with_stats[col] = pd.to_numeric(regions_with_stats[col], errors='coerce')

    # Build the grouping keys from whichever time columns were supplied, so a
    # quarter column without a year column no longer puts None into the key list.
    group_keys = ['suburbs']
    if year_col_name:
        group_keys.append(year_col_name)
    if quarter_col_name:
        group_keys.append(quarter_col_name)

    grouped_by = regions_with_stats.groupby(group_keys).agg(aggregation_functions)
    return grouped_by.reset_index()

# Show the exploded lookup: one row per (suburb group, SA2 code) pair.
exploded_regions

Unnamed: 0.1,Unnamed: 0,geometry,suburbs,regions,code
0,0,POLYGON ((144.97018001032677 -37.8606998481848...,Albert Park-Middle Park-West St Kilda,"['Albert Park', 'St Kilda - West']",206051128
0,0,POLYGON ((144.97018001032677 -37.8606998481848...,Albert Park-Middle Park-West St Kilda,"['Albert Park', 'St Kilda - West']",206051514
1,1,POLYGON ((144.79015492149858 -37.8621593959499...,Altona,"['Altona', 'Altona North']",213021341
1,1,POLYGON ((144.79015492149858 -37.8621593959499...,Altona,"['Altona', 'Altona North']",213021343
2,2,POLYGON ((145.01167433388778 -37.8535692509816...,Armadale,['Armadale'],206061135
...,...,...,...,...,...
141,141,POLYGON ((144.8869958358719 -37.85078700244943...,Williamstown,['Williamstown'],213021346
142,142,POLYGON ((146.77393701213975 -36.1280104970832...,Wodonga,"['Wodonga', 'West Wodonga']",204031492
142,142,POLYGON ((146.77393701213975 -36.1280104970832...,Wodonga,"['Wodonga', 'West Wodonga']",204031073
143,143,POLYGON ((144.85984002458792 -37.8138202803743...,Yarraville-Seddon,"['Yarraville', 'Seddon - Kingsville']",213031352


Need to edit:
1. The load path of the dataframe to aggregate
2. The output path to save the output
3. Create the aggregation function dictionary
4. Call the aggregation function and save the output

In [6]:
# EDIT THESE FIELDS
LOAD_PATH = '../../data/raw/housing/merged.csv'
OUTPUT_PATH = '../../data/raw/housing/merged_as_suburbs.csv'

df = pd.read_csv(LOAD_PATH)

# Aggregation spec, assembled from two plain column lists for convenience.
# The final mapping has the form { 'col_name': 'func', 'col2': 'func' }
# ---------------------------------------------------------

# columns whose values are averaged within each group
avg_list = [
    'economic: median income',
    'economic: median age of earners',
    'economic: gini coefficient',
    'economic: top 1% suburb',
    'economic: top 5% suburb',
    'economic: trimmed mean',
    'economic: variable interest rate',
    'economic: gdp annual',
    'population: growth'
]

# columns whose values are summed within each group
sum_list = [
    'economic: number of earners',
    'population: total',
]

# Averaged columns first, then summed columns; this order is also the
# column order of the aggregated output.
agg_functions = {
    **dict.fromkeys(avg_list, 'mean'),
    **dict.fromkeys(sum_list, 'sum'),
}

# ----------------------------------------------------------

# EDIT ME: Calls the actual join function
out = df_to_regions(
    df = df,
    sa2_col_name = 'SA2 code',
    aggregation_functions = agg_functions,
    year_col_name = 'year',
)
# NOTE(review): to_csv also writes the row index as an unnamed first column;
# consider index=False if downstream readers do not rely on it.
out.to_csv(OUTPUT_PATH)
out

Unnamed: 0,suburbs,year,economic: median income,economic: median age of earners,economic: gini coefficient,economic: top 1% suburb,economic: top 5% suburb,economic: trimmed mean,economic: variable interest rate,economic: gdp annual,population: growth,economic: number of earners,population: total
0,Albert Park-Middle Park-West St Kilda,2017,62675.00,39.50,0.5565,14.55,30.75,1.660228,-0.031851,3.031635,0.007761,22126.5,31878.0
1,Albert Park-Middle Park-West St Kilda,2018,63913.25,39.75,0.5565,14.55,30.75,1.710868,0.002702,2.522283,0.015753,22343.0,32378.0
2,Albert Park-Middle Park-West St Kilda,2019,66394.50,40.50,0.5565,14.55,30.75,1.609607,-0.029640,1.710125,0.014289,21515.0,32848.0
3,Albert Park-Middle Park-West St Kilda,2020,68526.50,41.00,0.5565,14.55,30.75,1.432587,-0.104868,-3.145769,-0.007462,20730.0,32603.0
4,Altona,2017,57094.25,40.50,0.4340,6.15,17.65,1.660228,-0.031851,3.031635,0.016142,15434.5,28442.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
571,Wodonga,2020,52784.00,42.50,0.4095,5.90,16.80,1.432587,-0.104868,-3.145769,-0.002972,17438.0,30620.0
572,Yarraville-Seddon,2017,66464.00,39.50,0.4255,5.85,17.65,1.660228,-0.031851,3.031635,0.008125,15568.5,25581.0
573,Yarraville-Seddon,2018,69039.50,39.50,0.4255,5.85,17.65,1.710868,0.002702,2.522283,0.007757,15741.5,25804.0
574,Yarraville-Seddon,2019,72503.50,39.75,0.4255,5.85,17.65,1.609607,-0.029640,1.710125,0.003260,15522.5,25907.0
