# Affluence

This notebook aims to determine the affluence of each SA2 suburb by calculating an Affluence Index, which is a weighted combination of 70% average income and 30% population growth rate. The results are then visualized using an SA2 shapefile.

In [14]:
import pandas as pd
import geopandas as gpd
import folium

## Affluence Index

In [15]:
income = pd.read_csv("../data/curated/income_cleaned.csv")

In [16]:
# Change data type to integer
for col in ['2016-17', '2017-18', '2018-19', '2019-20', '2020-21']:
    income[col] = pd.to_numeric(income[col].str.replace(',', ''), errors='coerce')


In [17]:
# Calculate average income across years
income['average_income'] = income[['2016-17', '2017-18', '2018-19', '2019-20', '2020-21']].mean(axis=1)

# Calculate income growth rate from 2016-17 to 2020-21
income['growth_rate'] = (income['2020-21'] - income['2016-17']) / income['2016-17']

# Affluence index by combining 70% of average income and 30% of growth rate
income['affluence_index'] = income['average_income'] * 0.7 + income['growth_rate'] * 0.3
income.to_csv("../data/curated/income_with_affluence.csv")

# Sort
income = income.sort_values(by='affluence_index', ascending=False)

income[['SA2', 'SA2 NAME', 'affluence_index']].head()


Unnamed: 0,SA2,SA2 NAME,affluence_index
145,206051130,Port Melbourne,53425.708198
129,206041119,East Melbourne,52700.952595
134,206041127,West Melbourne - Industrial,52312.491414
162,206071517,Richmond (South) - Cremorne,52276.060451
153,206061138,Toorak,51622.502428


## Visualization of Affluence Index

In [18]:
# Load SA2 shapefile
zones = gpd.read_file( "../data/raw/external/SA2_2021_ShapeFile/SA2_2021_AUST_GDA2020.shp")
zones['geometry'] = zones['geometry'].to_crs("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")

In [19]:
gdf = gpd.GeoDataFrame(
    pd.merge(income, zones, left_on='SA2 NAME',right_on='SA2_NAME21', how='inner')
)

# create a JSON 
geoJSON = gdf[['SA2_NAME21', 'geometry']].drop_duplicates('SA2_NAME21').to_json()

# print the first 300 chars of the json
print(geoJSON[:300])

{"type": "FeatureCollection", "features": [{"id": "0", "type": "Feature", "properties": {"SA2_NAME21": "Port Melbourne"}, "geometry": {"type": "Polygon", "coordinates": [[[144.91690867221345, -37.83653682439829], [144.91721498216685, -37.83647492441878], [144.9198837217586, -37.83596199459752], [144


In [20]:
m = folium.Map(location=[-37.8, 144.9] , tiles="Cartodb Positron", zoom_start=10)


c = folium.Choropleth(
    geo_data=geoJSON, # geoJSON 
    name='choropleth', # name of plot
    data=income.reset_index(), # data source
    columns=['SA2 NAME','affluence_index'], # the columns required
    key_on='properties.SA2_NAME21', # this is from the geoJSON's properties
    fill_color='OrRd',  # Color map
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='affluence index'
)

c.add_to(m)

<folium.features.Choropleth at 0x28fdcd950>

In [21]:
# (y, x) since we want (lat, long)
gdf['centroid'] = gdf['geometry'].apply(lambda x: (x.centroid.y, x.centroid.x))

# Find top 5 SA2 suburbs with the highest affluence index
top_5_population_growth = income.sort_values(by='affluence_index', ascending=False).head(5)

# Select the relevant columns: 'GCCSA name', 'SA2 code', 'SA2 name', and 'growth_rate'
top_5_population_growth = top_5_population_growth[['SA2', 'SA2 NAME', 'affluence_index']]

# Display the result
print(top_5_population_growth)

           SA2                     SA2 NAME  affluence_index
145  206051130               Port Melbourne     53425.708198
129  206041119               East Melbourne     52700.952595
134  206041127  West Melbourne - Industrial     52312.491414
162  206071517  Richmond (South) - Cremorne     52276.060451
153  206061138                       Toorak     51622.502428


In [22]:
for zone_name, coord in gdf.loc[gdf['SA2'].isin(top_5_population_growth['SA2']), ['SA2 NAME', 'centroid']].values:
    m.add_child(
        folium.Marker(location=coord, popup=zone_name)
    )
m.save('../plots/affluence_index.html')