# Population forecast

This notebook forecasts the population of each Victorian SA2 region for 2024 to 2028, using annual population data for 2001 to 2023 sourced from the [ABS website](https://www.abs.gov.au/statistics/people/population/regional-population/2022-23/32180DS0003_2001-23.xlsx). Each region is projected forward using its average historical annual growth rate (computed from the year-on-year changes, 2002 onwards), and the results are then visualised on the SA2 shapefile.









In [1]:
import pandas as pd
import geopandas as gpd
import folium

## Population Forecast

In [2]:
# Read the cleaned population table from the curated data folder
population_vic = pd.read_csv("../data/curated/population_cleaned.csv")

# Column labels made up only of digits (the years) become ints so they can be
# used in arithmetic later on; every other label is left untouched
digit_labels = {label: int(label) for label in population_vic.columns if label.isdigit()}
population_vic = population_vic.rename(columns=digit_labels)
population_vic.head()

Unnamed: 0,GCCSA name,SA2 code,SA2 name,2001,2002,2003,2004,2005,2006,2007,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,Rest of Vic.,201011001,Alfredton,5756,6092,6293,6480,6648,6761,7034,...,10338,11039,11852,12649,13537,14434,15507,16841,18002,18997
1,Rest of Vic.,201011002,Ballarat,11497,11708,12015,12189,12269,12356,12408,...,12327,12300,12301,12266,12244,12320,12196,12071,11938,11809
2,Rest of Vic.,201011005,Buninyong,5320,5399,5557,5620,5857,6037,6131,...,7082,7191,7311,7409,7418,7458,7377,7229,7247,7323
3,Rest of Vic.,201011006,Delacombe,4154,4225,4371,4465,4704,5041,5206,...,6583,6846,7195,7622,8183,8890,9755,10648,11798,12869
4,Rest of Vic.,201011007,Smythes Creek,3317,3378,3411,3473,3508,3542,3594,...,3945,3966,3990,4004,4042,4112,4152,4211,4223,4268


In [3]:
# Years to forecast; each one is derived from the year immediately before it,
# so the list must be consecutive and start right after the last observed year.
future_years = [2024, 2025, 2026, 2027, 2028]

# Observed years = every integer-labelled column before the first forecast year.
# Selecting them explicitly (instead of differencing every non-index column)
# keeps this cell safe to re-run once 'growth_rate' and the forecast columns exist.
observed_years = [
    col for col in population_vic.columns
    if isinstance(col, int) and col < min(future_years)
]

# Calculate yearly growth rate for each SA2 region (first observed year has no
# predecessor, so its entry is NaN)
growth_rates = population_vic[observed_years].pct_change(axis=1)

# Compute the average growth rate (NaN entries are skipped by mean)
# NOTE(review): this is an arithmetic mean of yearly rates; a single jump from a
# very small base can dominate it. Confirm this is the intended projection rate.
average_growth_rate = growth_rates.mean(axis=1)

# Check for NaN values
print(average_growth_rate.isnull().sum())

# Check for infinite values
print((average_growth_rate == float('inf')).sum())
print((average_growth_rate == float('-inf')).sum())

# Drop regions whose rate is missing or infinite. Infinities are mapped to a
# float NaN (not pd.NA) so that 'growth_rate' stays a numeric float column
# instead of silently turning into an object column.
finite_rate = average_growth_rate.replace([float('inf'), float('-inf')], float('nan'))
has_valid_rate = finite_rate.notna()

# .assign() returns a fresh frame, so the column writes below never touch a
# slice of the original table
population_vic = (
    population_vic.loc[has_valid_rate]
    .assign(growth_rate=finite_rate[has_valid_rate])
    .reset_index(drop=True)
)

# Calculate future population for 2024-2028 by compounding the average rate
for year in future_years:
    last_population = population_vic[year - 1]  # Last year's population
    population_vic[year] = last_population * (1 + population_vic['growth_rate'])
population_vic.head()

2
5
0


Unnamed: 0,GCCSA name,SA2 code,SA2 name,2001,2002,2003,2004,2005,2006,2007,...,2020,2021,2022,2023,growth_rate,2024,2025,2026,2027,2028
0,Rest of Vic.,201011001,Alfredton,5756,6092,6293,6480,6648,6761,7034,...,15507,16841,18002,18997,0.055946,20059.806963,21182.07377,22367.126962,23618.479191,24939.839624
1,Rest of Vic.,201011002,Ballarat,11497,11708,12015,12189,12269,12356,12408,...,12196,12071,11938,11809,0.001261,11823.890388,11838.799551,11853.727514,11868.6743,11883.639933
2,Rest of Vic.,201011005,Buninyong,5320,5399,5557,5620,5857,6037,6131,...,7377,7229,7247,7323,0.01472,7430.791418,7540.169479,7651.157538,7763.779293,7878.058793
3,Rest of Vic.,201011006,Delacombe,4154,4225,4371,4465,4704,5041,5206,...,9755,10648,11798,12869,0.053085,13552.156808,14271.579311,15029.192689,15827.024324,16667.20922
4,Rest of Vic.,201011007,Smythes Creek,3317,3378,3411,3473,3508,3542,3594,...,4152,4211,4223,4268,0.011535,4317.229903,4367.027656,4417.399809,4468.352987,4519.893892


In [4]:
# Write the forecast table (without the row index) to the curated data folder
forecast_csv_path = "../data/curated/population_forecast.csv"
population_vic.to_csv(forecast_csv_path, index=False)

In [5]:
# Read the SA2 boundary shapefile and reproject its geometries to WGS84 long/lat
wgs84_proj = "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
zones = gpd.read_file("../data/raw/external/SA2_2021_ShapeFile/SA2_2021_AUST_GDA2020.shp")
zones['geometry'] = zones['geometry'].to_crs(wgs84_proj)

# Attach the boundaries to the population table by SA2 name; the inner join
# keeps only regions present in both tables
population_with_zones = population_vic.merge(
    zones, left_on='SA2 name', right_on='SA2_NAME21', how='inner'
)
gdf = gpd.GeoDataFrame(population_with_zones)

# One GeoJSON feature per SA2 name, carrying only the name and its geometry
sa2_shapes = gdf[['SA2 name', 'geometry']].drop_duplicates('SA2 name')
geoJSON = sa2_shapes.to_json()

# Preview: first 300 characters of the GeoJSON string
print(geoJSON[:300])

{"type": "FeatureCollection", "features": [{"id": "0", "type": "Feature", "properties": {"SA2 name": "Alfredton"}, "geometry": {"type": "Polygon", "coordinates": [[[143.78282104711133, -37.566657808073295], [143.75557764214773, -37.56346721632544], [143.7480171735439, -37.56259575584101], [143.73820


In [6]:
# Base map; the start location is given as [lat, long]
m = folium.Map(location=[-37.8, 144.9], tiles="Cartodb Positron", zoom_start=10)

# Visual settings of the choropleth layer, kept apart from the data wiring
choropleth_style = dict(
    fill_color='OrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Population Growth Rate',
)

# Colour each SA2 polygon by its growth rate: rows of the data are matched to
# GeoJSON features through the 'SA2 name' property
c = folium.Choropleth(
    geo_data=geoJSON,
    data=population_vic.reset_index(),
    columns=['SA2 name', 'growth_rate'],
    key_on='properties.SA2 name',
    name='choropleth',
    **choropleth_style,
)

c.add_to(m)

<folium.features.Choropleth at 0x7f2ec68b9cf0>

In [7]:
def _centroid_lat_lon(shape):
    """Return the centroid of `shape` as a (lat, long) tuple, i.e. (y, x)."""
    centre = shape.centroid
    return (centre.y, centre.x)


# Store each region's centroid in (lat, long) order
gdf['centroid'] = gdf['geometry'].apply(_centroid_lat_lon)
gdf.head()

Unnamed: 0,GCCSA name,SA2 code,SA2 name,2001,2002,2003,2004,2005,2006,2007,...,GCC_CODE21,GCC_NAME21,STE_CODE21,STE_NAME21,AUS_CODE21,AUS_NAME21,AREASQKM21,LOCI_URI21,geometry,centroid
0,Rest of Vic.,201011001,Alfredton,5756,6092,6293,6480,6648,6761,7034,...,2RVIC,Rest of Vic.,2,Victoria,AUS,Australia,52.7109,http://linked.data.gov.au/dataset/asgsed3/SA2/...,"POLYGON ((143.78282 -37.56666, 143.75558 -37.5...","(-37.54173636281507, 143.749330252453)"
1,Rest of Vic.,201011002,Ballarat,11497,11708,12015,12189,12269,12356,12408,...,2RVIC,Rest of Vic.,2,Victoria,AUS,Australia,12.3787,http://linked.data.gov.au/dataset/asgsed3/SA2/...,"POLYGON ((143.81896 -37.55582, 143.81644 -37.5...","(-37.5561439450457, 143.83665489612585)"
2,Rest of Vic.,201011005,Buninyong,5320,5399,5557,5620,5857,6037,6131,...,2RVIC,Rest of Vic.,2,Victoria,AUS,Australia,51.5855,http://linked.data.gov.au/dataset/asgsed3/SA2/...,"POLYGON ((143.84171 -37.61596, 143.84176 -37.6...","(-37.643854141582494, 143.880777903821)"
3,Rest of Vic.,201011006,Delacombe,4154,4225,4371,4465,4704,5041,5206,...,2RVIC,Rest of Vic.,2,Victoria,AUS,Australia,34.1607,http://linked.data.gov.au/dataset/asgsed3/SA2/...,"POLYGON ((143.7505 -37.59119, 143.75044 -37.59...","(-37.58222851797997, 143.77847784283048)"
4,Rest of Vic.,201011007,Smythes Creek,3317,3378,3411,3473,3508,3542,3594,...,2RVIC,Rest of Vic.,2,Victoria,AUS,Australia,104.7274,http://linked.data.gov.au/dataset/asgsed3/SA2/...,"POLYGON ((143.73296 -37.62333, 143.73263 -37.6...","(-37.62024909240558, 143.74623319717654)"


In [8]:
# The five SA2 regions with the highest average growth rate, keeping only the
# 'SA2 code', 'SA2 name' and 'growth_rate' columns
top_5_population_growth = (
    population_vic[['SA2 code', 'SA2 name', 'growth_rate']]
    .sort_values(by='growth_rate', ascending=False)
    .head(5)
)

top_5_population_growth


Unnamed: 0,SA2 code,SA2 name,growth_rate
357,212031556,Clyde North - South,0.495583
433,213051466,Point Cook - South,0.450253
360,212031559,Cranbourne North - East,0.435784
356,212031555,Clyde North - North,0.427336
250,209041528,Doreen - South,0.424444


In [9]:
# Drop a marker on the centroid of every top-5 region, labelled with its name
is_top_5 = gdf['SA2 code'].isin(top_5_population_growth['SA2 code'])
top_5_zones = gdf.loc[is_top_5, ['SA2 name', 'centroid']]

for zone_name, coord in zip(top_5_zones['SA2 name'], top_5_zones['centroid']):
    marker = folium.Marker(location=coord, popup=zone_name)
    m.add_child(marker)

# Write the finished map to a standalone HTML file
m.save('../plots/population_growth_rate.html')