Before running this notebook, ensure that you have run the `preprocess_demographics.ipynb` notebook.

In [46]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the demographics CSV into a DataFrame; the path is relative to the
# notebook's working directory.
demographics = pd.read_csv(filepath_or_buffer='../data/raw/demographics.csv')

# Echo the column index so the available features are visible as cell output.
demographics.columns

Index(['sa2_name', 'erp_june_2022_count', 'erp_june_2023_count',
       'erp_change_count', 'erp_change_percentage', 'natural_increase_count',
       'net_internal_migration_count', 'net_overseas_migration_count',
       'area_km2', 'pop_density_persons_km2', 'gov_age_pension_count_2023',
       'gov_rent_assist_count_2023', 'personal_income_count_2020',
       'personal_income_median_age_2020',
       'personal_total_income_millions_2020',
       'median_personal_total_income_2020', 'mean_personal_total_income_2020',
       'gini_coef_2020'],
      dtype='object')

### Getting the Top and Bottom 10 Suburbs for each Feature of Interest

In [47]:
# Number of rows kept in each ranking. The Gini rankings keep one extra row.
# NOTE(review): the reason for 11 is not stated in this notebook -- possibly to
# allow for an out-of-range Gini value; confirm before changing it.
N_SUBURBS = 10
N_SUBURBS_GINI = 11


def _rank_suburbs(column, n=N_SUBURBS, ascending=False):
    """Return the first `n` rows of `demographics` when sorted by `column`.

    Args:
        column: Name of the `demographics` column to sort on.
        n: Number of rows to keep from the head of the sorted frame.
        ascending: False gives largest values first ("top"), True gives
            smallest values first ("bottom").

    Returns:
        A DataFrame with at most `n` rows and every original column.
    """
    return demographics.sort_values(by=column, ascending=ascending).head(n)


# Population columns
top_suburbs_count = _rank_suburbs('erp_change_count')
top_suburbs_percentage = _rank_suburbs('erp_change_percentage')
top_pop_density = _rank_suburbs('pop_density_persons_km2')

bottom_suburbs_count = _rank_suburbs('erp_change_count', ascending=True)
# The original name was misspelled; keep it bound so any cell that still
# references it continues to work.
bottom_subrbs_count = bottom_suburbs_count
bottom_suburbs_percentage = _rank_suburbs('erp_change_percentage', ascending=True)
bottom_pop_density = _rank_suburbs('pop_density_persons_km2', ascending=True)

# Income columns
top_income_count = _rank_suburbs('personal_income_count_2020')
top_income_median_age = _rank_suburbs('personal_income_median_age_2020')
top_income_millions = _rank_suburbs('personal_total_income_millions_2020')
top_median_income = _rank_suburbs('median_personal_total_income_2020')
top_mean_income = _rank_suburbs('mean_personal_total_income_2020')

bottom_income_count = _rank_suburbs('personal_income_count_2020', ascending=True)
bottom_income_median_age = _rank_suburbs('personal_income_median_age_2020', ascending=True)
bottom_income_millions = _rank_suburbs('personal_total_income_millions_2020', ascending=True)
bottom_median_income = _rank_suburbs('median_personal_total_income_2020', ascending=True)
bottom_mean_income = _rank_suburbs('mean_personal_total_income_2020', ascending=True)

# Subsidy and inequality columns
top_rent_assist = _rank_suburbs('gov_rent_assist_count_2023')
top_pension = _rank_suburbs('gov_age_pension_count_2023')
top_inequality = _rank_suburbs('gini_coef_2020', n=N_SUBURBS_GINI)

bottom_rent_assist = _rank_suburbs('gov_rent_assist_count_2023', ascending=True)
bottom_pension = _rank_suburbs('gov_age_pension_count_2023', ascending=True)
bottom_inequality = _rank_suburbs('gini_coef_2020', n=N_SUBURBS_GINI, ascending=True)

### Population Analysis

In [48]:
# Tabulate the suburbs ranked highest by `erp_change_count`.
print(top_suburbs_count[['sa2_name', 'erp_change_count']])

# The bar chart below is intentionally disabled. It is kept as `#` comments
# rather than a bare triple-quoted string: a string literal on the last line of
# a cell is an expression, so Jupyter echoed the whole block as the cell result.
# Strip one leading `# ` from each line to render the chart.
#
# # Create the bar chart
# plt.bar(top_suburbs_count['sa2_name'], top_suburbs_count['erp_change_count'])
#
# # Add titles and labels
# plt.title('Top 10 Suburbs by Population Growth (2022-2023)')
# plt.xlabel('Suburb')
# plt.ylabel('Population Growth')
#
# # Rotate x-axis labels for better readability
# plt.xticks(rotation=45, ha='right')
#
# # Display the bar chart (the call parentheses were missing originally)
# plt.tight_layout()
# plt.show()

                            sa2_name  erp_change_count
422        Rockbank - Mount Cottrell            4299.0
430           Fraser Rise - Plumpton            3429.0
293               Mickleham - Yuroke            3408.0
127                          Carlton            3319.0
136            Melbourne CBD - North            2690.0
137             Melbourne CBD - West            2684.0
364              Clyde North - South            2679.0
442                  Werribee - West            2608.0
253                          Wollert            2514.0
446  Tarneit (West) - Mount Cottrell            2199.0


"\n# Create the bar chart\nplt.bar(top_suburbs_count['sa2_name'], top_suburbs_count['erp_change_count'])\n\n# Add titles and labels\nplt.title('Top 10 Suburbs by Population Growth (2022-2023)')\nplt.xlabel('Suburb')\nplt.ylabel('Population Growth')\n\n# Rotate x-axis labels for better readability\nplt.xticks(rotation=45, ha='right')\n\n# Display the bar chart\nplt.tight_layout()\nplt.show\n"

In [49]:
# Tabulate the suburbs ranked highest by `erp_change_percentage`.
print(top_suburbs_percentage[['sa2_name', 'erp_change_percentage']])

# The bar chart below is intentionally disabled. It is kept as `#` comments
# rather than a bare triple-quoted string: a string literal on the last line of
# a cell is an expression, so Jupyter echoed the whole block as the cell result.
# Strip one leading `# ` from each line to render the chart.
#
# # Create the bar chart
# plt.bar(top_suburbs_percentage['sa2_name'], top_suburbs_percentage['erp_change_percentage'])
#
# # Add titles and labels
# plt.title('Top 10 Suburbs by Population Growth Percentage (2022-2023)')
# plt.xlabel('Suburb')
# plt.ylabel('Population Growth Percentage (%)')
#
# # Rotate x-axis labels for better readability
# plt.xticks(rotation=45, ha='right')
#
# # Display the bar chart
# plt.tight_layout()
# plt.show()

                            sa2_name  erp_change_percentage
430           Fraser Rise - Plumpton                   26.4
448                  Tarneit - North                   18.9
392   Clayton (North) - Notting Hill                   18.8
422        Rockbank - Mount Cottrell                   18.7
127                          Carlton                   18.4
446  Tarneit (West) - Mount Cottrell                   16.8
135             Melbourne CBD - East                   16.1
137             Melbourne CBD - West                   15.5
428        Cobblebank - Strathtulloh                   15.5
364              Clyde North - South                   14.9


"\n# Create the bar chart\nplt.bar(top_suburbs_percentage['sa2_name'], top_suburbs_percentage['erp_change_percentage'])\n\n# Add titles and labels\nplt.title('Top 10 Suburbs by Population Growth Percentage (2022-2023)')\nplt.xlabel('Suburb')\nplt.ylabel('Population Growth Percentage (%)')\n\n# Rotate x-axis labels for better readability\nplt.xticks(rotation=45, ha='right')\n\n# Display the bar chart\nplt.tight_layout()\nplt.show()\n"

In [50]:
# Tabulate the suburbs ranked highest by `pop_density_persons_km2`.
print(top_pop_density[['sa2_name', 'pop_density_persons_km2']])

# The bar chart below is intentionally disabled. It is kept as `#` comments
# rather than a bare triple-quoted string: a string literal on the last line of
# a cell is an expression, so Jupyter echoed the whole block as the cell result.
# Strip one leading `# ` from each line to render the chart.
#
# # Create the bar chart
# plt.bar(top_pop_density['sa2_name'], top_pop_density['pop_density_persons_km2'])
#
# # Add titles and labels
# plt.title('Top 10 Suburbs by Population Density')
# plt.xlabel('Suburb')
# plt.ylabel('Population Density')
#
# # Rotate x-axis labels for better readability
# plt.xticks(rotation=45, ha='right')
#
# # Display the bar chart
# plt.tight_layout()
# plt.show()

                           sa2_name  pop_density_persons_km2
136           Melbourne CBD - North                  38401.0
141                Southbank - East                  23318.4
137            Melbourne CBD - West                  19925.4
135            Melbourne CBD - East                  15471.3
127                         Carlton                  11753.5
154             South Yarra - North                  11061.9
142    West Melbourne - Residential                  10578.4
140  Southbank (West) - South Wharf                   8731.2
149              St Kilda - Central                   8576.3
159                         Fitzroy                   8334.4


"\n# Create the bar chart\nplt.bar(top_pop_density['sa2_name'], top_pop_density['pop_density_persons_km2'])\n\n# Add titles and labels\nplt.title('Top 10 Suburbs by Population Density')\nplt.xlabel('Suburb')\nplt.ylabel('Population Density')\n\n# Rotate x-axis labels for better readability\nplt.xticks(rotation=45, ha='right')\n\n# Display the bar chart\nplt.tight_layout()\nplt.show()\n"

### Income Analysis

In [51]:
# Tabulate the suburbs ranked highest by `personal_income_count_2020`.
print(top_income_count[['sa2_name', 'personal_income_count_2020']])

# The bar chart below is intentionally disabled. It is kept as `#` comments
# rather than a bare triple-quoted string: a string literal on the last line of
# a cell is an expression, so Jupyter echoed the whole block as the cell result.
# Strip one leading `# ` from each line to render the chart.
#
# # Create the bar chart
# plt.bar(top_income_count['sa2_name'], top_income_count['personal_income_count_2020'])
#
# # Add titles and labels
# plt.title('Top 10 Suburbs by Number of People who Earn Income (2020)')
# plt.xlabel('Suburb')
# plt.ylabel('Count')
#
# # Rotate x-axis labels for better readability
# plt.xticks(rotation=45, ha='right')
#
# # Display the bar chart
# plt.tight_layout()
# plt.show()

                        sa2_name  personal_income_count_2020
190              Brighton (Vic.)                     16061.0
195         Bentleigh - McKinnon                     15433.0
234                       Eltham                     14950.0
177                    Doncaster                     14852.0
459                   Langwarrin                     14805.0
210  Highett (East) - Cheltenham                     14780.0
152            Prahran - Windsor                     14750.0
197            Caulfield - North                     14651.0
41                       Highton                     14540.0
216          Malvern - Glen Iris                     14476.0


"\n# Create the bar chart\nplt.bar(top_income_count['sa2_name'], top_income_count['personal_income_count_2020'])\n\n# Add titles and labels\nplt.title('Top 10 Suburbs by Number of People who Earn Income (2020)')\nplt.xlabel('Suburb')\nplt.ylabel('Count')\n\n# Rotate x-axis labels for better readability\nplt.xticks(rotation=45, ha='right')\n\n# Display the bar chart\nplt.tight_layout()\nplt.show()\n"

In [52]:
# Tabulate the suburbs ranked highest by `personal_income_median_age_2020`.
print(top_income_median_age[['sa2_name', 'personal_income_median_age_2020']])

# The bar chart below is intentionally disabled. It is kept as `#` comments
# rather than a bare triple-quoted string: a string literal on the last line of
# a cell is an expression, so Jupyter echoed the whole block as the cell result.
# Strip one leading `# ` from each line to render the chart.
#
# # Create the bar chart
# plt.bar(top_income_median_age['sa2_name'], top_income_median_age['personal_income_median_age_2020'])
#
# # Add titles and labels
# plt.title('Top 10 Suburbs by Median Age of Income Earners (2020)')
# plt.xlabel('Suburb')
# plt.ylabel('Median Age')
#
# # Rotate x-axis labels for better readability
# plt.xticks(rotation=45, ha='right')
#
# # Display the bar chart
# plt.tight_layout()
# plt.show()

                         sa2_name  personal_income_median_age_2020
105                   Alps - West                             59.0
93                  French Island                             57.0
54   Point Lonsdale - Queenscliff                             57.0
463                      Flinders                             55.0
91                    Paynesville                             55.0
53                  Portarlington                             54.0
52               Lorne - Anglesea                             53.0
68              Benalla Surrounds                             52.0
59                          Euroa                             52.0
206                      Braeside                             52.0


"\n# Create the bar chart\nplt.bar(top_income_median_age['sa2_name'], top_income_median_age['personal_income_median_age_2020'])\n\n# Add titles and labels\nplt.title('Top 10 Suburbs by Median Age of Income Earners (2020)')\nplt.xlabel('Suburb')\nplt.ylabel('Median Age')\n\n# Rotate x-axis labels for better readability\nplt.xticks(rotation=45, ha='right')\n\n# Display the bar chart\nplt.tight_layout()\nplt.show()\n"

In [53]:
# Tabulate the suburbs ranked highest by `personal_total_income_millions_2020`.
print(top_income_millions[['sa2_name', 'personal_total_income_millions_2020']])

# The bar chart below is intentionally disabled. It is kept as `#` comments
# rather than a bare triple-quoted string: a string literal on the last line of
# a cell is an expression, so Jupyter echoed the whole block as the cell result.
# Strip one leading `# ` from each line to render the chart.
#
# # Create the bar chart
# plt.bar(top_income_millions['sa2_name'], top_income_millions['personal_total_income_millions_2020'])
#
# # Add titles and labels
# plt.title('Top 10 Suburbs by Total Income in Millions (2020)')
# plt.xlabel('Suburb')
# plt.ylabel('Income in Millions ($)')
#
# # Rotate x-axis labels for better readability
# plt.xticks(rotation=45, ha='right')
#
# # Display the bar chart
# plt.tight_layout()
# plt.show()

                             sa2_name  personal_total_income_millions_2020
190                   Brighton (Vic.)                               2244.0
153                            Toorak                               1777.4
216               Malvern - Glen Iris                               1743.8
167                        Camberwell                               1490.8
217                      Malvern East                               1435.5
197                 Caulfield - North                               1435.4
143                       Albert Park                               1392.2
152                 Prahran - Windsor                               1339.8
171  Surrey Hills (West) - Canterbury                               1257.3
195              Bentleigh - McKinnon                               1217.2


"\n# Create the bar chart\nplt.bar(top_income_millions['sa2_name'], top_income_millions['personal_total_income_millions_2020'])\n\n# Add titles and labels\nplt.title('Top 10 Suburbs by Total Income in Millions (2020)')\nplt.xlabel('Suburb')\nplt.ylabel('Income in Millions ($)')\n\n# Rotate x-axis labels for better readability\nplt.xticks(rotation=45, ha='right')\n\n# Display the bar chart\nplt.tight_layout()\nplt.show()\n"

In [54]:
# Tabulate the suburbs ranked highest by `median_personal_total_income_2020`.
print(top_median_income[['sa2_name', 'median_personal_total_income_2020']])

# The bar chart below is intentionally disabled. It is kept as `#` comments
# rather than a bare triple-quoted string: a string literal on the last line of
# a cell is an expression, so Jupyter echoed the whole block as the cell result.
# Strip one leading `# ` from each line to render the chart.
#
# # Create the bar chart
# plt.bar(top_median_income['sa2_name'], top_median_income['median_personal_total_income_2020'])
#
# # Add titles and labels
# plt.title('Top 10 Suburbs by Median Income (2020)')
# plt.xlabel('Suburb')
# plt.ylabel('Median Income ($)')
#
# # Rotate x-axis labels for better readability
# plt.xticks(rotation=45, ha='right')
#
# # Display the bar chart
# plt.tight_layout()
# plt.show()

                        sa2_name  median_personal_total_income_2020
134  West Melbourne - Industrial                            90927.0
145               Port Melbourne                            78187.0
129               East Melbourne                            77804.0
153                       Toorak                            74766.0
97            Wilsons Promontory                            73640.0
411                      Newport                            72348.0
151                     Armadale                            72012.0
161    Clifton Hill - Alphington                            71765.0
419                   Yarraville                            71512.0
131            Kensington (Vic.)                            70336.0


"\n# Create the bar chart\nplt.bar(top_median_income['sa2_name'], top_median_income['median_personal_total_income_2020'])\n\n# Add titles and labels\nplt.title('Top 10 Suburbs by Median Income (2020)')\nplt.xlabel('Suburb')\nplt.ylabel('Median Income ($)')\n\n# Rotate x-axis labels for better readability\nplt.xticks(rotation=45, ha='right')\n\n# Display the bar chart\nplt.tight_layout()\nplt.show()\n"

In [55]:
# Tabulate the suburbs ranked highest by `mean_personal_total_income_2020`.
print(top_mean_income[['sa2_name', 'mean_personal_total_income_2020']])

# The bar chart below is intentionally disabled. It is kept as `#` comments
# rather than a bare triple-quoted string: a string literal on the last line of
# a cell is an expression, so Jupyter echoed the whole block as the cell result.
# Strip one leading `# ` from each line to render the chart.
#
# # Create the bar chart
# plt.bar(top_mean_income['sa2_name'], top_mean_income['mean_personal_total_income_2020'])
#
# # Add titles and labels
# plt.title('Top 10 Suburbs by Mean Income (2020)')
# plt.xlabel('Suburb')
# plt.ylabel('Mean Income ($)')
#
# # Rotate x-axis labels for better readability
# plt.xticks(rotation=45, ha='right')
#
# # Display the bar chart
# plt.tight_layout()
# plt.show()

                             sa2_name  mean_personal_total_income_2020
153                            Toorak                         179140.0
190                   Brighton (Vic.)                         139718.0
129                    East Melbourne                         136633.0
133                South Yarra - West                         130661.0
216               Malvern - Glen Iris                         120464.0
143                       Albert Park                         119226.0
151                          Armadale                         114804.0
171  Surrey Hills (West) - Canterbury                         113248.0
223          Ivanhoe East - Eaglemont                         112895.0
145                    Port Melbourne                         108763.0


"\n# Create the bar chart\nplt.bar(top_mean_income['sa2_name'], top_mean_income['mean_personal_total_income_2020'])\n\n# Add titles and labels\nplt.title('Top 10 Suburbs by Mean Income (2020)')\nplt.xlabel('Suburb')\nplt.ylabel('Mean Income ($)')\n\n# Rotate x-axis labels for better readability\nplt.xticks(rotation=45, ha='right')\n\n# Display the bar chart\nplt.tight_layout()\nplt.show()\n"

### Subsidy and Income Inequality Analysis

In [56]:
# Tabulate the suburbs ranked highest by `gov_rent_assist_count_2023`.
print(top_rent_assist[['sa2_name', 'gov_rent_assist_count_2023']])

# The bar chart below is intentionally disabled. It is kept as `#` comments
# rather than a bare triple-quoted string: a string literal on the last line of
# a cell is an expression, so Jupyter echoed the whole block as the cell result.
# Strip one leading `# ` from each line to render the chart.
#
# # Create the bar chart
# plt.bar(top_rent_assist['sa2_name'], top_rent_assist['gov_rent_assist_count_2023'])
#
# # Add titles and labels
# plt.title('Top 10 Suburbs by Number of People who Require Rent Assistance')
# plt.xlabel('Suburb')
# plt.ylabel('Count')
#
# # Rotate x-axis labels for better readability
# plt.xticks(rotation=45, ha='right')
#
# # Display the bar chart
# plt.tight_layout()
# plt.show()

                          sa2_name  gov_rent_assist_count_2023
380              Dandenong - North                      2593.0
456                      Frankston                      2370.0
101                        Morwell                      1868.0
23   Kangaroo Flat - Golden Square                      1842.0
457                Frankston North                      1772.0
507        Shepparton - South East                      1717.0
98           Wonthaggi - Inverloch                      1712.0
434      Melton South - Weir Views                      1697.0
464              Hastings - Somers                      1692.0
468               Rosebud - McCrae                      1654.0


"\n# Create the bar chart\nplt.bar(top_rent_assist['sa2_name'], top_rent_assist['gov_rent_assist_count_2023'])\n\n# Add titles and labels\nplt.title('Top 10 Suburbs by Number of People who Require Rent Assistance')\nplt.xlabel('Suburb')\nplt.ylabel('Count')\n\n# Rotate x-axis labels for better readability\nplt.xticks(rotation=45, ha='right')\n\n# Display the bar chart\nplt.tight_layout()\nplt.show()\n"

In [57]:
# Tabulate the suburbs ranked highest by `gov_age_pension_count_2023`.
print(top_pension[['sa2_name', 'gov_age_pension_count_2023']])

# The bar chart below is intentionally disabled. It is kept as `#` comments
# rather than a bare triple-quoted string: a string literal on the last line of
# a cell is an expression, so Jupyter echoed the whole block as the cell result.
# Strip one leading `# ` from each line to render the chart.
#
# # Create the bar chart
# plt.bar(top_pension['sa2_name'], top_pension['gov_age_pension_count_2023'])
#
# # Add titles and labels
# plt.title('Top 10 Suburbs by Number of People who are on Age Pension')
# plt.xlabel('Suburb')
# plt.ylabel('Count')
#
# # Rotate x-axis labels for better readability
# plt.xticks(rotation=45, ha='right')
#
# # Display the bar chart
# plt.tight_layout()
# plt.show()

                          sa2_name  gov_age_pension_count_2023
468               Rosebud - McCrae                      4834.0
98           Wonthaggi - Inverloch                      4823.0
23   Kangaroo Flat - Golden Square                      3542.0
245                     Thomastown                      3299.0
464              Hastings - Somers                      3290.0
70                      Wangaratta                      3203.0
49        Grovedale - Mount Duneed                      2929.0
100               Moe - Newborough                      2884.0
520            Warrnambool - North                      2842.0
373                Dandenong North                      2823.0


"\n# Create the bar chart\nplt.bar(top_pension['sa2_name'], top_pension['gov_age_pension_count_2023'])\n\n# Add titles and labels\nplt.title('Top 10 Suburbs by Number of People who are on Age Pension')\nplt.xlabel('Suburb')\nplt.ylabel('Count')\n\n# Rotate x-axis labels for better readability\nplt.xticks(rotation=45, ha='right')\n\n# Display the bar chart\nplt.tight_layout()\nplt.show()\n"

In [58]:
# Tabulate the suburbs ranked highest by `gini_coef_2020`.
# NOTE(review): the recorded output of this cell includes a value of 1.511,
# which falls outside the 0 < x < 1 range stated in the plot comment below --
# verify the upstream data before relying on this ranking.
print(top_inequality[['sa2_name', 'gini_coef_2020']])

# The bar chart below is intentionally disabled. It is kept as `#` comments
# rather than a bare triple-quoted string: a string literal on the last line of
# a cell is an expression, so Jupyter echoed the whole block as the cell result.
# Strip one leading `# ` from each line to render the chart.
#
# # Create the bar chart
# plt.bar(top_inequality['sa2_name'], top_inequality['gini_coef_2020'])
#
# # Add titles and labels
# plt.title('Top 11 Suburbs by Income Inequality (2020)')
# plt.xlabel('Suburb')
# plt.ylabel('Gini Coefficient') # 0 < x < 1, where 1 is maximum income inequality
#
# # Rotate x-axis labels for better readability
# plt.xticks(rotation=45, ha='right')
#
# # Display the bar chart
# plt.tight_layout()
# plt.show()

                             sa2_name  gini_coef_2020
26                White Hills - Ascot           1.511
153                            Toorak           0.709
110                            Yarram           0.648
190                   Brighton (Vic.)           0.643
133                South Yarra - West           0.625
463                          Flinders           0.616
483                 Mildura Surrounds           0.613
171  Surrey Hills (West) - Canterbury           0.610
129                    East Melbourne           0.607
143                       Albert Park           0.607
165                            Balwyn           0.598


"\n# Create the bar chart\nplt.bar(top_inequality['sa2_name'], top_inequality['gini_coef_2020'])\n\n# Add titles and labels\nplt.title('Top 11 Suburbs by Income Inequality (2020)')\nplt.xlabel('Suburb')\nplt.ylabel('Gini Coefficient') # 0 < x < 1, where 1 is maximum income inequality\n\n# Rotate x-axis labels for better readability\nplt.xticks(rotation=45, ha='right')\n\n# Display the bar chart\nplt.tight_layout()\nplt.show()\n"