In [None]:
import numpy as np
import scipy as sp
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import geopandas as gpd

In [None]:
# Load the SA2 2021 boundary shapefile (path is relative to this notebook) and
# remove the row labelled 2472 before any further processing.
# NOTE(review): presumably that record has no usable geometry — confirm, and
# consider filtering on the geometry itself instead of a hard-coded label.
sf = gpd.read_file(
    "../data/ABS/SA2 shapefile/SA2_2021_AUST_GDA2020.shp"
).drop(index=2472)
sf

In [None]:
# Wrap the boundary table in a GeoDataFrame and cast the SA2 code column to
# float; this column is the key the choropleth layers are matched on.
gdf = gpd.GeoDataFrame(sf)
gdf["SA2_CODE21"] = gdf["SA2_CODE21"].astype(float)
gdf.head()

In [None]:
# Serialise the boundaries to a GeoJSON string, keeping only the SA2 code and
# geometry and at most one feature per SA2 code.
geoJSON = (
    gdf[["SA2_CODE21", "geometry"]]
    .drop_duplicates(subset="SA2_CODE21")
    .to_json()
)

In [None]:
import folium

<h2>1. Observe the distribution of population based on SA2</h2>

In [None]:
# Read the curated population table.
df = pd.read_csv('../data/curated/clean_population.csv')

# Keep only the `sa2` and `2021_population` columns, restricted to rows whose
# 2021 population is below 30,000.
df_population = df.loc[df['2021_population'] < 30000, ['sa2', '2021_population']]
df_population

In [None]:
# Base map centred on Australia. folium takes [latitude, longitude]; Australia
# lies in the southern hemisphere, so the latitude must be negative.
# NOTE(review): the "Stamen Terrain" tile set may not be available as a
# built-in in recent folium releases — confirm against the installed version.
_map = folium.Map(location=[-25.2744, 133.7751], tiles="Stamen Terrain", zoom_start=4)

# Choropleth layer: shade each SA2 polygon by its 2021 population.
# See the folium documentation for details on plotting aggregated data.
c = folium.Choropleth(
    geo_data=geoJSON,  # GeoJSON string holding the SA2 boundaries
    name='choropleth',  # layer name
    data=df_population,  # table providing the values to plot
    columns=['sa2', '2021_population'],  # [key column, value column]
    key_on='properties.SA2_CODE21',  # GeoJSON property matched against the key column
    fill_color='YlOrRd',  # colour scheme
    nan_fill_color='white',  # polygons with no matching value are drawn white
    legend_name='Population'
)

c.add_to(_map)
_map

<h2>2. Observe the distribution of personal income based on SA2</h2>

In [None]:
# Load the curated annual income table; it is merged with the population data
# on the `sa2` column further down.
df_income = pd.read_csv("../data/curated/clean_annual.csv")

In [None]:
# Cast the `sa2` key to integer before it is used as the merge key.
# NOTE(review): astype(int) raises if the column contains missing values —
# confirm the curated file has none.
df_income["sa2"] = df_income["sa2"].astype(int)

In [None]:
# Inner-join income with population on `sa2`, then discard rows whose 2021
# population is zero so the per-person division that follows is well defined.
df_merge = (
    df_income
    .merge(df_population, how='inner', on='sa2')
    .loc[lambda merged: merged["2021_population"] != 0.0]
)
df_merge

In [None]:
# Income per person is computed as the area's total annual income divided by
# its total population.
df_merge['income_per_person'] = df_merge['income_sum'] / df_merge['2021_population']

In [None]:
# Keep rows whose income per person lies strictly between 0 and 80,000, and
# retain only the two columns the choropleth needs.
df_merge = df_merge.loc[
    (df_merge["income_per_person"] > 0) & (df_merge["income_per_person"] < 80000),
    ['sa2', 'income_per_person'],
]
df_merge.describe()

In [None]:
# Base map centred on Australia. folium takes [latitude, longitude]; Australia
# lies in the southern hemisphere, so the latitude must be negative.
# NOTE(review): the "Stamen Terrain" tile set may not be available as a
# built-in in recent folium releases — confirm against the installed version.
_map = folium.Map(location=[-25.2744, 133.7751], tiles="Stamen Terrain", zoom_start=4)

# Choropleth layer: shade each SA2 polygon by its income per person.
# See the folium documentation for details on plotting aggregated data.
c = folium.Choropleth(
    geo_data=geoJSON,  # GeoJSON string holding the SA2 boundaries
    name='choropleth',  # layer name
    data=df_merge,  # table providing the values to plot
    columns=['sa2', 'income_per_person'],  # [key column, value column]
    key_on='properties.SA2_CODE21',  # GeoJSON property matched against the key column
    fill_color='YlOrRd',  # colour scheme
    nan_fill_color='white',  # polygons with no matching value are drawn white
    legend_name='Average Income'
)

c.add_to(_map)
_map