In [228]:
# import library
import pandas as pd
import folium
import geopandas as gpd
from folium.plugins import MarkerCluster
from folium.plugins import HeatMap
import branca.colormap as cm

In [229]:
# read data
school_data = pd.read_csv("../data/curated/school_location_cleaned_2023.csv")
train_data = pd.read_csv("../data/curated/train_station_cleaned.csv")
population_data = pd.read_csv("../data/curated/population_cleaned.csv")
rent_data = pd.read_csv("../data/curated/rent_cleaned.csv")
postcode_data = pd.read_csv("../data/raw/external/australian_postcodes.csv")
affordable_index = pd.read_csv('../data/curated/rent_income_2023-24.csv')

In [230]:
# Load the map file for post codes
shapefile_data = gpd.read_file("../data/raw/external/SA2_2021_ShapeFile/SA2_2021_AUST_GDA2020.shp")
shapefile_data = shapefile_data.rename(columns={'SA2_2021': 'Postcode'})
shapefile_data = shapefile_data[shapefile_data.is_valid]

In [231]:
#clean postcode data 
postcode_data = postcode_data[postcode_data['state'] == 'VIC']

# liveable index

### school count

In [232]:
#calculate and sort by school count nearby for each postcode
school_count = school_data.groupby('Address_Postcode').size().reset_index(name='school_count')
school_count = school_count.groupby('Address_Postcode').agg({'school_count': 'sum'}).reset_index()
school_count = school_count.sort_values(by='school_count',ascending=False)

In [233]:
school_count.head()

Unnamed: 0,Address_Postcode,school_count
20,3029,28
252,3350,28
564,3977,27
21,3030,26
50,3064,25


### train station count

In [234]:
#rename column lat and long in postcode data
postcode_data['latitude'] = postcode_data['lat']
postcode_data['longitude'] = postcode_data['long']

#merge postcode data and train data
train_count = pd.merge(postcode_data, train_data, on=['latitude', 'longitude'], how='left')

#calculate and sort nearby train station count for each postcode
train_count = train_count.groupby('postcode').size().reset_index(name='train_count')
train_count = train_count.groupby('postcode').agg({'train_count': 'sum'}).reset_index()
train_count = train_count.sort_values(by='train_count',ascending=False)

In [235]:
train_count.head()

Unnamed: 0,postcode,train_count
291,3352,62
324,3401,48
413,3551,41
508,3691,37
638,3875,35


### population count

In [236]:
#sort population data by 2023 population
population_sort=population_data.sort_values(by='2023',ascending=False)
population_sort=population_sort[['SA2 name','SA2 code','2023']]

#calculate and sort each postcode's population in 2023
population_count=pd.merge(population_sort, postcode_data, left_on='SA2 name', right_on='SA2_NAME_2021', how='inner')
population_count=population_count.groupby('postcode').agg({'2023':'sum'}).reset_index()
population_count=population_count[['postcode','2023']]
population_count=population_count.sort_values(by='2023',ascending=False)


In [237]:
population_count.head()

Unnamed: 0,postcode,2023
487,3691,548118
192,3221,527982
581,3821,488340
585,3825,470518
692,3995,395556


### rent count

In [238]:
#sort each postcode's mean rent
rent_count=rent_data.groupby('postcode').agg({'price_per_week':'mean'}).reset_index()
rent_count=rent_count.sort_values(by='price_per_week')

In [239]:
rent_count.head()

Unnamed: 0,postcode,price_per_week
329,3981,200.0
214,3409,200.0
212,3396,200.0
193,3318,230.0
285,3825,237.5


### calculate liveablity

In [240]:
#merge all data of count
liveable_count = school_count.merge(train_count,left_on='Address_Postcode',right_on='postcode',how='outer')\
    .merge(population_count, on='postcode', how='outer')\
        .merge(rent_count, on='postcode', how='outer')

#define weight of each element
school_weight = 0.3
train_weight = 0.3
population_weight = 0.2
rent_weight = 0.2

#use Z-score to nomalization
liveable_count['school_count'] = (liveable_count['school_count'] - liveable_count['school_count'].mean()) / liveable_count['school_count'].std()
liveable_count['train_count'] = (liveable_count['train_count'] - liveable_count['train_count'].mean()) / liveable_count['train_count'].std()
liveable_count['2023'] = (liveable_count['2023'] - liveable_count['2023'].mean()) / liveable_count['2023'].std()
liveable_count['price_per_week'] = (liveable_count['price_per_week'] - liveable_count['price_per_week'].mean()) / liveable_count['price_per_week'].std()

#calculate liveability for each postcode
liveable_count['liveability_index'] = (liveable_count['school_count'] * school_weight + liveable_count['train_count'] * train_weight + liveable_count['2023'] * population_weight - liveable_count['price_per_week'] * rent_weight)

#sort by liveability
liveable_count = liveable_count.sort_values(by='liveability_index', ascending=False)


In [241]:
liveable_count.iloc[:10,:]

Unnamed: 0,Address_Postcode,school_count,postcode,train_count,2023,price_per_week,liveability_index
291,3352.0,2.191766,3352,9.193708,4.561803,-0.468051,4.421613
610,3825.0,2.191766,3825,4.217947,6.837458,-1.668083,3.624022
289,3350.0,5.848248,3350,2.93388,4.017652,-0.825282,3.603225
508,3691.0,0.972939,3691,5.180997,8.09385,0.490835,3.366784
413,3551.0,1.704235,3551,5.823031,1.998877,-0.269292,2.711814
638,3875.0,1.704235,3875,4.859981,3.215083,-0.376762,2.687634
412,3550.0,4.14189,3550,1.810321,3.166366,-0.183337,2.455604
708,3977.0,5.604483,3977,1.168287,1.848191,-0.120009,2.425471
307,3377.0,0.972939,3377,3.415405,2.780349,-0.841347,2.040842
360,3461.0,-0.002123,3461,3.736422,4.01171,-0.222274,1.967086


In [242]:
# define function to extract location points
def extract_location_points(lat_list, lng_list, additional_data):
    points = []
    for lat, lng, data in zip(lat_list, lng_list, additional_data):
        points.append((lat, lng, data))
    return points

In [243]:
# process school data
school_points = extract_location_points(school_data["Y"], school_data["X"], school_data["School_Type"])

# process train data
train_points = extract_location_points(train_data["latitude"], train_data["longitude"], train_data["Stop_ID"])

#process rent data
rent_points = extract_location_points(rent_data["latitude"], rent_data["longitude"],rent_data["price_per_week"])

# process population data
postcode_mapping = pd.read_csv("../data/raw/external/australian_postcodes.csv")[["SA2_CODE_2021", "lat", "long"]]
postcode_dict = {row['SA2_CODE_2021']: (row['lat'], row['long']) for _, row in postcode_mapping.iterrows()}

population_points = [(postcode_dict[suburb][0], postcode_dict[suburb][1], pop) 
                     for suburb, pop in zip(population_data["SA2 code"], population_data["2023"]) 
                     if suburb in postcode_dict]

#process postcode data
postcode_points = extract_location_points(postcode_data["lat"], postcode_data["long"], postcode_data["postcode"])

In [244]:
# define function to add marker clusters
def create_marker_cluster(map_object, name, icon, color, data_points):
    feature_group = folium.FeatureGroup(name=name, control=True)
    marker_cluster = MarkerCluster(name=name).add_to(feature_group)
    for lat, lng, info in data_points:
        folium.Marker(
            location=[lat, lng],
            icon=folium.Icon(icon=icon, color=color, icon_color='white', prefix="fa"),
            popup=str(info)
        ).add_to(marker_cluster)
    map_object.add_child(feature_group)

In [245]:
# create map
map_object=folium.Map(location=[-36, 145.3906], tiles='CartoDB positron', zoom_start=7)

In [246]:
#create heat map of population in VIC
HeatMap(population_points).add_to(map_object)

<folium.plugins.heat_map.HeatMap at 0x1c1b2a066d0>

In [247]:
#clean affordable index data
affordable_index.fillna(0, inplace=True) 

In [248]:
#merge shapefil data and affordable index data and clean
shapefile_data['SA2_CODE21'] = shapefile_data['SA2_CODE21'].astype(str)
affordable_index['SA2'] = affordable_index['SA2'].astype(str)

shapefile_data['SA2']=shapefile_data['SA2_CODE21']
rent_sa2 = gpd.GeoDataFrame(pd.merge(shapefile_data, affordable_index, on='SA2', how='inner'))

In [249]:
# add markers on map
create_marker_cluster(map_object, 'School_Layer', 'university', 'black', school_points)
create_marker_cluster(map_object, 'Train_Layer', 'train', 'green', train_points)
create_marker_cluster(map_object, 'Postcode_Layer', 'map-marker', 'purple', postcode_points)
create_marker_cluster(map_object, 'Rent_Layer', 'map-marker','gray', rent_points)

In [250]:
#create affordable index gradient map
folium.Choropleth(
    geo_data=rent_sa2, 
    name='choropleth',
    data=affordable_index, 
    columns=['SA2', 'affordable index of 2023-24'],  
    key_on='feature.properties.SA2',  
    fill_color='YlOrRd',  
    fill_opacity=0.7,
    line_opacity=0.2,
    nah_fill_color=None,
    legend_name='Affordable Index'
).add_to(map_object)

<folium.features.Choropleth at 0x1c1f27d2a50>

In [251]:
# add layer control
folium.LayerControl().add_to(map_object)

<folium.map.LayerControl at 0x1c1b2e32e90>

In [252]:
# save map to html file
map_object.save('../plots/livable.html')