In [78]:
# import library
import pandas as pd
import folium
import geopandas as gpd
from folium.plugins import MarkerCluster
from folium.plugins import HeatMap
import branca.colormap as cm

In [79]:
# read data
school_data = pd.read_csv("../data/curated/school_location_cleaned_2023.csv")
train_data = pd.read_csv("../data/curated/train_station_cleaned.csv")
population_data = pd.read_csv("../data/curated/population_cleaned.csv")
rent_data = pd.read_csv("../data/curated/rent_cleaned.csv")
postcode_data = pd.read_csv("../data/raw/external/australian_postcodes.csv")
affordable_index = pd.read_csv('../data/curated/rent_income_2023-24.csv')

In [80]:
# Load the map file for post codes
shapefile_data = gpd.read_file("../data/raw/external/SA2_2021_ShapeFile/SA2_2021_AUST_GDA2020.shp")
shapefile_data = shapefile_data.rename(columns={'SA2_2021': 'Postcode'})
shapefile_data = shapefile_data[shapefile_data.is_valid]

In [81]:
#clean postcode data 
postcode_data = postcode_data[postcode_data['state'] == 'VIC']

# liveable index

### school count

In [82]:
#calculate and sort by school count nearby for each postcode
school_count = school_data.groupby('Address_Postcode').size().reset_index(name='school_count')
school_count = school_count.groupby('Address_Postcode').agg({'school_count': 'sum'}).reset_index()
school_count = school_count.sort_values(by='school_count',ascending=False)

In [83]:
school_count.head()

Unnamed: 0,Address_Postcode,school_count
20,3029,28
252,3350,28
564,3977,27
21,3030,26
50,3064,25


### train station count

In [84]:
#rename column lat and long in postcode data
postcode_data['latitude'] = postcode_data['lat']
postcode_data['longitude'] = postcode_data['long']

#merge postcode data and train data
train_count = pd.merge(postcode_data, train_data, on=['latitude', 'longitude'], how='left')

#calculate and sort nearby train station count for each postcode
train_count = train_count.groupby('postcode').size().reset_index(name='train_count')
train_count = train_count.groupby('postcode').agg({'train_count': 'sum'}).reset_index()
train_count = train_count.sort_values(by='train_count',ascending=False)

In [85]:
train_count.head()

Unnamed: 0,postcode,train_count
291,3352,62
324,3401,48
413,3551,41
508,3691,37
638,3875,35


### population count

In [86]:
#sort population data by 2023 population
population_sort=population_data.sort_values(by='2023',ascending=False)
population_sort=population_sort[['SA2 name','SA2 code','2023']]

#calculate and sort each postcode's population in 2023
population_count=pd.merge(population_sort, postcode_data, left_on='SA2 name', right_on='SA2_NAME_2021', how='inner')
population_count=population_count.groupby('postcode').agg({'2023':'sum'}).reset_index()
population_count=population_count[['postcode','2023']]
population_count=population_count.sort_values(by='2023',ascending=False)


In [87]:
population_count.head()

Unnamed: 0,postcode,2023
487,3691,548118
192,3221,527982
581,3821,488340
585,3825,470518
692,3995,395556


### rent count

In [88]:
#sort each postcode's mean rent
rent_count=rent_data.groupby('postcode').agg({'price_per_week':'mean'}).reset_index()
rent_count=rent_count.sort_values(by='price_per_week')

In [89]:
rent_count.head()

Unnamed: 0,postcode,price_per_week
329,3981,200.0
214,3409,200.0
212,3396,200.0
193,3318,230.0
285,3825,237.5


### calculate liveablity

In [90]:
#merge all data of count
liveable_count = school_count.merge(train_count,left_on='Address_Postcode',right_on='postcode',how='outer')\
    .merge(population_count, on='postcode', how='outer')\
        .merge(rent_count, on='postcode', how='outer')

#define weight of each element
school_weight = 0.2
train_weight = 0.3
population_weight = 0.2
rent_weight = 0.3

#use Z-score to nomalization
liveable_count['school_count'] = (liveable_count['school_count'] - liveable_count['school_count'].min()) / (liveable_count['school_count'].max() - liveable_count['school_count'].min())
liveable_count['train_count'] = (liveable_count['train_count'] - liveable_count['train_count'].min()) / (liveable_count['train_count'].max() - liveable_count['train_count'].min())
liveable_count['2023'] = (liveable_count['2023'] - liveable_count['2023'].min()) / (liveable_count['2023'].max() - liveable_count['2023'].min())
liveable_count['price_per_week'] = (liveable_count['price_per_week'] - liveable_count['price_per_week'].min()) / (liveable_count['price_per_week'].max() - liveable_count['price_per_week'].min())

#calculate liveability for each postcode
liveable_count['liveability_index'] = (liveable_count['school_count'] * school_weight + liveable_count['train_count'] * train_weight + liveable_count['2023'] * population_weight - liveable_count['price_per_week'] * rent_weight)

#sort by liveability
liveable_count = liveable_count.sort_values(by='liveability_index', ascending=False)




In [91]:
liveable_count.iloc[:10,:]

Unnamed: 0,Address_Postcode,school_count,postcode,train_count,2023,price_per_week,liveability_index
291,3352.0,0.444444,3352,1.0,0.601994,0.154067,0.463068
610,3825.0,0.444444,3825,0.491803,0.858425,0.019737,0.402194
289,3350.0,1.0,3350,0.360656,0.540677,0.114079,0.382109
508,3691.0,0.259259,3691,0.590164,1.0,0.261404,0.35048
638,3875.0,0.37037,3875,0.557377,0.450241,0.164286,0.28205
413,3551.0,0.37037,3551,0.655738,0.313194,0.176316,0.280539
412,3550.0,0.740741,3550,0.245902,0.444751,0.185938,0.255088
708,3977.0,0.962963,3977,0.180328,0.296214,0.193026,0.248026
307,3377.0,0.259259,3377,0.409836,0.401253,0.112281,0.221369
360,3461.0,0.111111,3461,0.442623,0.540008,0.181579,0.208537


## liveable and affordable index

In [92]:
#clean affordable index data
affordable_index1 = affordable_index[['postcode','affordable index of 2023-24']]
affordable_index1 = affordable_index1.groupby('postcode').agg({'affordable index of 2023-24':'mean'}).reset_index()

#merge affordable index and liveable index data
all_index = pd.merge(affordable_index1,liveable_count,on='postcode',how='outer')

#Min-Max normalization index values
all_index['liveability_index'] = (all_index['liveability_index'] - all_index['liveability_index'].min()) / (all_index['liveability_index'].max() - all_index['liveability_index'].min())
all_index['affordable index of 2023-24'] = (all_index['affordable index of 2023-24'] - all_index['affordable index of 2023-24'].min()) / (all_index['affordable index of 2023-24'].max() - all_index['affordable index of 2023-24'].min())

#calculate the liveability and affordability of suburb
all_index['index'] = all_index['affordable index of 2023-24'] * 0.7 + all_index['liveability_index'] * 0.3

#sort index
all_index = all_index.sort_values(by='index',ascending=False)
all_index = all_index[['postcode','index']]

In [93]:
all_index.head()

Unnamed: 0,postcode,index
712,3981,0.839035
610,3825,0.789927
327,3409,0.736872
321,3396,0.723167
289,3350,0.59585


## create map

In [94]:
# define function to extract location points
def extract_location_points(lat_list, lng_list, additional_data):
    points = []
    for lat, lng, data in zip(lat_list, lng_list, additional_data):
        points.append((lat, lng, data))
    return points

In [95]:
# process school data
school_points = extract_location_points(school_data["Y"], school_data["X"], school_data["School_Type"])

# process train data
train_points = extract_location_points(train_data["latitude"], train_data["longitude"], train_data["Stop_ID"])

#process rent data
rent_points = extract_location_points(rent_data["latitude"], rent_data["longitude"],rent_data["price_per_week"])

# process population data
postcode_mapping = pd.read_csv("../data/raw/external/australian_postcodes.csv")[["SA2_CODE_2021", "lat", "long"]]
postcode_dict = {row['SA2_CODE_2021']: (row['lat'], row['long']) for _, row in postcode_mapping.iterrows()}

population_points = [(postcode_dict[suburb][0], postcode_dict[suburb][1], pop) 
                     for suburb, pop in zip(population_data["SA2 code"], population_data["2023"]) 
                     if suburb in postcode_dict]

#process postcode data
postcode_points = extract_location_points(postcode_data["lat"], postcode_data["long"], postcode_data["postcode"])

#process index data
all_index_most = postcode_data[postcode_data['postcode']==3981]
all_index_points = extract_location_points(all_index_most["lat"], all_index_most["long"], all_index_most["postcode"])

In [96]:
# define function to add marker clusters
def create_marker_cluster(map_object, name, icon, color, data_points):
    feature_group = folium.FeatureGroup(name=name, control=True)
    marker_cluster = MarkerCluster(name=name).add_to(feature_group)
    for lat, lng, info in data_points:
        folium.Marker(
            location=[lat, lng],
            icon=folium.Icon(icon=icon, color=color, icon_color='white', prefix="fa"),
            popup=str(info)
        ).add_to(marker_cluster)
    map_object.add_child(feature_group)

In [97]:
# create map
map_object=folium.Map(location=[-36, 145.3906], tiles='CartoDB positron', zoom_start=7)

In [98]:
#create heat map of population in VIC
HeatMap(population_points).add_to(map_object)

<folium.plugins.heat_map.HeatMap at 0x1b92c8c6690>

In [99]:
#clean affordable index data
affordable_index.fillna(0, inplace=True) 

In [100]:
#merge shapefil data and affordable index data and clean
shapefile_data['SA2_CODE21'] = shapefile_data['SA2_CODE21'].astype(str)
affordable_index['SA2'] = affordable_index['SA2'].astype(str)

shapefile_data['SA2']=shapefile_data['SA2_CODE21']
rent_sa2 = gpd.GeoDataFrame(pd.merge(shapefile_data, affordable_index, on='SA2', how='inner'))

In [102]:
# add markers on map
create_marker_cluster(map_object, 'School_Layer', 'university', 'black', school_points)
create_marker_cluster(map_object, 'Train_Layer', 'train', 'green', train_points)
create_marker_cluster(map_object, 'Postcode_Layer', 'map-marker', 'purple', postcode_points)
create_marker_cluster(map_object, 'Rent_Layer', 'map-marker','gray', rent_points)
create_marker_cluster(map_object, 'most_liveable_and_affordable_layer','map_marker', 'green', all_index_points)

In [103]:
#create affordable index gradient map
folium.Choropleth(
    geo_data=rent_sa2, 
    name='choropleth',
    data=affordable_index, 
    columns=['SA2', 'affordable index of 2023-24'],  
    key_on='feature.properties.SA2',  
    fill_color='YlOrRd',  
    fill_opacity=0.7,
    line_opacity=0.2,
    nah_fill_color=None,
    legend_name='Affordable Index'
).add_to(map_object)

<folium.features.Choropleth at 0x1b9808fae10>

In [104]:
# add layer control
folium.LayerControl().add_to(map_object)

<folium.map.LayerControl at 0x1b980ad4a50>

In [105]:
# save map to html file
map_object.save('../plots/livable.html')