In [28]:
# import library
import pandas as pd
import folium
import geopandas as gpd
from folium.plugins import MarkerCluster

In [29]:
# read data
school_data = pd.read_csv("../data/curated/school_location_cleaned_2023.csv")
train_data = pd.read_csv("../data/curated/train_station_cleaned.csv")
population_data = pd.read_csv("../data/curated/population_cleaned.csv")
rent_data = pd.read_csv("../data/curated/rent_cleaned.csv")
postcode_data = pd.read_csv("../data/raw/external/australian_postcodes.csv")

In [30]:
# Load the map file for post codes
shapefile_data = gpd.read_file("../data/raw/external/SA2_2021_ShapeFile/SA2_2021_AUST_GDA2020.shp")
shapefile_data = shapefile_data.rename(columns={'SA2_2021': 'Postcode'})

# liveable index

### school count

In [34]:
#calculate and sort by school count nearby for each postcode
school_count = school_data.groupby('Address_Postcode').size().reset_index(name='school_count')
school_count = school_count.rename(columns={'Address_Postcode': 'postcode'})
school_count = school_count.groupby('postcode').agg({'school_count': 'sum'}).reset_index()
school_count = school_count.sort_values(by='school_count',ascending=False)

In [35]:
school_count.head()

Unnamed: 0,postcode,school_count
20,3029,28
252,3350,28
564,3977,27
21,3030,26
50,3064,25


### train station count

In [6]:
#rename column lat and long in postcode data
postcode_data['latitude'] = postcode_data['lat']
postcode_data['longitude'] = postcode_data['long']

#merge postcode data and train data
train_count = pd.merge(postcode_data, train_data, on=['latitude', 'longitude'], how='left')

#calculate and sort nearby train station count for each postcode
train_count = train_count.groupby('postcode').size().reset_index(name='train_count')
school_count = train_count.groupby('postcode').agg({'train_count': 'sum'}).reset_index()
train_count = train_count.sort_values(by='train_count',ascending=False)

In [7]:
train_count.head()

Unnamed: 0,postcode,train_count
13,822,111
40,872,110
2049,4702,106
915,2795,102
681,2460,97


### population count

In [48]:
#sort population data by 2023 population
population_sort=population_data.sort_values(by='2023',ascending=False)
population_sort=population_sort[['SA2 name','SA2 code','2023']]

#calculate and sort each postcode's population in 2023
population_count=pd.merge(population_sort, postcode_data, left_on='SA2 name', right_on='SA2_NAME_2021', how='inner')
population_count=population_count.groupby('postcode').agg({'2023':'sum'}).reset_index()
population_count=population_count[['postcode','2023']]
population_count=population_count.sort_values(by='2023',ascending=False)


In [49]:
population_count.head()

Unnamed: 0,postcode,2023
487,3691,562932
192,3221,527982
581,3821,488340
585,3825,470518
692,3995,395556


### rent count

In [38]:
#sort each postcode's mean rent
rent_count=rent_data.groupby('postcode').agg({'price_per_week':'mean'}).reset_index()
rent_count=rent_count.sort_values(by='price_per_week')

In [39]:
rent_count.head()

Unnamed: 0,postcode,price_per_week
329,3981,200.0
214,3409,200.0
212,3396,200.0
193,3318,230.0
285,3825,237.5


### calculate liveablity

In [50]:
#merge all data of count
liveable_count = school_count.merge(train_count,on='postcode',how='outer')\
    .merge(population_count, on='postcode', how='outer')\
        .merge(rent_count, on='postcode', how='outer')

#define weight of each element
school_weight = 0.3
train_weight = 0.3
population_weight = 0.2
rent_weight = 0.2

#use Z-score to nomalization
liveable_count['school_count'] = (liveable_count['school_count'] - liveable_count['school_count'].mean()) / liveable_count['school_count'].std()
liveable_count['train_count'] = (liveable_count['train_count'] - liveable_count['train_count'].mean()) / liveable_count['train_count'].std()
liveable_count['2023'] = (liveable_count['2023'] - liveable_count['2023'].mean()) / liveable_count['2023'].std()
liveable_count['price_per_week'] = (liveable_count['price_per_week'] - liveable_count['price_per_week'].mean()) / liveable_count['price_per_week'].std()

#calculate liveability for each postcode
liveable_count['liveability_index'] = (liveable_count['school_count'] * school_weight + liveable_count['train_count'] * train_weight + liveable_count['2023'] * population_weight - liveable_count['price_per_week'] * rent_weight)

#sort by liveability
liveable_count = liveable_count.sort_values(by='liveability_index', ascending=False)


In [51]:
liveable_count.iloc[:10,:]

Unnamed: 0,postcode,school_count,train_count,2023,price_per_week,liveability_index
1282,3352,2.191766,5.998478,4.545999,-0.468051,3.459883
1280,3350,5.848248,1.832798,4.003559,-0.825282,3.270082
1601,3825,2.191766,2.687296,6.814501,-1.668083,3.160236
1499,3691,0.972939,3.434982,8.306037,0.490835,2.885417
1699,3977,5.604483,0.657863,1.840917,-0.120009,2.270889
1403,3550,4.14189,1.085112,3.154949,-0.183337,2.235758
1629,3875,1.704235,3.114546,3.203513,-0.376762,2.161689
1404,3551,1.704235,3.755419,1.99113,-0.269292,2.089981
1185,3216,3.166828,0.337426,3.093473,-0.451673,1.760305
1016,3029,5.848248,-0.303448,0.185853,-0.133638,1.727338


In [47]:
# define function to extract location points
def extract_location_points(lat_list, lng_list, additional_data):
    points = []
    for lat, lng, data in zip(lat_list, lng_list, additional_data):
        points.append((lat, lng, data))
    return points

In [48]:
# process school data
school_points = extract_location_points(school_data["Y"], school_data["X"], school_data["School_Type"])

# process train data
train_points = extract_location_points(train_data["latitude"], train_data["longitude"], train_data["Stop_ID"])

# process rent data
rent_points = extract_location_points(rent_data["latitude"], rent_data["longitude"], rent_data["price_per_week"])

# process population data
postcode_mapping = pd.read_csv("../data/raw/external/australian_postcodes.csv")[["SA2_CODE_2021", "lat", "long"]]
postcode_dict = {row['SA2_CODE_2021']: (row['lat'], row['long']) for _, row in postcode_mapping.iterrows()}

population_points = [(postcode_dict[suburb][0], postcode_dict[suburb][1], pop) 
                     for suburb, pop in zip(population_data["SA2 code"], population_data["2023"]) 
                     if suburb in postcode_dict]

#process postcode data
postcode_points = extract_location_points(postcode_data["lat"], postcode_data["long"], postcode_data["postcode"])

In [49]:
# define function to add marker clusters
def create_marker_cluster(map_object, name, icon, color, data_points):
    feature_group = folium.FeatureGroup(name=name, control=True)
    marker_cluster = MarkerCluster(name=name).add_to(feature_group)
    for lat, lng, info in data_points:
        folium.Marker(
            location=[lat, lng],
            icon=folium.Icon(icon=icon, color=color, icon_color='white', prefix="fa"),
            popup=str(info)
        ).add_to(marker_cluster)
    map_object.add_child(feature_group)

In [50]:
# create map
map_object = folium.Map(location=[-36, 145.3906], tiles="OpenStreetMap", zoom_start=7)

In [51]:
# add markers on map
create_marker_cluster(map_object, 'School_Layer', 'university', 'black', school_points)
create_marker_cluster(map_object, 'Train_Layer', 'train', 'green', train_points)
create_marker_cluster(map_object, 'Rent_Layer', 'home', 'red', rent_points)
create_marker_cluster(map_object, 'Population_Layer', 'child', 'blue', population_points)
create_marker_cluster(map_object, 'Postcode_Layer', 'map-marker', 'purple', postcode_points)

In [52]:
# add layer control
folium.LayerControl().add_to(map_object)

<folium.map.LayerControl at 0x2b7b6287ed0>

In [53]:
# save map to html file
map_object.save('../plots/livable.html')