<h1>Liveability</h1>

In [None]:
# Importing necessary libraries
import folium
import geopandas as gpd 
import pandas as pd
import sys

sys.path.insert(0, "../scripts/")
from helper_functions import convert_census_to_postcode

## Read in relevant data

In [None]:
# Property listings together with their distance/duration-to-amenity columns
all_property_df = pd.read_csv("../data/curated/categorised_distances.csv")

# Columns common to both of the views taken below
facility_cols = ["Bed", "Bath", "Parking", "Postcode"]

# View 1: rental cost plus the in-property facilities
property_df = all_property_df.loc[:, ["Cost", *facility_cols]]

# View 2: in-property facilities plus the proximity measures
distances_df = all_property_df.loc[
    :,
    [
        *facility_cols,
        "Railway_Duration",
        "CBD_Duration",
        "Post_Office_Duration",
        "Park_Distance",
        "Nearby_Schools",
    ],
]

# School table; only its row count is used later on
schools_df = pd.read_csv("../data/curated/schools/school_counts.csv")

## Dataset creation

### Calculating the average travel time/distance to nearby amenities (and the average number of nearby schools) per postcode

In [None]:
# Average travel duration/distance to each amenity (and average number of nearby schools) per postcode.
#
# All averages are taken in a single groupby and the postcode label is recovered from the
# group key via reset_index(). This keeps every average attached to its own postcode:
# groupby returns groups sorted by postcode, whereas `unique()` returns postcodes in order of
# first appearance, so pairing the two positionally would assign averages to the wrong postcodes.
amenity_column_names = {
    "CBD_Duration": "Average Duration to CBD",
    "Railway_Duration": "Average Duration to Station",
    "Park_Distance": "Average Distance to Park",
    "Post_Office_Duration": "Average Duration to Post Office",
    "Nearby_Schools": "Average # of Schools",
}

# Creating dataframe of average amenities per postcode (one row per postcode, "Postcode" as a column)
amenities_df = (
    distances_df
    .groupby(by = "Postcode")[list(amenity_column_names)]
    .mean()
    .rename(columns = amenity_column_names)
    .reset_index()
)
amenities_df.head()

### Calculating the average number of facilities (beds, baths, parking spaces) per postcode

In [None]:
# Average number of bedrooms, bathrooms and parking spaces per postcode.
#
# The postcode label comes from the groupby key (via reset_index()) rather than from
# `property_df["Postcode"].unique()`: groupby output is sorted by postcode while `unique()`
# preserves first-appearance order, so a positional pairing would mislabel the averages.
facility_column_names = {
    "Bed": "Average # Beds",
    "Bath": "Average # Baths",
    "Parking": "Average # Parking",
}

# Creating dataframe of average facilities per postcode (one row per postcode, "Postcode" as a column)
facilities_df = (
    property_df
    .groupby(by = "Postcode")[list(facility_column_names)]
    .mean()
    .rename(columns = facility_column_names)
    .reset_index()
)
facilities_df.head()

### Join dataframes

In [None]:
# Joining facilities and amenities dataframes on their shared Postcode key.
# set_index() is used without inplace=True so that amenities_df / facilities_df keep their
# "Postcode" column; with in-place indexing this cell raised a KeyError when run a second time.
postcode_property_df = (
    amenities_df.set_index("Postcode")
    .join(facilities_df.set_index("Postcode"))
)
postcode_property_df.head()

### Obtaining census/population data

In [None]:
# Load the SA2 -> postcode lookup and the census table, then re-key the census figures by postcode.
# The "mean_no_zero" mode is passed straight through to the helper
# (presumably a mean that ignores zero entries - confirm in helper_functions).
sa2_postcode_map = pd.read_csv("../data/curated/sa2_postcode_mapping_2021.csv")
census_df = pd.read_csv("../data/curated/census_data.csv")

census_by_postcode_df = convert_census_to_postcode(
    census_df,
    sa2_postcode_map,
    "mean_no_zero",
)

In [None]:
# Keep only the census columns that hold 2021 figures
census_2021_cols = [
    "postcode_2021",
    "tot_population_21",
    "avg_med_mortg_rep_21",
    "avg_med_person_inc_21",
    "avg_med_rent_21",
    "avg_med_hh_inc_21",
    "tot_avg_hh_size_21",
]
census_by_postcode_df = census_by_postcode_df.loc[:, census_2021_cols]

# Liveability only needs the household size; key it by postcode under the name used
# by the property tables so the two can be joined
population_df = (
    census_by_postcode_df[["postcode_2021", "tot_avg_hh_size_21"]]
    .rename(columns = {"postcode_2021": "Postcode"})
    .set_index("Postcode")
)

# Left join: every census postcode is kept, with NaN where no property data exists
df = population_df.join(postcode_property_df, on = "Postcode", how = "left")
df.head()

## Calculating liveability metric

In [None]:
# Postcodes whose average bedroom or parking count is exactly 0 get an arbitrarily small
# positive stand-in (EPSILON) instead of a hard zero
EPSILON = 10 ** -6

for facility_col in ("Average # Beds", "Average # Parking"):
    is_zero = df[facility_col] == 0
    df.loc[is_zero, facility_col] = EPSILON

In [None]:
# Overall row counts, used as denominators for the proportion features
TOTAL_PROPERTIES = property_df.shape[0]
TOTAL_SCHOOLS = schools_df.shape[0]

# Number of listed properties in each postcode (Series indexed by Postcode)
num_properties = property_df.groupby(by = "Postcode").size()

In [None]:
# Calculating the contribution of each postcode to the total number of rental properties as a proportion.
# `num_properties` is indexed by postcode, so this assignment relies on df still being indexed by Postcode.
df["Property Proportion"] = num_properties / TOTAL_PROPERTIES
df["Schools Proportion"] = df["Average # of Schools"] / TOTAL_SCHOOLS

# Calculating the average number of facilities per person in a household (for each postcode)
df["Beds per Person"] = df["Average # Beds"] / df["tot_avg_hh_size_21"]
df["Baths per Person"] = df["Average # Baths"] / df["tot_avg_hh_size_21"]
df["Parking per Person"] = df["Average # Parking"] / df["tot_avg_hh_size_21"]

# Calculating non-standardised liveability metric
# NOTE(review): only the CBD duration is inverted; the station / post office durations and the park
# distance are added as-is, so larger values raise the score. Confirm this is intended (e.g. if the
# "categorised" distance columns are already scores where higher means better).
df["Liveability"] = ((1 / df["Average Duration to CBD"]) + df["Average Duration to Station"] + df["Average Distance to Park"] + 
                    df["Average Duration to Post Office"] + (df["Property Proportion"] * 100) + df["Beds per Person"] + df["Baths per Person"] + 
                    df["Parking per Person"] + df["Schools Proportion"])

# Min-max standardisation to the range [0, 1]; NaN scores are ignored when finding the extremes
min_liveability = df["Liveability"].min()
max_liveability = df["Liveability"].max()

# The score is NOT inverted: 0 is the lowest raw liveability in the data and 1 the highest
df["Standardised Liveability"] = (df["Liveability"] - min_liveability) / (max_liveability - min_liveability)

# reset_index() is applied to a temporary frame only, so df keeps its Postcode index and this cell
# can be re-run without the postcode-aligned assignments above silently misaligning
liveability_df = df.reset_index()[["Postcode", "Standardised Liveability"]]
liveability_df.to_csv("../data/curated/liveability.csv", index=False)

# Top 10 most liveable postcodes
most_liveable_df = liveability_df.sort_values(by="Standardised Liveability", ascending = False).head(10)
most_liveable_df

## Graphing standardised liveability

In [None]:
# Creating geoJSON of postcode and geometry coordinates
# Postcode-area boundaries come from the POA_2021_AUST_GDA2020 shapefile; the postcode lookup
# CSV is read with explicit column names (it is assumed to have no header row)
sf = gpd.read_file("../data/raw/external/POA_2021_AUST_GDA2020/POA_2021_AUST_GDA2020.shp")
postcodes = pd.read_csv("../data/raw/external/postcode.csv", names = ["POA_CODE21", "Name", "Area"])
postcodes["POA_CODE21"] = postcodes["POA_CODE21"].astype(int)

# Converting the geometry shape to latitude and longitude
# TAKEN FROM TUTE 2 NOTEBOOK
sf["geometry"] = sf["geometry"].to_crs("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")

# Keep postcode areas with codes from "3000" to "4000" (bounds inclusive; the codes are compared
# as strings at this point). .copy() makes the filtered frame independent of the full shapefile
# frame, so the dtype conversion below is not an assignment into a slice (SettingWithCopyWarning).
sf = sf[sf["POA_CODE21"].between("3000", "4000")].copy()
sf["POA_CODE21"] = sf["POA_CODE21"].astype(int)

# Attach the boundaries to the postcode lookup; only postcodes present in both are kept
gdf = gpd.GeoDataFrame(
    pd.merge(postcodes, sf, on = "POA_CODE21", how = "inner")
)

# One feature per postcode, serialised for folium
geoJSON = gdf[["POA_CODE21", "geometry"]].drop_duplicates("POA_CODE21").to_json()

**Liveability heatmap**

In [None]:
# Centroid of every postcode area, stored as (y, x) since we want (lat, long)
gdf["centroid"] = gdf["geometry"].apply(lambda geom: (geom.centroid.y, geom.centroid.x))

# Base map centred on Melbourne.
# "OpenStreetMap" replaces the former "Stamen Terrain" tiles: Stamen's hosted tiles are no longer
# served and recent folium releases dropped them from the built-in tile sets.
m = folium.Map(location = [-37.8136, 144.9631], tiles = "OpenStreetMap", zoom_start = 10)

# liveability_df is passed as-is: its "Postcode" column is matched against the POA_CODE21
# property of each geoJSON feature, so no column renaming is needed
c = folium.Choropleth(
            geo_data = geoJSON, # geoJSON 
            name = "choropleth", # name of plot
            data = liveability_df, # data source
            columns = ["Postcode", "Standardised Liveability"], # the columns required
            key_on = "properties.POA_CODE21", # this is from the geoJSON's properties
            fill_color = "YlOrRd", # color scheme
            nan_fill_color = "grey", # postcodes without a liveability score
            legend_name = "Liveability"
        )

c.add_to(m)
m.save("../plots/liveability_heatmap.html")
m

**Most liveable heatmap**

In [None]:
# Export most liveable heatmap
# Restrict the postcode lookup to the ten most liveable postcodes
postcodes["POA_CODE21"] = postcodes["POA_CODE21"].astype(int)
top_postcodes = most_liveable_df["Postcode"].to_list()
postcodes = postcodes.loc[postcodes["POA_CODE21"].isin(top_postcodes)]

# Attach boundaries for just those postcodes and serialise one feature per postcode
gdf = gpd.GeoDataFrame(
    pd.merge(postcodes, sf, on = "POA_CODE21", how = "inner")
)

geoJSON = gdf[["POA_CODE21", "geometry"]].drop_duplicates("POA_CODE21").to_json()

# Base map centred on Melbourne.
# "OpenStreetMap" replaces the former "Stamen Terrain" tiles: Stamen's hosted tiles are no longer
# served and recent folium releases dropped them from the built-in tile sets.
m = folium.Map(location = [-37.8136, 144.9631], tiles = "OpenStreetMap", zoom_start = 10)

# No data is bound to this choropleth: it only outlines the selected postcode areas
folium.Choropleth(geo_data = geoJSON, name = "choropleth").add_to(m)
m.save("../plots/most_liveable_heatmap.html")
m