# Ranking the liveability of surrounding postcodes

In [1]:
# Importing necessary libraries
import json
import pandas as pd
import warnings

from scipy.spatial import distance

# Silence every FutureWarning raised from this point on, so that library
# deprecation notices do not clutter the cell outputs below
warnings.simplefilter(action="ignore", category=FutureWarning)

In [2]:
# Load every input used by this notebook

# Postcodes to analyse; header=None means the first line of the file is data,
# so the column name is supplied here
postcodes = pd.read_csv(
    "../data/curated/unique_postcodes.csv",
    header=None,
    names=["postcode"],
)

# External Australian postcode table (source of the lat/long coordinates)
latlongs = pd.read_csv(
    "../data/raw/external/australian_postcodes/australian_postcodes.csv"
)

# Curated per-postcode measures, read in the same order as before
growth_rate_df, affordability_df, livability_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/curated/growth_rate.csv",
        "../data/curated/affordability.csv",
        "../data/curated/liveability.csv",
    )
)

### Featurise postcodes

In [3]:
# Attach coordinates to each postcode of interest

# Keep only the Victorian rows of the national postcode table
is_victorian = latlongs["state"] == "VIC"
latlongs = latlongs[is_victorian]

# Left join: every postcode of interest is kept, even when the lookup table
# has no matching row (its lat/long are then NaN)
merged = pd.merge(postcodes, latlongs, how="left", on="postcode")

# One row per postcode: when the lookup lists a postcode several times,
# the first occurrence is the one retained
postcode_latlong = merged.loc[:, ["postcode", "lat", "long"]].drop_duplicates(
    subset="postcode"
)

In [4]:
# Calculate distance to each postcode
#
# Result: postcode_dict maps every postcode (int) to a dict of its nearest
# neighbours, {neighbour postcode (int): distance (float)}, ordered from
# closest to farthest.

# Number of nearest neighbours stored for every postcode
N_NEIGHBOURS = 10

# The left merge that builds `postcode_latlong` leaves lat/long as NaN for any
# postcode missing from the lookup table. NaN coordinates yield NaN distances,
# which cannot be ranked and would be serialised as bare `NaN` (not valid JSON),
# so only postcodes with both coordinates take part in the distance calculation.
coord_cols = ["lat", "long"]
has_coords = postcode_latlong[coord_cols].notna().all(axis=1)
located = postcode_latlong.loc[has_coords]
located_codes = [int(code) for code in located["postcode"]]

# A single vectorised call replaces the nested per-row Python loops.
# NOTE(review): this is still plain Euclidean distance on raw degrees (kept so
# the values stay comparable with previously saved output). It treats a degree
# of longitude as equal to a degree of latitude, which slightly distorts the
# ordering away from the equator; consider a great-circle distance if exact
# neighbour ordering matters.
coords = located[coord_cols].to_numpy(dtype=float)
pairwise = pd.DataFrame(
    distance.cdist(coords, coords, metric="euclidean"),
    index=located_codes,
    columns=located_codes,
)

# Every postcode keeps its key; one without coordinates maps to no neighbours
postcode_dict = {int(code): {} for code in postcode_latlong["postcode"]}
for code, dists in pairwise.iterrows():
    # Drop the postcode itself, then keep the closest N. The stable sort makes
    # ties deterministic: equally distant neighbours keep their row order.
    nearest = dists.drop(labels=code).sort_values(kind="stable").head(N_NEIGHBOURS)
    postcode_dict[int(code)] = {
        int(neighbour): float(dist) for neighbour, dist in nearest.items()
    }

In [5]:
# Persist the nearest-neighbour lookup as JSON
# (json converts the integer postcode keys to strings when serialising)
serialised_dists = json.dumps(postcode_dict)
with open("../data/curated/postcode_dists.json", "w") as json_file:
    json_file.write(serialised_dists)

### Combine desirability measures

In [6]:
# Give the growth-rate columns the labels used by the later merge, then display the frame
growth_rate_df = growth_rate_df.set_axis(["Postcode", "Growth Rate"], axis="columns")
growth_rate_df

Unnamed: 0,Postcode,Growth Rate
0,3000,-0.091536
1,3002,-0.143222
2,3003,-0.265318
3,3004,-0.025944
4,3006,0.050168
...,...,...
221,3975,0.078726
222,3976,0.104683
223,3977,0.208066
224,3978,0.145794


In [7]:
# View liveability scores
# (displays the frame read from liveability.csv exactly as loaded)
livability_df

Unnamed: 0.1,Unnamed: 0,Postcode,Standardised Liveability
0,0,3000,0.132226
1,1,3002,0.167572
2,2,3003,0.155316
3,3,3004,0.225321
4,4,3006,0.292377
...,...,...,...
689,689,3990,
690,690,3991,
691,691,3992,
692,692,3995,0.447956


In [8]:
# View affordability scores
# (displays the frame read from affordability.csv exactly as loaded)
affordability_df

Unnamed: 0.1,Unnamed: 0,Postcode,Standardised Affordability
0,0,3000,0.757960
1,1,3002,0.815067
2,2,3003,0.594731
3,3,3004,0.806434
4,4,3006,0.936327
...,...,...,...
404,404,3981,0.698136
405,405,3984,0.645537
406,406,3987,0.624466
407,407,3995,0.543672


In [9]:
# Merge all scores with postcodes

# Rename into a new frame instead of assigning to `postcodes.columns`: the
# earlier coordinate merge joins on the lowercase "postcode" column, so mutating
# `postcodes` here would make that cell fail with a KeyError if it were re-run
# after this one.
postcodes_for_scores = postcodes.rename(columns={"postcode": "Postcode"})

# Outer joins keep every postcode that appears in any input; a measure that is
# missing for a postcode is left as NaN.
merged = (
    postcodes_for_scores
    .merge(growth_rate_df, on="Postcode", how="outer")
    .merge(affordability_df, on="Postcode", how="outer")
    .merge(livability_df, on="Postcode", how="outer")
)

# The affordability and liveability frames each carry an "Unnamed: 0" column
# (visible in their displayed output); the last merge suffixes the clash with
# _x / _y, and neither copy is wanted in the result.
final_df = merged.drop(columns=["Unnamed: 0_x", "Unnamed: 0_y"])

In [10]:
# Write the combined growth-rate / affordability / liveability table to disk.
# NOTE: to_csv also writes the row index by default, as an unnamed first column.
merged_scores_path = "../data/curated/growthrate_afford_live.csv"
final_df.to_csv(merged_scores_path)