In [None]:
import pandas as pd
import requests

In [None]:
### Download the Australian postcode table and store it with the raw data.
url = 'https://raw.githubusercontent.com/matthewproctor/australianpostcodes/master/australian_postcodes.csv'

### requests applies no timeout by default, so without one a stalled
### connection would block this cell forever. With the timeout a
### requests.exceptions.Timeout is raised instead.
response = requests.get(url, timeout=30)

if response.status_code == 200:
    ### NOTE(review): the 'externel' spelling and the double slash are kept
    ### as-is because the next cell reads the file from this exact path.
    with open('../data/raw//externel/australian_postcodes.csv', 'wb') as file:
        file.write(response.content)
    print("files saved as 'australian_postcodes.csv'")
else:
    ### Any non-200 answer is only reported; nothing is written to disk.
    print(f"download failed: {response.status_code}")

In [None]:
### Recalculate the population by postcode: prepare the two input tables.

### Columns kept from the postcode table.
selected_columns = ['postcode', 'locality', 'long', 'lat', 'SA2_NAME_2021']

### Read the downloaded postcode table, keep only the VIC rows and the
### selected columns, then discard every row that has a missing value.
data = (
    pd.read_csv('../data/raw//externel/australian_postcodes.csv')
    .loc[lambda frame: frame['state'] == 'VIC', selected_columns]
    .dropna()
)

### Save the curated VIC postcode table (the index is written as well).
data.to_csv('../data/curated/external/vic_postcodes.csv')

### Read the SA2 file that is merged with the postcodes in the next cell.
pop = pd.read_csv('../data/curated/external/SA2/sa2final.csv')

In [None]:
### Combine postcode and SA2 tables. The right join keeps every row of `pop`,
### even when no postcode row carries a matching SA2 name.
merged_data = pd.merge(data, pop, left_on='SA2_NAME_2021', right_on='SA2 name', how='right')

### Deal with some missing values: postcodes assigned by hand to the SA2s
### listed below (presumably the ones left without a postcode by the join —
### TODO confirm). Every row of a listed SA2 gets the postcode shown here.
### The four Bendigo-area SA2s previously received 3350, which is Ballarat's
### postcode; Bendigo is 3550 and Eaglehawk / California Gully are 3556.
sa2_postcode_overrides = {
    'Alfredton': 3350,
    'Canadian - Mount Clear': 3350,
    'Ballarat': 3350,
    'Smythes Creek': 3351,
    'Ballarat North - Invermay': 3352,
    'Ballarat East - Warrenheip': 3352,
    'Bacchus Marsh Surrounds': 3340,
    'Creswick - Clunes': 3363,
    'Maryborough': 3465,
    'Bendigo': 3550,
    'California Gully - Eaglehawk': 3556,
    'East Bendigo - Kennington': 3550,
    'Flora Hill - Spring Gully': 3550,
}
for sa2_name, sa2_postcode in sa2_postcode_overrides.items():
    merged_data.loc[merged_data['SA2 name'] == sa2_name, 'postcode'] = sa2_postcode

### Drop the 'Unnamed: 0' column (presumably a saved index that came in with
### the SA2 file — TODO confirm).
merged_data = merged_data.drop(['Unnamed: 0'], axis=1)

In [None]:
### get the postcode population and save data
population_columns = ['2021 popluation', '2022 popluation', '2023 popluation']

### The merge repeats an SA2's population figures on every postcode-table row
### that points at that SA2 (presumably one row per locality — TODO confirm).
### Summing those repeats would count the same population several times within
### a postcode, so keep a single row per postcode / SA2 / population figures.
unique_sa2_per_postcode = merged_data.drop_duplicates(
    subset=['postcode', 'SA2 name'] + population_columns
)

### NOTE(review): an SA2 that appears under several postcodes still contributes
### its full population to each of them — confirm whether it should be split.
summed_data = (
    unique_sa2_per_postcode
    .groupby('postcode')
    .agg({column: 'sum' for column in population_columns})
    .reset_index()
)
summed_data.to_csv('../data/curated/SA2/postcode_pop.csv')
summed_data

### start livability and affordability calculation

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np


### Load the cleaned listing data and attach the per-postcode population.
file_path = '../data/curated/external/cleaned_final_data.csv'

### Columns removed before the merge; the three population columns are
### supplied again by the postcode population file merged in below.
cols_drop = [
    'beds', 'baths', 'parking',
    'SA2_CODE_2021', 'SA2_NAME_2021',
    '2021 popluation', '2022 popluation', '2023 popluation',
]
data_ = pd.read_csv(file_path).drop(columns=cols_drop)

### Population per postcode, as saved by the earlier aggregation cell.
pp = pd.read_csv('../data/curated/SA2/postcode_pop.csv')

### Inner join: only rows whose postcode appears in both tables are kept.
df_1 = data_.merge(pp, on='postcode', how='inner')

In [None]:
### affordability: price divided by 'Median_tot_prsnl_inc_weekly', times 0.3
### NOTE(review): this evaluates as (price / income) * 0.3, not
### price / (income * 0.3) — confirm which of the two was intended.
df_1['affordability_ratio'] = (
    df_1['price'].div(df_1['Median_tot_prsnl_inc_weekly']).mul(0.3)
)

### number of crimes per 100,000 people, one column per year
yearly_rate_inputs = [
    ('crime_rate_21', '2021crime', '2021 popluation'),
    ('crime_rate_22', '2022crime', '2022 popluation'),
    ('crime_rate_23', '2023crime', '2023 popluation'),
]
for rate_col, crime_col, population_col in yearly_rate_inputs:
    df_1[rate_col] = (df_1[crime_col] / df_1[population_col]) * 100000

### the total is the plain sum of the three yearly rates
df_1['total_crime_rate'] = (
    df_1['crime_rate_21']
    + df_1['crime_rate_22']
    + df_1['crime_rate_23']
)

### discard rows with a missing value in any column, then rows that contain
### +inf or -inf in any column
df_1 = df_1.dropna()
has_infinite = df_1.isin([np.inf, -np.inf]).any(axis=1)
df_1 = df_1.loc[~has_infinite]

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

### Build livability, affordability and combined scores, save them, and show
### the ten best-scoring suburbs.
scaler = MinMaxScaler()

### df_1 was produced by row filtering; take an explicit copy so the column
### assignments below write to an independent frame.
df_1 = df_1.copy()

### standardise distances to the [0, 1] range
distance_columns = ['minimum_distance_school', 'minimum_distance_police',
                    'minimum_distance_station', 'minimum_distance_supermarket',
                    'minimum_distance_library']
### The frame must be df_1: the previous code read from `df`, a name that is
### never defined in this notebook, so the cell failed on a fresh kernel.
df_1[distance_columns] = scaler.fit_transform(df_1[distance_columns])

### weights of the individual components of the livability score
distance_weights = {
    'minimum_distance_school': 0.2,
    'minimum_distance_police': 0.25,
    'minimum_distance_station': 0.2,
    'minimum_distance_supermarket': 0.15,
    'minimum_distance_library': 0.2
}
crime_weight = 0.25

### standardise the crime rate; a lower rate gives a higher crime score
df_1['total_crime_rate'] = df_1['total_crime_rate'].astype(np.float64)
df_1['total_crime_rate'] = scaler.fit_transform(df_1[['total_crime_rate']]).ravel()
df_1['crime_score'] = 1 - df_1['total_crime_rate']

### Weighted livability. Distances are inverted (1 - scaled distance) so that
### a shorter distance raises the score, matching how crime and affordability
### are inverted. Previously the scaled distance was added as-is, which
### rewarded being far away from every amenity.
proximity_part = sum(
    (1 - df_1[col]) * weight for col, weight in distance_weights.items()
)
livability_score = proximity_part + df_1['crime_score'] * crime_weight
### Divide by the total weight actually applied so the score spans [0, 1].
### The previous divisor (distance weights + 1) capped the score at 0.625 and
### under-weighted livability in the 50/50 combined score.
livability_score = livability_score / (sum(distance_weights.values()) + crime_weight)
df_1['livability_score'] = livability_score

### affordability: a lower ratio gives a higher score
df_1['affordability_score'] = scaler.fit_transform(df_1[['affordability_ratio']]).ravel()
df_1['affordability_score'] = 1 - df_1['affordability_score']

### equal weighting of livability and affordability
df_1['combined_score'] = (df_1['livability_score'] * 0.5 + df_1['affordability_score'] * 0.5)

df_1.to_csv('../data/curated/combine_score.csv')

### get top 10: sort best-first, keep the best row per suburb, show ten
top_10 = df_1.sort_values(by='combined_score', ascending=False)
top_10 = top_10.drop_duplicates('Suburb')
top_10[['Suburb', 'livability_score', 'affordability_score', 'combined_score']].head(10)