# Top 10 Most Liveable Suburbs

In [None]:
import pandas as pd
import numpy as np

In [23]:
property_df = pd.read_csv("../data/curated/full_data.csv")

In [82]:
# select mean values for each suburb
suburb_data = property_df.groupby('SA2_NAME21',  as_index=False).agg(med_personal_inc_weekly=('Median_tot_prsnl_inc_weekly', 'mean'),
                                                   med_rent_weekly=('Median_rent_weekly', 'mean'),
                                                   med_fam_inc_weekly=('Median_tot_fam_inc_weekly', 'mean'),
                                                   offence_count = ('Offence Count', 'mean'))

In [69]:
suburb_data.head(2)

Unnamed: 0,SA2_NAME21,med_personal_inc_weekly,med_rent_weekly,med_fam_inc_weekly,offence_count
0,Airport West,883.0,401.0,2292.0,519.0
1,Alphington - Fairfield,1098.0,381.0,3138.0,435.0


In [85]:
# select mode value for walking distance to each train station of each suburb
mode_duration_mins = property_df.groupby('SA2_NAME21',  as_index=False)['duration_mins'].agg(pd.Series.mode)

In [73]:
mode_duration_mins.head(3)

Unnamed: 0,SA2_NAME21,duration_mins
0,Airport West,>50.0
1,Alphington - Fairfield,20.0
2,Altona,10.0


In [125]:
# merge two dataframes together
full_suburb_data = suburb_data.merge(mode_duration_mins, how='inner', on='SA2_NAME21')

#### Finding "liveable" suburbs

In [126]:
# suburbs that are at most a 20 min walk from the train station
full_suburb_data_20mins = full_suburb_data[full_suburb_data.duration_mins.isin(['10.0', '20.0'])].reset_index()
full_suburb_data_20mins = full_suburb_data_20mins.drop(['index'], axis=1)

In [127]:
full_suburb_data_20mins.head(2)

Unnamed: 0,SA2_NAME21,med_personal_inc_weekly,med_rent_weekly,med_fam_inc_weekly,offence_count,duration_mins
0,Alphington - Fairfield,1098.0,381.0,3138.0,435.0,20.0
1,Altona,945.0,390.0,2475.0,403.0,10.0


In [142]:
# 25th percentile for offence_count
p_25 = np.percentile(full_suburb_data_20mins['offence_count'], 25)

In [151]:
suburb_lesscrime = full_suburb_data_20mins.loc[full_suburb_data_20mins['offence_count'] <= p_25]
suburb_lesscrime.shape

(31, 6)

In [160]:
# mean weekly family income
inc_mean = suburb_lesscrime['med_fam_inc_weekly'].mean()

In [161]:
rich_suburb = suburb_lesscrime.loc[suburb_lesscrime['med_fam_inc_weekly'] >= inc_mean]
rich_suburb.shape

(15, 6)

In [166]:
# sort on weekly rent and median personal weekly income
final_df = suburb_lesscrime.sort_values(by=['med_rent_weekly', 'med_personal_inc_weekly'], ascending=[True, False])

In [167]:
final_df.head(10)

Unnamed: 0,SA2_NAME21,med_personal_inc_weekly,med_rent_weekly,med_fam_inc_weekly,offence_count,duration_mins
16,Campbellfield - Coolaroo,437.0,330.0,1245.0,424.0,20.0
113,Upwey - Tecoma,902.0,375.0,2477.0,125.0,20.0
57,Laverton,875.0,380.0,2241.0,241.0,10.0
99,South Morang - South,781.0,380.0,2147.0,416.0,20.0
38,Fawkner,594.0,380.0,1691.0,307.0,20.0
0,Alphington - Fairfield,1098.0,381.0,3138.0,435.0,20.0
24,Clayton South,685.0,381.0,1743.0,507.0,20.0
36,Essendon (West) - Aberfeldie,1019.0,382.0,2926.0,497.0,20.0
118,West Melbourne - Residential,1047.0,390.0,2397.0,428.0,10.0
1,Altona,945.0,390.0,2475.0,403.0,10.0
