# Score the Liveability

In [8]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from sklearn import preprocessing
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
import pylab

In [9]:
# Load the merged dataset from the curated data folder; all scoring below starts from it.
merged_data_path = '../data/curated/merged_data.csv'
df = pd.read_csv(merged_data_path)

In [10]:
# Collapse the data to one row per postcode: keep the first SA2 code seen for
# the postcode and take the mean of each numeric indicator.
postcode_aggregations = {
    'SA2': 'first',
    'price': 'mean',
    'avg_income': 'mean',
    'avg_population': 'mean',
    'avg_crime': 'mean',
}
df = df.groupby('postcode').agg(postcode_aggregations).reset_index()

In [11]:
# Preview the first five aggregated rows.
df.head(n=5)

Unnamed: 0,postcode,SA2,price,avg_income,avg_population,avg_crime
0,3000,206041122,582.80303,47378.47006,62833.31,184.036145
1,3002,206041119,616.0,66541.0,6187.549,15.622642
2,3003,206041127,580.410959,19507.0,1.788139e-07,23.313725
3,3004,206041126,602.571429,47378.47006,31839.9,34.234818
4,3006,206041126,625.492754,47378.47006,31839.9,34.234818


## Merge School Counts by Postcode

In [12]:
# Count schools per postcode, then attach the count to every postcode row.
school = (
    pd.read_csv("../data/landing/School Locations 2023.csv")
    .groupby('Address_Postcode')
    .size()
    .reset_index(name='school_counts')
    .rename(columns={'Address_Postcode': 'postcode'})
)

# Left join keeps every postcode in df; postcodes without a matching school
# row come back as NaN and are turned into a count of 0.
merged_df = pd.merge(df, school, how='left', on='postcode')
merged_df['school_counts'] = merged_df['school_counts'].fillna(0).astype(int)
merged_df.head()

Unnamed: 0,postcode,SA2,price,avg_income,avg_population,avg_crime,school_counts
0,3000,206041122,582.80303,47378.47006,62833.31,184.036145,4
1,3002,206041119,616.0,66541.0,6187.549,15.622642,0
2,3003,206041127,580.410959,19507.0,1.788139e-07,23.313725,1
3,3004,206041126,602.571429,47378.47006,31839.9,34.234818,4
4,3006,206041126,625.492754,47378.47006,31839.9,34.234818,2


## Merge Train Station Data from SA2 to Postcode

In [13]:
# Count metro train stations per SA2 region and attach the count to each
# postcode row through the postcode's SA2 code.
train = pd.read_csv("../data/landing/datasource-VIC_Govt_PTV-VIC_Govt_DELWP_datavic_PTV_METRO_TRAIN_STATION.csv")

# Station points are built from longitude/latitude columns and declared as
# EPSG:4326 at construction time.
train_gdf = gpd.GeoDataFrame(
    train,
    geometry=gpd.points_from_xy(train['LONGITUDE'], train['LATITUDE']),
    crs="EPSG:4326",
)

sa2_gdf = gpd.read_file('../data/raw/SA2_shapefile/VIC_SA2_2021_AUST_GDA2020.shp')
# Bring the SA2 polygons into the same CRS as the points. Assigning `.crs`
# directly only relabels the coordinates; `to_crs` actually reprojects them.
# `set_crs` is used only when the file carries no CRS information at all.
if sa2_gdf.crs is None:
    sa2_gdf = sa2_gdf.set_crs("EPSG:4326")
else:
    sa2_gdf = sa2_gdf.to_crs("EPSG:4326")

# `predicate` replaces the deprecated `op` keyword of sjoin.
merged_gdf = gpd.sjoin(train_gdf, sa2_gdf, how="inner", predicate="within")
train_counts = merged_gdf.groupby('SA2_CODE21').size().reset_index(name='train_counts')

# NOTE(review): this merge assumes 'SA2' and 'SA2_CODE21' share a dtype - confirm.
merged_df = merged_df.merge(train_counts, left_on='SA2', right_on='SA2_CODE21', how='left').drop(columns='SA2_CODE21')
# SA2 regions with no station have no row in train_counts; treat them as 0.
merged_df['train_counts'] = merged_df['train_counts'].fillna(0).astype(int)
merged_df.head()

  if await self.run_code(code, result, async_=asy):


Unnamed: 0,postcode,SA2,price,avg_income,avg_population,avg_crime,school_counts,train_counts
0,3000,206041122,582.80303,47378.47006,62833.31,184.036145,4,0
1,3002,206041119,616.0,66541.0,6187.549,15.622642,0,1
2,3003,206041127,580.410959,19507.0,1.788139e-07,23.313725,1,1
3,3004,206041126,602.571429,47378.47006,31839.9,34.234818,4,0
4,3006,206041126,625.492754,47378.47006,31839.9,34.234818,2,0


## Merge Hospital Data from SA2 to Postcode

In [14]:
# Count hospitals per SA2 region and attach the count to each postcode row
# through the postcode's SA2 code.
hospital = pd.read_csv("../data/landing/all_hospitals_in_victoria.csv")

# Hospital points are built from longitude/latitude columns and declared as
# EPSG:4326 at construction time.
hospital_gdf = gpd.GeoDataFrame(
    hospital,
    geometry=gpd.points_from_xy(hospital['longitude'], hospital['latitude']),
    crs="EPSG:4326",
)

# sa2_gdf holds the SA2 boundary polygons loaded in an earlier cell.
# `predicate` replaces the deprecated `op` keyword of sjoin.
merged_gdf = gpd.sjoin(hospital_gdf, sa2_gdf, how="inner", predicate="within")
hospital_counts = merged_gdf.groupby('SA2_CODE21').size().reset_index(name='hospital_counts')

merged_df = merged_df.merge(hospital_counts, left_on='SA2', right_on='SA2_CODE21', how='left').drop(columns='SA2_CODE21')
# SA2 regions with no hospital have no row in hospital_counts; treat them as 0.
merged_df['hospital_counts'] = merged_df['hospital_counts'].fillna(0).astype(int)
merged_df.head()

  if await self.run_code(code, result, async_=asy):


Unnamed: 0,postcode,SA2,price,avg_income,avg_population,avg_crime,school_counts,train_counts,hospital_counts
0,3000,206041122,582.80303,47378.47006,62833.31,184.036145,4,0,0
1,3002,206041119,616.0,66541.0,6187.549,15.622642,0,1,8
2,3003,206041127,580.410959,19507.0,1.788139e-07,23.313725,1,1,0
3,3004,206041126,602.571429,47378.47006,31839.9,34.234818,4,0,0
4,3006,206041126,625.492754,47378.47006,31839.9,34.234818,2,0,0


## Merge Shopping Mall Data from SA2 to Postcode

In [15]:
# Count shopping malls per SA2 region and attach the count to each postcode
# row through the postcode's SA2 code.
mall = pd.read_csv("../data/landing/mall_coordinates.csv")

# Mall points are built from longitude/latitude columns and declared as
# EPSG:4326 at construction time.
mall_gdf = gpd.GeoDataFrame(
    mall,
    geometry=gpd.points_from_xy(mall['Longitude'], mall['Latitude']),
    crs="EPSG:4326",
)

# sa2_gdf holds the SA2 boundary polygons loaded in an earlier cell.
# `predicate` replaces the deprecated `op` keyword of sjoin.
merged_gdf = gpd.sjoin(mall_gdf, sa2_gdf, how="inner", predicate="within")
mall_counts = merged_gdf.groupby('SA2_CODE21').size().reset_index(name='mall_counts')

merged_df = merged_df.merge(mall_counts, left_on='SA2', right_on='SA2_CODE21', how='left').drop(columns='SA2_CODE21')
# SA2 regions with no mall have no row in mall_counts; treat them as 0.
merged_df['mall_counts'] = merged_df['mall_counts'].fillna(0).astype(int)
merged_df.head()

  if await self.run_code(code, result, async_=asy):


Unnamed: 0,postcode,SA2,price,avg_income,avg_population,avg_crime,school_counts,train_counts,hospital_counts,mall_counts
0,3000,206041122,582.80303,47378.47006,62833.31,184.036145,4,0,0,0
1,3002,206041119,616.0,66541.0,6187.549,15.622642,0,1,8,1
2,3003,206041127,580.410959,19507.0,1.788139e-07,23.313725,1,1,0,0
3,3004,206041126,602.571429,47378.47006,31839.9,34.234818,4,0,0,0
4,3006,206041126,625.492754,47378.47006,31839.9,34.234818,2,0,0,0


## Merge Park Data from SA2 to Postcode

In [16]:
# Count parks per SA2 region, attach the count to each postcode row through
# the postcode's SA2 code, and save the resulting feature table.
park = pd.read_csv("../data/landing/parks_in_victoria.csv")

# Park points are built from longitude/latitude columns and declared as
# EPSG:4326 at construction time.
park_gdf = gpd.GeoDataFrame(
    park,
    geometry=gpd.points_from_xy(park['longitude'], park['latitude']),
    crs="EPSG:4326",
)

# sa2_gdf holds the SA2 boundary polygons loaded in an earlier cell.
# `predicate` replaces the deprecated `op` keyword of sjoin.
merged_gdf = gpd.sjoin(park_gdf, sa2_gdf, how="inner", predicate="within")
park_counts = merged_gdf.groupby('SA2_CODE21').size().reset_index(name='park_counts')

merged_df = merged_df.merge(park_counts, left_on='SA2', right_on='SA2_CODE21', how='left').drop(columns='SA2_CODE21')
# SA2 regions with no park have no row in park_counts; treat them as 0.
merged_df['park_counts'] = merged_df['park_counts'].fillna(0).astype(int)

# Save the table with all raw scoring inputs (the row index is written too).
merged_df.to_csv("../data/curated/scoring_suburb.csv")
merged_df.head()

  if await self.run_code(code, result, async_=asy):


Unnamed: 0,postcode,SA2,price,avg_income,avg_population,avg_crime,school_counts,train_counts,hospital_counts,mall_counts,park_counts
0,3000,206041122,582.80303,47378.47006,62833.31,184.036145,4,0,0,0,0
1,3002,206041119,616.0,66541.0,6187.549,15.622642,0,1,8,1,29
2,3003,206041127,580.410959,19507.0,1.788139e-07,23.313725,1,1,0,0,2
3,3004,206041126,602.571429,47378.47006,31839.9,34.234818,4,0,0,0,0
4,3006,206041126,625.492754,47378.47006,31839.9,34.234818,2,0,0,0,0


## Define a Function that Rescales Input Values to the Interval [0, 1]

In [17]:
def normalize(column, reverse=False):
    """
    Min-max scale the values of a column to the range [0, 1].

    Parameters:
    - column: The pandas Series to be scaled. A DataFrame is also accepted
      and is scaled column by column.
    - reverse: If True, flip the scale so the smallest input maps to 1 and
      the largest to 0. Use this for measures where a lower raw value is
      better (e.g., crime rates).

    Returns:
    - A new object of the same shape holding the scaled values. A column
      whose values are all identical has no spread to scale; it maps to
      0.0 everywhere (1.0 with reverse=True) rather than NaN.
    """
    min_val = column.min()
    max_val = column.max()
    value_range = max_val - min_val

    # A zero range would make the division below 0/0 and fill the result
    # with NaN, which then turns every weighted score into NaN as well.
    # Dividing by 1 instead leaves the (all-zero) shifted values untouched.
    if isinstance(value_range, pd.Series):
        # DataFrame input: one range per column.
        value_range = value_range.replace(0, 1)
    elif value_range == 0:
        value_range = 1

    scaled = (column - min_val) / value_range
    return 1 - scaled if reverse else scaled

# Rescale every scoring input with normalize() and store the result in a
# 'normalized_*' column of merged_df.

# Measures where a lower raw value is better are flipped with reverse=True.
lower_is_better = {
    'normalized_crime_rate': 'avg_crime',
    'normalized_avg_house_price': 'price',
}
for target_col, source_col in lower_is_better.items():
    merged_df[target_col] = normalize(merged_df[source_col], reverse=True)

# Measures where a higher raw value is better keep their orientation.
columns_to_normalize = [
    'avg_population',
    'avg_income',
    'school_counts',
    'train_counts',
    'hospital_counts',
    'park_counts',
    'mall_counts',
]
for col in columns_to_normalize:
    merged_df['normalized_' + col] = normalize(merged_df[col])

## Weights Assigned to Each Criterion

In [18]:
# Relative importance of each normalized criterion; the weights add up to 1.
weights = dict(
    normalized_crime_rate=0.23,
    normalized_avg_population=0.12,
    normalized_avg_income=0.19,
    normalized_avg_house_price=0.24,
    normalized_school_counts=0.03,
    normalized_train_counts=0.08,
    normalized_hospital_counts=0.03,
    normalized_park_counts=0.02,
    normalized_mall_counts=0.06,
)

# Weighted sum of the normalized columns, accumulated one criterion at a time
# in the order the weights are listed.
livability_score = 0
for criterion, weight in weights.items():
    livability_score = livability_score + merged_df[criterion] * weight
merged_df['livability_score'] = livability_score

In [22]:
# Rank postcodes from highest to lowest livability score, keep only the
# postcode and its score, and save the full ranking.
sorted_df = (
    merged_df
    .sort_values('livability_score', ascending=False)
    .loc[:, ['postcode', 'livability_score']]
)
sorted_df.to_csv('../data/curated/liveable_score.csv', index=False)

In [25]:
# Re-sort by score (highest first), take the ten best postcodes and save them.
sorted_df = sorted_df.sort_values('livability_score', ascending=False)
top10_livabile = sorted_df.loc[:, ['postcode', 'livability_score']].iloc[:10]
top10_livabile.to_csv('../data/curated/top10_liveable_score.csv', index=False)