In [1]:
import pandas as pd
import numpy as np

In [2]:
past_data = pd.read_csv("../data/scoring/past_data.csv")
future_data = pd.read_csv("../data/scoring/future_data.csv")
df = pd.concat([past_data, future_data]) 


import pandas as pd

# Engineer crime rate
df['crime_rate'] = df['Offence Count'] / df['population']

# Normalize metrics
columns_to_normalize = ['Nearest_park', 'Nearest_school', 'Nearest_hospital', 'Nearest_supermarket', 'crime_rate', 'cost', 'population', 'weekly_income', 'distance_to_cbd']
for column in columns_to_normalize:
    df[column] = (df[column] - df[column].min()) / (df[column].max() - df[column].min())

# Define weights for liveability metrics
# Note: We're using negative weights for metrics where lower values are better.
weights = {
    'Nearest_park': -0.15,
    'Nearest_school': -0.15,
    'Nearest_hospital': -0.15,
    'Nearest_supermarket': -0.15,
    'distance_to_cbd': -0.15, 
    'crime_rate': -0.1,     # Negative because lower crime rate is better
    'weekly_income': 0.1    # Assuming higher weekly income is better
}

# Calculate liveability score
df['liveability_score'] = sum(df[col] * weight for col, weight in weights.items())

# Adjust the liveability score to be in the range [0, 1]
df['liveability_score'] = (df['liveability_score'] - df['liveability_score'].min()) / (df['liveability_score'].max() - df['liveability_score'].min())

# Calculate affordability score (inverse of cost)
df['affordability_score'] = 1 - df['cost']

# Combine liveability and affordability (assuming equal importance)
df['final_score'] = (df['liveability_score'] + df['affordability_score']) / 2

# Aggregate scores by LGA district
lga_scores = df.groupby('LGA_NAME23')['final_score'].mean()

# Rank LGA districts
lga_ranking = lga_scores.sort_values(ascending=False)

print(lga_ranking)





LGA_NAME23
Merri-bek        0.785862
Moonee Valley    0.773628
Darebin          0.769731
Wyndham          0.766443
Buloke           0.761299
                   ...   
Ararat           0.533940
Swan Hill        0.523592
Wodonga          0.520940
Loddon           0.510362
Mildura          0.389034
Name: final_score, Length: 76, dtype: float64


In [7]:
df.loc[df['LGA_NAME23'] == 'Merri-bek']

Unnamed: 0.1,Unnamed: 0,LGA_NAME23,cost,beds,baths,parkings,Nearest_station,Nearest_park,Nearest_shop,Nearest_hospital,...,Nearest_supermarket,distance_to_cbd,year,Offence Count,population,weekly_income,crime_rate,liveability_score,affordability_score,final_score
44,45,Merri-bek,0.310345,2.0,1.0,1.0,2.953333,0.001481,4.343333,0.001857,...,0.023923,0.011291,2017,14384.0,0.357896,0.427792,0.261026,0.91427,0.689655,0.801963
120,124,Merri-bek,0.324138,2.0,1.0,1.0,2.953333,0.001481,4.343333,0.001857,...,0.023923,0.011291,2018,13351.0,0.366802,0.474413,0.222151,0.928484,0.675862,0.802173
196,203,Merri-bek,0.337931,2.0,1.0,1.0,2.953333,0.001481,4.343333,0.001857,...,0.023923,0.011291,2019,14597.0,0.376269,0.524086,0.247043,0.932603,0.662069,0.797336
272,282,Merri-bek,0.310345,2.0,1.0,1.0,2.953333,0.001481,4.343333,0.001857,...,0.023923,0.011291,2020,14439.0,0.385853,0.577011,0.233072,0.943725,0.689655,0.81669
348,361,Merri-bek,0.296552,2.0,1.0,1.0,2.953333,0.001481,4.343333,0.001857,...,0.023923,0.011291,2021,12934.0,0.39542,0.633401,0.184622,0.961154,0.703448,0.832301
424,440,Merri-bek,0.337931,2.0,1.0,1.0,2.953333,0.001481,4.343333,0.001857,...,0.023923,0.011291,2022,11518.0,0.404993,0.693482,0.140684,0.978446,0.662069,0.820258
500,519,Merri-bek,0.482759,2.0,1.0,1.0,2.953333,0.001481,4.343333,0.001857,...,0.023923,0.011291,2023,12404.0,0.414419,0.757496,0.156209,0.986507,0.517241,0.751874
44,45,Merri-bek,0.464736,2.0,1.0,1.0,2.953333,0.001481,4.343333,0.001857,...,0.023923,0.011291,2024,19055.91,0.423701,0.825701,0.311796,0.97198,0.535264,0.753622
120,124,Merri-bek,0.495187,2.0,1.0,1.0,2.953333,0.001481,4.343333,0.001857,...,0.023923,0.011291,2025,19065.8,0.432778,0.89837,0.302451,0.985615,0.504813,0.745214
196,203,Merri-bek,0.525631,2.0,1.0,1.0,2.953333,0.001481,4.343333,0.001857,...,0.023923,0.011291,2026,19062.58,0.441682,0.975797,0.293347,1.0,0.474369,0.737185
