In [29]:
import pandas as pd
import numpy as np

In [30]:
# Load the historical and projected scoring tables and stack them row-wise.
past_data = pd.read_csv("../data/scoring/past_data.csv")
future_data = pd.read_csv("../data/scoring/future_data.csv")
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# each file keeps its own row labels, so the result carries duplicate labels
# that make label-based access ambiguous.
df = pd.concat([past_data, future_data], ignore_index=True)


import pandas as pd

def _min_max_scale(values):
    """Linearly rescale a numeric Series onto the range [0, 1].

    Parameters
    ----------
    values : pandas.Series
        Numeric column to rescale.

    Returns
    -------
    pandas.Series
        ``(values - min) / (max - min)``. When every non-missing value is the
        same (``max == min``) the column is mapped to 0.0 instead of dividing
        by zero, which would otherwise turn the whole column into NaN.
        Missing values stay missing.
    """
    low = values.min()
    span = values.max() - low
    if span == 0:
        # Constant column: multiplying by 0.0 keeps NaNs as NaN and yields a
        # float column of zeros for everything else.
        return values * 0.0
    return (values - low) / span


# Engineer crime rate (offences per resident).
# Rows with a non-positive population are masked to NaN first: dividing by 0
# would give inf, and a single inf makes the min-max range below infinite,
# which flattens every other crime rate to 0.
population = df['population']
df['crime_rate'] = df['Offence Count'] / population.where(population > 0)

# Normalize metrics to [0, 1] in place so that they are comparable.
# 'population' is rescaled here as well, although it carries no weight below.
columns_to_normalize = ['Nearest_park', 'Nearest_school', 'Nearest_hospital', 'Nearest_supermarket', 'crime_rate', 'cost', 'population', 'weekly_income', 'distance_to_cbd']
for column in columns_to_normalize:
    df[column] = _min_max_scale(df[column])

# Define weights for liveability metrics
# Note: We're using negative weights for metrics where lower values are better.
weights = {
    'Nearest_park': -0.15,
    'Nearest_school': -0.15,
    'Nearest_hospital': -0.15,
    'Nearest_supermarket': -0.15,
    'distance_to_cbd': -0.15,
    'crime_rate': -0.1,     # Negative because lower crime rate is better
    'weekly_income': 0.1    # Assuming higher weekly income is better
}

# Calculate liveability score as the weighted sum of the normalized metrics
df['liveability_score'] = sum(df[col] * weight for col, weight in weights.items())

# Adjust the liveability score to be in the range [0, 1]
df['liveability_score'] = _min_max_scale(df['liveability_score'])

# Calculate affordability score (inverse of the normalized cost)
df['affordability_score'] = 1 - df['cost']

# Combine liveability and affordability (assuming equal importance)
df['final_score'] = (df['liveability_score'] + df['affordability_score']) / 2

# Aggregate scores by LGA district (mean over each district's rows;
# rows whose score is NaN are skipped by the mean)
lga_scores = df.groupby('LGA_NAME23')['final_score'].mean()

# Rank LGA districts from highest to lowest final score
lga_ranking = lga_scores.sort_values(ascending=False)

print(lga_ranking)





LGA_NAME23
Wyndham      0.799622
Merri-bek    0.783770
Casey        0.773934
Melton       0.766841
Darebin      0.763512
               ...   
Ararat       0.520954
Swan Hill    0.512774
Wodonga      0.512749
Loddon       0.500329
Mildura      0.388457
Name: final_score, Length: 76, dtype: float64
