In [23]:
import pandas as pd
import numpy as np

In [24]:
# Load the past and future scoring tables and stack them row-wise into one frame.
# read_csv gives each table its own 0-based row labels; ignore_index=True
# renumbers the stacked rows 0..n-1 so labels are unique (otherwise a lookup
# such as df.loc[0] matches one row from each file).
past_data = pd.read_csv("../data/scoring/past_data.csv")
future_data = pd.read_csv("../data/scoring/future_data.csv")
df = pd.concat([past_data, future_data], ignore_index=True)

# Score every row of the combined frame `df` for liveability and affordability,
# then rank LGA districts by their mean final score.
#
# Reads from df:  'Offence Count', 'population', 'cost', 'weekly_income',
#                 'distance_to_cbd', the four 'Nearest_*' columns listed below,
#                 and 'LGA_NAME23'.
# Writes to df:   'crime_rate', 'liveability_score', 'affordability_score',
#                 'final_score'.
# NOTE: every column in `columns_to_normalize` is overwritten in place with its
# 0-1 scaled value, so after this cell df['cost'], df['population'], etc. no
# longer hold the raw figures.


def _min_max(series):
    """Rescale ``series`` linearly so its minimum maps to 0 and its maximum to 1.

    Missing values stay missing. When the column has no spread (max == min) the
    plain formula is 0/0 and would turn the whole column into NaN, which then
    propagates into every score; such a column is returned as all zeros instead
    because it cannot differentiate rows anyway.
    """
    low = series.min()
    spread = series.max() - low
    if pd.isna(spread) or spread == 0:
        # `* 0.0` keeps NaN where the input was NaN and yields 0.0 elsewhere.
        return series * 0.0
    return (series - low) / spread


# Engineer crime rate (offences per head of population).
# A zero population would give an infinite rate, and one inf makes max - min
# infinite so the scaled column collapses to 0/NaN; treat those rows as missing.
population = df['population'].where(df['population'] != 0)
df['crime_rate'] = df['Offence Count'] / population

# Normalize metrics to the [0, 1] range so the weights below are comparable.
columns_to_normalize = ['Nearest_park', 'Nearest_school', 'Nearest_hospital', 'Nearest_supermarket', 'crime_rate', 'cost', 'population', 'weekly_income', 'distance_to_cbd']
for column in columns_to_normalize:
    df[column] = _min_max(df[column])

# Define weights for liveability metrics.
# Negative weights mark metrics where lower values are better. The magnitudes
# add up to 1.05 rather than 1; that is harmless because the weighted sum is
# rescaled to [0, 1] below.
weights = {
    'Nearest_park': -0.15,
    'Nearest_school': -0.15,
    'Nearest_hospital': -0.15,
    'Nearest_supermarket': -0.15,
    'distance_to_cbd': -0.15,
    'crime_rate': -0.1,     # Negative because lower crime rate is better
    'population': 0.1,      # Assuming higher population is better
    'weekly_income': 0.1    # Assuming higher weekly income is better
}

# Calculate liveability score as the weighted sum of the scaled metrics.
# A row with a missing value in any weighted metric gets a NaN score.
df['liveability_score'] = sum(df[col] * weight for col, weight in weights.items())

# Adjust the liveability score to be in the range [0, 1]
df['liveability_score'] = _min_max(df['liveability_score'])

# Calculate affordability score (inverse of the scaled cost)
df['affordability_score'] = 1 - df['cost']

# Combine liveability and affordability (assuming equal importance)
df['final_score'] = (df['liveability_score'] + df['affordability_score']) / 2

# Aggregate scores by LGA district (mean over all rows of each district)
lga_scores = df.groupby('LGA_NAME23')['final_score'].mean()

# Rank LGA districts, best first
lga_ranking = lga_scores.sort_values(ascending=False)

print(lga_ranking)





LGA_NAME23
Wyndham      0.825125
Merri-bek    0.809743
Casey        0.798597
Melton       0.794067
Darebin      0.789390
               ...   
Ararat       0.543225
Swan Hill    0.533949
Wodonga      0.532624
Loddon       0.523126
Mildura      0.410237
Name: final_score, Length: 76, dtype: float64


In [25]:
# Show the combined frame; it now carries the scaled input columns and the
# crime_rate / liveability_score / affordability_score / final_score columns.
df

Unnamed: 0.1,Unnamed: 0,LGA_NAME23,LGA_CODE23,name,geometry,cost,beds,baths,parkings,Nearest_station,...,Nearest_supermarket,distance_to_cbd,year,Offence Count,population,weekly_income,crime_rate,liveability_score,affordability_score,final_score
0,0,Alpine,20110,34 O'Donnell Avenue Myrtleford VIC 3737,POINT (146.7271339 -36.5522656),0.131034,2.0,1.0,2.0,46.428333,...,0.047224,1.000000,2017,396.00,0.009469,0.174471,0.074956,0.579229,0.868966,0.724097
1,1,Ararat,20260,4 Grano Street Ararat VIC 3377,POINT (142.9414137 -37.2818641),0.117241,3.0,1.0,1.0,2.428333,...,0.065490,1.000000,2017,1249.00,0.007526,0.145866,0.715194,0.265202,0.882759,0.573980
2,2,Ballarat,20570,3/41 Longley Street Alfredton VIC 3350,POINT (143.8376317 -37.5653948),0.144828,3.0,2.0,2.0,7.341667,...,0.119772,1.000000,2017,11885.00,0.189996,0.255616,0.469970,0.549568,0.855172,0.702370
3,3,Banyule,20660,3 Clinton Street Heidelberg Heights VIC 3081,POINT (145.0598679 -37.7598674),0.303448,3.0,1.0,1.0,2.876667,...,0.052932,0.018687,2017,9703.00,0.159787,0.139063,0.446638,0.795491,0.696552,0.746022
4,4,Bass Coast,20740,23 Carpathia Street Coronet Bay VIC 3984,POINT (145.4968004 -38.4790177),0.151724,3.0,2.0,2.0,86.750000,...,0.200727,0.112745,2017,2613.00,0.036732,0.121052,0.458013,0.640711,0.848276,0.744494
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223,232,Wodonga,27170,20 Klim Street Killara VIC 3691,POINT (146.8857882 -36.1313959),0.257339,3.0,2.0,2.0,10.213333,...,0.092164,1.000000,2026,3705.09,0.093241,0.357629,0.219153,0.332158,0.742661,0.537409
224,233,Wyndham,27260,3 Victor Court Hoppers Crossing VIC 3029,POINT (144.6772403 -37.8273763),0.295431,4.0,2.0,2.0,6.440000,...,0.084639,0.030307,2026,16977.57,1.000000,0.353220,0.005616,0.996126,0.704569,0.850348
225,234,Yarra,27350,310/8 Howard Street Richmond VIC 3121,POINT (144.9938461 -37.82125690000001),0.472238,2.0,1.0,1.0,3.283333,...,0.013752,0.004775,2026,12760.28,0.205944,0.982061,0.465734,0.950742,0.527762,0.739252
226,235,Yarra Ranges,27450,315 MACCLESFIELD ROAD Macclesfield VIC 3782,POINT (145.3260355 -37.75560160000001),0.351503,3.0,2.0,2.0,4.188333,...,0.089258,0.039783,2026,7311.82,0.214466,0.185341,0.178055,0.822264,0.648497,0.735381
