### This file is dedicated to calculated a affordable score for each suburb

Created by Ran Zhang 01-10-2024

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the CSV file
file_path = '../../data/curated/suburb_yearly_aggregates.csv'
df_affordable = pd.read_csv(file_path)

In [3]:
# Filter the data for the years 2024-2027
df_affordable_filtered = df_affordable[df_affordable['Year'].between(2024, 2027)]

In [4]:
# Define a function to calculate the affordable score per year with a rent-to-income floor
def calculate_affordable_score(row):
    if pd.isna(row['Median_Income']) or row['Median_Income'] == 0:
        return None  # Avoid division by zero or missing income
    
    # Convert annual income to weekly income
    rent_to_income_ratio = row['Median_Cost'] / (row['Median_Income'] / 52)
    
    # Apply logarithmic transformation to compress the ratio's range
    rent_to_income_ratio = np.log1p(rent_to_income_ratio)  # Logarithmic transformation to smooth extreme values
    
    # Set a floor for the rent-to-income ratio to prevent overly favoring low-rent areas
    rent_to_income_ratio = max(0.4, min(rent_to_income_ratio, 1))  # Set a floor of 0.4
    
    # Calculate the base affordable score (higher score means more affordable)
    affordable_score = max((1 - rent_to_income_ratio) * 100, 0)
    
    # Apply growth rate adjustment if available
    if pd.notna(row['Median_Growth_Rate']):
        affordable_score *= (1 - min(row['Median_Growth_Rate'], 1))
    
    # Ensure the score is between 0 and 100 (initial raw score before normalization)
    return max(min(affordable_score, 100), 0)

# Apply the function to each row to calculate the affordable score per year
df_affordable_filtered['Affordable Score'] = df_affordable_filtered.apply(calculate_affordable_score, axis=1)

# Group by suburb and calculate the average affordable score across 2024-2027
df_affordable_avg = df_affordable_filtered.groupby('Suburb')['Affordable Score'].mean().reset_index()

# Normalization step to ensure scores are between 0 and 100, and median is around 50
min_score = df_affordable_avg['Affordable Score'].min()
max_score = df_affordable_avg['Affordable Score'].max()

df_affordable_avg['Scaled Affordable Score'] = df_affordable_avg['Affordable Score'].apply(
    lambda x: 100 * (x - min_score) / (max_score - min_score) if max_score != min_score else 50
)

# Shift scores so that the median is around 50
median_score = df_affordable_avg['Scaled Affordable Score'].median()
df_affordable_avg['Final Affordable Score'] = df_affordable_avg['Scaled Affordable Score'].apply(
    lambda x: x - (median_score - 50)
)

# Ensure final scores are still between 0 and 100
df_affordable_avg['Final Affordable Score'] = df_affordable_avg['Final Affordable Score'].clip(lower=0, upper=100)

# Sort suburbs by the final affordable score
df_affordable_avg_sorted = df_affordable_avg.sort_values(by='Final Affordable Score', ascending=False)

# Save the final affordable score to a CSV file 
df_affordable_avg_sorted[['Suburb', 'Final Affordable Score']].to_csv('../../data/curated/affordable_score_by_suburb.csv', index=False)

# Display the top 10 most affordable suburbs
print(df_affordable_avg_sorted[['Suburb', 'Final Affordable Score']].head(10))


                    Suburb  Final Affordable Score
46                  Echuca              100.000000
14                Brighton               93.260442
64       Hampton-Beaumaris               81.453345
8                  Benalla               75.910494
101         Port Melbourne               72.854075
20       Burwood-Ashburton               70.304547
84   Mount Clear-Buninyong               69.412607
15           Brighton East               67.360892
134    Wendouree-Alfredton               65.282508
79                 Mildura               64.255931


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_affordable_filtered['Affordable Score'] = df_affordable_filtered.apply(calculate_affordable_score, axis=1)


In [5]:
df_affordable_avg_sorted.describe()

Unnamed: 0,Affordable Score,Scaled Affordable Score,Final Affordable Score
count,143.0,143.0,143.0
mean,0.318361,5.200917,53.101044
std,0.65321,10.671203,8.108001
min,0.0,0.0,48.237452
25%,0.000371,0.006065,48.243517
50%,0.10789,1.762548,50.0
75%,0.363498,5.93831,54.175762
max,6.12124,100.0,100.0
