### This file combines the predicted 2025 to 2027 data to calculate a growth rate for each property, and then calculates the overall growth rate for each suburb

The goal is to find the suburb(s) and the property(s) with the highest growth rate \
Created by Yuecheng Wang 30-09-2024

In [2]:
import pandas as pd

In [3]:
# Load the curated dataset and keep only the columns needed to compute
# per-property growth: the property identifiers, the year and the cost.
df = pd.read_csv('../../data/curated/final_with_predictions_15_to_27.csv')
df_filtered = df.loc[:, ['Address', 'Suburb', 'Year', 'Cost']]

In [4]:
# Order rows chronologically within each property so that shift(1) below pairs
# every row with the preceding row of the same (Address, Suburb) group.
df_filtered = df_filtered.sort_values(by=['Address', 'Suburb', 'Year'])

# Growth rate is stored as a year-over-year RATIO, not a percentage:
#     Growth Rate = Cost in current row / Cost in previous row of the property
# so 1.025 means +2.5% and 0.95 means -5%. The first row of each property has
# no predecessor and is therefore NaN.
# NOTE(review): "previous row" equals "previous year" only if every property
# has exactly one row per consecutive year - confirm against the input data.
# The grouped shift is vectorised, avoiding one Python lambda call per group.
previous_cost = df_filtered.groupby(['Address', 'Suburb'])['Cost'].shift(1)
df_filtered['Growth Rate'] = df_filtered['Cost'] / previous_cost

# Display the first few rows to check the growth rate calculation
print(df_filtered.head())


                                             Address         Suburb    Year  \
18010  004B/12 Albert Street, Hawthorn East VIC 3123  East Hawthorn  2015.0   
18011  004B/12 Albert Street, Hawthorn East VIC 3123  East Hawthorn  2016.0   
18012  004B/12 Albert Street, Hawthorn East VIC 3123  East Hawthorn  2017.0   
18013  004B/12 Albert Street, Hawthorn East VIC 3123  East Hawthorn  2018.0   
18014  004B/12 Albert Street, Hawthorn East VIC 3123  East Hawthorn  2019.0   

             Cost  Growth Rate  
18010  307.692308          NaN  
18011  315.384615     1.025000  
18012  326.923077     1.036585  
18013  341.346154     1.044118  
18014  344.807692     1.010141  


In [5]:
# Reshape from long to wide: one row per (Address, Suburb) and one column per
# year, each cell holding that property's growth rate for that year.
# 'first' keeps the first value found per property/year cell.
# NOTE(review): the saved output starts at 2016, presumably because the
# all-NaN first year is dropped by pivot_table's default dropna - confirm.
df_pivot = pd.pivot_table(
    df_filtered,
    values='Growth Rate',
    index=['Address', 'Suburb'],
    columns='Year',
    aggfunc='first',
)

In [6]:
# Keep only the year columns. On a first run this is every column; on a re-run
# it leaves out the 'Average Growth Rate' column added below, so the cell can
# be executed repeatedly without int('Average Growth Rate') raising ValueError
# and without the previous average leaking into the new one.
year_columns = [col for col in df_pivot.columns if col != 'Average Growth Rate']
df_pivot = df_pivot.loc[:, year_columns].copy()

# Year labels arrive as floats (e.g. 2016.0); use plain integers as headers.
df_pivot.columns = [int(year) for year in year_columns]

# Arithmetic mean of the yearly growth ratios across all year columns for each
# property (missing years are skipped by pandas' default skipna behaviour).
df_pivot['Average Growth Rate'] = df_pivot.mean(axis=1)

# Turn the (Address, Suburb) index back into ordinary columns for export.
df_final = df_pivot.reset_index()

In [7]:
# Persist the per-property growth table (row index omitted from the file).
df_final.to_csv(path_or_buf='../../data/curated/property_growth_rates.csv', index=False)

# Preview the first rows of what was written.
print(df_final.head(n=5))

                                         Address  \
0  004B/12 Albert Street, Hawthorn East VIC 3123   
1       04/390 Burwood Highway, Burwood VIC 3125   
2       07/390 Burwood Highway, Burwood VIC 3125   
3                1 Acton Court, Newcomb VIC 3219   
4              1 Aden Court, Thomastown VIC 3074   

                                Suburb      2016      2017      2018  \
0                        East Hawthorn  1.025000  1.036585  1.044118   
1  Canterbury-Surrey Hills-Mont Albert  1.013966  1.060606  1.033766   
2  Canterbury-Surrey Hills-Mont Albert  1.013966  1.060606  1.033766   
3                     Geelong-Newcombe  1.033333  1.024194  1.043307   
4                     Thomastown-Lalor  1.035019  1.037594  1.036232   

       2019      2020      2021      2022      2023      2024      2025  \
0  1.010141  0.947016  0.918728  1.080128  1.163996  1.060503  1.170103   
1  1.037688  0.983051  0.955665  1.063402  1.114881  0.956522  1.054430   
2  1.037688  0.983051  0.9556

In [8]:
# Attach the Income column from the full dataset before aggregating by suburb.
# Values are matched on the row index, so each row of df_filtered receives the
# Income of the df row it was originally taken from.
df_filtered = df_filtered.assign(Income=df['Income'])



In [9]:
# Summarise every (Suburb, Year) pair by the median cost, median growth rate
# and median income of its rows.
suburb_year_groups = df_filtered.groupby(['Suburb', 'Year'])
df_suburb_agg = suburb_year_groups.agg(
    Median_Cost=pd.NamedAgg(column='Cost', aggfunc='median'),
    Median_Growth_Rate=pd.NamedAgg(column='Growth Rate', aggfunc='median'),
    Median_Income=pd.NamedAgg(column='Income', aggfunc='median'),
).reset_index()

In [10]:
# Write the suburb/year aggregates to disk (row index omitted from the file).
df_suburb_agg.to_csv(path_or_buf='../../data/curated/suburb_yearly_aggregates.csv', index=False)

# Preview the first rows to verify the result.
print(df_suburb_agg.head(n=5))


                                  Suburb    Year  Median_Cost  \
0  Albert Park-Middle Park-West St Kilda  2015.0       501.25   
1  Albert Park-Middle Park-West St Kilda  2016.0       515.00   
2  Albert Park-Middle Park-West St Kilda  2017.0       535.25   
3  Albert Park-Middle Park-West St Kilda  2018.0       563.00   
4  Albert Park-Middle Park-West St Kilda  2019.0       580.00   

   Median_Growth_Rate  Median_Income  
0                 NaN        62962.6  
1            1.027431        65352.0  
2            1.039320        66627.0  
3            1.051845        67518.0  
4            1.030195        68933.0  


In [11]:
# Restrict the suburb aggregates to the years 2025, 2026 and 2027.
is_forecast_year = df_suburb_agg['Year'].isin([2025, 2026, 2027])
future_data = df_suburb_agg[is_forecast_year]

# Average each suburb's median growth rate over those years.
avg_growth_rates = (
    future_data.groupby('Suburb')['Median_Growth_Rate']
    .mean()
    .rename('Average_Growth_Rate_2025_2027')
    .reset_index()
)

# Rank suburbs from highest to lowest average growth rate.
avg_growth_rates_sorted = (
    avg_growth_rates
    .sort_values(by='Average_Growth_Rate_2025_2027', ascending=False)
    .reset_index(drop=True)
)

avg_growth_rates_sorted.head(10)

Unnamed: 0,Suburb,Average_Growth_Rate_2025_2027
0,Echuca,1.102561
1,Port Melbourne,1.074349
2,Melton,1.065699
3,Herne Hill-Geelong West,1.059784
4,Wanagaratta,1.058057
5,Lara,1.057788
6,Sunbury,1.056732
7,Yarra Ranges,1.055254
8,Belmont-Grovedale,1.051702
9,Corio,1.05133
