### With predicted rental prices, we are now interested in the growth rates for each SA2

In [8]:
import pandas as pd
import glob
import os

# Directory holding one prediction CSV per year (random-forest model output).
# Alternative input directory: r'../data/curated/2023_2027_rental_prediction/'
path = r'../data/curated/random_forest_pred/'
all_files = glob.glob(os.path.join(path, "*.csv"))

# Read every CSV in sorted filename order and stack them into a single frame
yearly_frames = [
    pd.read_csv(csv_file, index_col=None, header=0)
    for csv_file in sorted(all_files)
]
merged_df = pd.concat(yearly_frames, axis=0, ignore_index=True)

# Average the predicted price over all rows sharing the same year and SA2
by_year_and_sa2 = merged_df.groupby(['year', 'sa2_2021'], as_index=False)
predicted = by_year_and_sa2.agg({'predicted_price': 'mean'})
predicted

Unnamed: 0,year,sa2_2021,predicted_price
0,2023,201011001,414.038888
1,2023,201011002,478.705026
2,2023,201011005,364.566524
3,2023,201011006,411.442043
4,2023,201011007,445.092130
...,...,...,...
2490,2027,217031476,509.522086
2491,2027,217041477,405.742441
2492,2027,217041478,436.913434
2493,2027,217041479,417.283965


In [9]:
# Mean weekly rent per (year, SA2) from the 2022 merged data. Rows containing any
# missing value are dropped first, and the rent column is renamed to
# 'predicted_price' so it shares its column name with the `predicted` frame.
rental_2022 = (
    pd.read_csv('../data/curated/merged_dataset/2022_merged_data.csv')
    .dropna()
    .groupby(['year', 'sa2_2021'], as_index=False)
    .agg({'weekly_rent': 'mean'})
    .rename(columns={'weekly_rent': 'predicted_price'})
)
rental_2022

Unnamed: 0,year,sa2_2021,predicted_price
0,2022,201011001,437.468013
1,2022,201011002,421.270053
2,2022,201011005,402.559524
3,2022,201011006,440.000000
4,2022,201011007,530.625000
...,...,...,...
494,2022,217031476,518.750000
495,2022,217041477,414.069767
496,2022,217041478,771.301370
497,2022,217041479,437.025641


In [10]:
# Stack the 2022 rents on top of the predicted prices with a fresh 0..n-1 index
growth_df = pd.concat([rental_2022, predicted], ignore_index=True)
growth_df

Unnamed: 0,year,sa2_2021,predicted_price
0,2022,201011001,437.468013
1,2022,201011002,421.270053
2,2022,201011005,402.559524
3,2022,201011006,440.000000
4,2022,201011007,530.625000
...,...,...,...
2989,2027,217031476,509.522086
2990,2027,217041477,405.742441
2991,2027,217041478,436.913434
2992,2027,217041479,417.283965


In [11]:
# Year-over-year growth rate of the price for every SA2.
# growth_dict maps each sa2 code to a list of growth rates ordered by year, where
# each entry is (current year price - past year price) / past year price.
growth_dict = dict()

# Sort by year explicitly (mergesort is stable) so the result does not depend on the
# row order growth_df happens to have; sort=False keeps the SA2s in order of first
# appearance. A single groupby pass replaces re-filtering the whole frame per SA2.
growth_by_year = growth_df.sort_values('year', kind='mergesort')
for sa2, subset in growth_by_year.groupby('sa2_2021', sort=False):
    prices = subset['predicted_price']
    # diff() / shift() is (current - previous) / previous for consecutive rows.
    # The first row has no previous year, so it is dropped.
    # NOTE: consecutive rows are compared even if a year is missing in between.
    growth_dict[sa2] = (prices.diff() / prices.shift()).iloc[1:].tolist()


In [12]:
# Inspect the year-over-year growth rates collected for each SA2 (sa2 code -> list of rates)
growth_dict

{201011001: [-0.053556201973400684,
  0.06044017578085334,
  0.008838492254563108,
  0.04266525150221413,
  -0.008132191698189021],
 201011002: [0.13633765841933326,
  0.0,
  0.0,
  0.009566073405129702,
  0.06479244087526771],
 201011005: [-0.09437858797870771,
  0.02168277110344067,
  0.021787934926986007,
  0.015666734980598163,
  0.009321762865639566],
 201011006: [-0.0649044473140496,
  0.04918664038725564,
  0.04218115993251384,
  0.018662523962353,
  0.011675756867513466],
 201011007: [-0.1611926878680801,
  0.013611249428292562,
  0.0,
  0.028101023031366468,
  -0.0027452461100208802],
 201011008: [-0.09066354611516611,
  0.014501470442050633,
  0.12848821272117564,
  -0.0011679945560478606,
  0.023803963124806312],
 201011481: [0.09285086859873622,
  -0.000571690127385272,
  0.020119376574718482,
  0.04546626689568579,
  0.0],
 201011482: [0.018822749204515453,
  0.032691147263405766,
  0.028564470716425306,
  0.06818651295513323,
  0.0021731659617696113],
 201011483: [0.02757

In [13]:
import numpy as np

# Collapse each SA2's list of yearly growth rates into its average growth rate.
# The averages are computed first, then written back into the same dict object.
mean_growth = {sa2: np.mean(rates) for sa2, rates in growth_dict.items()}
growth_dict.update(mean_growth)
growth_dict

{201011001: 0.010051105173208175,
 201011002: 0.042139234539946134,
 201011005: -0.005183876820408663,
 201011006: 0.01136032676711727,
 201011007: -0.024445132303688392,
 201011008: 0.014992421123363723,
 201011481: 0.03157296438835104,
 201011482: 0.03008760922024987,
 201011483: 0.011958562988539076,
 201011484: 0.020366348033810718,
 201021009: -0.0005237919883734822,
 201021010: 0.007872910375463602,
 201021011: -0.012120624399120985,
 201021012: 0.006434604345342107,
 201031013: 0.005968964910147523,
 201031014: 0.010582116956816882,
 201031015: -0.0009910584065278912,
 201031016: 0.04670684864209753,
 201031017: 0.01276606093562962,
 202011018: 0.0233428945550891,
 202011019: -0.010925792073055468,
 202011020: 0.00796578346766533,
 202011021: -0.0003191912914323154,
 202011022: 0.01612141307015555,
 202011023: -0.0030147041518496427,
 202011024: -0.005678830271583746,
 202011025: 0.009382698829298331,
 202021026: -0.007320313854885817,
 202021027: 0.015228899480010055,
 20202102

In [14]:
# Sort the dictionary so that its values are in descending order.
# The first suburb in the dictionary = the suburb which is predicted to have the
# highest average growth rate in the next 5 years.
sorted_dict = dict(sorted(growth_dict.items(), key=lambda item: item[1], reverse=True))
sorted_dict_list = list(sorted_dict)  # same codes as sa2_list; name kept from the original cell

sa2_list = list(sorted_dict.keys())
growth_list = list(sorted_dict.values())

# Lookup table providing the SA2 code -> SA2 name columns used below
sa2_dict = pd.read_csv("../data/curated/sa2_vic_2021.csv")

# Build the code -> name mapping once instead of filtering the whole lookup table
# for every SA2. If a code appears more than once, the first name is kept.
name_by_code = sa2_dict.drop_duplicates('SA2_CODE21').set_index('SA2_CODE21')['SA2_NAME21']

# Fail with an explicit message if any ranked SA2 code has no name in the lookup table
missing_codes = [code for code in sa2_list if code not in name_by_code.index]
if missing_codes:
    raise KeyError(f"SA2 codes without a name in sa2_vic_2021.csv: {missing_codes}")

# Despite its name, this holds the name of every ranked SA2, in ranking order
top_ten_names = [name_by_code.loc[code] for code in sa2_list]

# Make a dataframe of the predicted results with columns for the suburb name,
# its SA2 code and its growth rate, then save it as a CSV
new_csv_name = "../data/curated/final_growing_rates_rf.csv"

data = {'Suburb_Name': top_ten_names,
        'sa2_2021': sa2_list,
        'Growth_Rate': growth_list}

df = pd.DataFrame(data)
print(df)
df.to_csv(new_csv_name, index=False)

             Suburb_Name   sa2_2021  Growth_Rate
0              Frankston  214011371     0.102386
1               Numurkah  216021413     0.100529
2     Narre Warren North  212021299     0.099749
3           Malvern East  208041195     0.093483
4             Langwarrin  214011374     0.091307
..                   ...        ...          ...
494       Sunshine North  213011337    -0.061142
495  Kilmore - Broadford  204011056    -0.072298
496         Moyne - West  217041478    -0.080554
497               Echuca  216011406    -0.087210
498      Bundoora - East  209011196    -0.100505

[499 rows x 3 columns]
