### With predicted rental prices, we are now interested in the growth rates for each SA2

In [1]:
import pandas as pd
import glob
import os

#path = r'../data/curated/2023_2027_rental_prediction/' # use your path
# Directory holding one CSV of random-forest rental predictions per file.
path = r'../data/curated/random_forest_pred/'
all_files = glob.glob(os.path.join(path , "*.csv"))

# Read the files in sorted filename order and stack them into one frame.
li = [
    pd.read_csv(csv_file, index_col=None, header=0)
    for csv_file in sorted(all_files)
]
merged_df = pd.concat(li, axis=0, ignore_index=True)

# Average predicted price for every (year, SA2) pair.
predicted = (
    merged_df
    .groupby(['year', 'sa2_2021'], as_index=False)['predicted_price']
    .mean()
)
predicted

Unnamed: 0,year,sa2_2021,predicted_price
0,2023,201011001,453.562520
1,2023,201011002,484.825188
2,2023,201011005,392.232222
3,2023,201011006,483.315186
4,2023,201011007,485.545000
...,...,...,...
2490,2027,217031476,482.363333
2491,2027,217041477,431.872708
2492,2027,217041478,426.210997
2493,2027,217041479,404.354975


In [2]:
# Observed 2022 rents: drop incomplete rows, average weekly rent per
# (year, SA2), and rename the column so it lines up with the predictions.
rental_2022 = (
    pd.read_csv('../data/curated/merged_dataset/2022_merged_data.csv')
    .dropna()
    .groupby(['year', 'sa2_2021'], as_index=False)['weekly_rent']
    .mean()
    .rename(columns={'weekly_rent': 'predicted_price'})
)
rental_2022

Unnamed: 0,year,sa2_2021,predicted_price
0,2022,201011001,437.468013
1,2022,201011002,421.270053
2,2022,201011005,402.559524
3,2022,201011006,440.000000
4,2022,201011007,530.625000
...,...,...,...
494,2022,217031476,518.750000
495,2022,217041477,414.069767
496,2022,217041478,771.301370
497,2022,217041479,437.025641


In [3]:
# Stack the 2022 averages on top of the predicted averages, with a fresh
# 0..n-1 index, so each SA2 has one row per year in a single frame.
growth_df = pd.concat([rental_2022, predicted], ignore_index=True)
growth_df

Unnamed: 0,year,sa2_2021,predicted_price
0,2022,201011001,437.468013
1,2022,201011002,421.270053
2,2022,201011005,402.559524
3,2022,201011006,440.000000
4,2022,201011007,530.625000
...,...,...,...
2989,2027,217031476,482.363333
2990,2027,217041477,431.872708
2991,2027,217041478,426.210997
2992,2027,217041479,404.354975


In [4]:
# Build {SA2 code: [year-over-year growth rates]} where each rate is
#     (current year price - past year price) / past year price
#
# Rows are ordered by year explicitly (stable sort) so that "next row" always
# means "later year", instead of relying on the order in which the frames
# happened to be concatenated. Grouping once avoids re-filtering the whole
# frame for every SA2, and sort=False keeps SA2s in first-appearance order.
growth_dict = dict()
prices_by_year = growth_df.sort_values('year', kind='stable')
for sa2, prices in prices_by_year.groupby('sa2_2021', sort=False)['predicted_price']:
    values = prices.to_numpy(dtype=float)
    # An SA2 with a single row yields an empty list (no pair of years to compare).
    growth_dict[sa2] = list((values[1:] - values[:-1]) / values[:-1])


In [5]:
# Display the mapping of SA2 code -> list of consecutive-row growth rates.
growth_dict

{201011001: [0.03679013294176113,
  0.009411017581387869,
  -0.003145099889859999,
  0.012200295534146926,
  0.002618060782771042],
 201011002: [0.1508655414067728,
  0.03279818127551463,
  0.004531748045588778,
  0.0833677619118761,
  0.010802179280164761],
 201011005: [-0.025654098279856055,
  -0.00030434776450456453,
  -0.0038387121054473485,
  0.004703409040401866,
  0.0],
 201011006: [0.09844360406008147,
  -0.0001270871720557671,
  0.018891321733717603,
  -0.03186380077778466,
  0.022652492877406505],
 201011007: [-0.08495641931684342,
  0.0037389256745856566,
  0.0032872660120632675,
  0.006952646334500784,
  0.041640256281581564],
 201011008: [0.10358930493533158,
  0.03480443250198127,
  0.11712217588140271,
  0.005881940204586631,
  -0.10694570950213954],
 201011481: [0.08529223091564837,
  0.015929054034132716,
  0.01988449662128661,
  0.006383720868689033,
  0.00566991341666975],
 201011482: [0.07423614844021395,
  0.030399504495575068,
  0.006935082235228049,
  0.018817597

In [6]:
import numpy as np

# Collapse each SA2's list of growth rates into its arithmetic mean, so the
# mapping becomes SA2 code -> average growth rate.
growth_dict = {sa2: np.mean(rates) for sa2, rates in growth_dict.items()}
growth_dict

{201011001: 0.011574881390041394,
 201011002: 0.05647308238398341,
 201011005: -0.0050187498218812205,
 201011006: 0.02159930614427303,
 201011007: -0.00586746500282243,
 201011008: 0.03089042880423254,
 201011481: 0.026631883171285294,
 201011482: 0.026372342151923467,
 201011483: 0.025872445800938244,
 201011484: 0.04087831804325522,
 201021009: 0.056835398431694606,
 201021010: 0.03523209301388562,
 201021011: -0.024411022639459434,
 201021012: 0.045256139178823096,
 201031013: 0.022299322537278694,
 201031014: 0.015307410848295577,
 201031015: 0.018994564396342062,
 201031016: 0.03155045406045874,
 201031017: 0.047248776900383246,
 202011018: 0.020169199920524395,
 202011019: -0.00786102725693478,
 202011020: 0.013682224366518513,
 202011021: 0.01732807816420125,
 202011022: 0.0003662849655524852,
 202011023: 0.00014921895306135995,
 202011024: -0.011612958607240943,
 202011025: 0.013195839622180216,
 202021026: 0.007948537499554171,
 202021027: 0.009114743610381832,
 202021028: 0.

In [7]:
# Sort the dictionary so that its values are in descending order.
# The first suburb in the dictionary = the suburb which is predicted to have
# the highest average growth rate in the next 5 years.
sorted_dict = dict(sorted(growth_dict.items(), key=lambda item: item[1], reverse=True))
sorted_dict_list = list(sorted_dict)

sa2_list = list(sorted_dict.keys())
growth_list = list(sorted_dict.values())

# SA2 code -> SA2 name lookup, built once instead of filtering the whole
# table for every code. On duplicated codes the first row wins, matching a
# "take the first match" lookup.
sa2_dict = pd.read_csv("../data/curated/sa2_vic_2021.csv")
name_by_code = (
    sa2_dict
    .drop_duplicates('SA2_CODE21')
    .set_index('SA2_CODE21')['SA2_NAME21']
    .to_dict()
)

# Fail with an explicit message if any ranked SA2 has no name on record.
missing_codes = [code for code in sa2_list if code not in name_by_code]
if missing_codes:
    raise KeyError(f"SA2 codes not found in sa2_vic_2021.csv: {missing_codes}")

# NOTE: despite its name, this list holds the name of *every* ranked SA2
# (in ranking order), not only the top ten; the name is kept for compatibility.
top_ten_names = [name_by_code[code] for code in sa2_list]

# Make a dataframe of the predicted results with columns for the suburb name,
# its SA2 code and its growth rate, then save it.
new_csv_name = "../data/curated/final_growing_rates_rf.csv" 

data = {'Suburb_Name': top_ten_names,
        'sa2_2021': sa2_list,
        'Growth_Rate': growth_list}

df = pd.DataFrame(data)
print(df)
df.to_csv(new_csv_name, index=False)

             Suburb_Name   sa2_2021  Growth_Rate
0           West Wimmera  215011393     0.132695
1     Narre Warren North  212021299     0.128500
2      Point Cook - East  213051464     0.123002
3                 Elwood  206051129     0.108843
4        St Kilda - West  206051514     0.104327
..                   ...        ...          ...
494  Kilmore - Broadford  204011056    -0.046879
495      Bundoora - West  209041217    -0.059356
496         Moyne - West  217041478    -0.080701
497      Bundoora - East  209011196    -0.090306
498               Echuca  216011406    -0.113177

[499 rows x 3 columns]
