### With the predicted rental prices in hand, we now calculate the growth rate for each SA2

In [2]:
import pandas as pd
import glob
import os

#path = r'../data/curated/2023_2027_rental_prediction/' # use your path
path = r'../data/curated/random_forest_pred/'
all_files = glob.glob(os.path.join(path , "*.csv"))

li = []

for filename in sorted(all_files):
    df = pd.read_csv(filename, index_col=None, header=0)
    li.append(df)

merged_df = pd.concat(li, axis=0, ignore_index=True)
predicted = merged_df.groupby(['year', 'sa2_2021'], as_index=False).agg({'predicted_price': 'mean'})
predicted

Unnamed: 0,year,sa2_2021,predicted_price
0,2023,201011001,471.854926
1,2023,201011002,486.516093
2,2023,201011005,404.104062
3,2023,201011006,602.554811
4,2023,201011007,535.614048
...,...,...,...
2490,2027,217031476,636.279286
2491,2027,217041477,553.683354
2492,2027,217041478,580.728736
2493,2027,217041479,544.161420


In [3]:
rental_2022 = pd.read_csv('../data/curated/merged_dataset/2022_merged_data.csv')
rental_2022.dropna(inplace=True)
rental_2022 = rental_2022.groupby(['year', 'sa2_2021'], as_index=False).agg({'weekly_rent': 'mean'})
rental_2022.rename({'weekly_rent': 'predicted_price'}, axis=1, inplace=True)
rental_2022
#rental_2022[rental_2022['sa2_2021']==206041127]
#set(rental_2022['sa2_2021'].unique()) - set(predicted['sa2_2021'].unique())

Unnamed: 0,year,sa2_2021,predicted_price
0,2022,201011001,437.468013
1,2022,201011002,421.270053
2,2022,201011005,402.559524
3,2022,201011006,440.000000
4,2022,201011007,530.625000
...,...,...,...
494,2022,217031476,518.750000
495,2022,217041477,414.069767
496,2022,217041478,771.301370
497,2022,217041479,437.025641


In [4]:
growth_df = pd.concat([rental_2022, predicted], axis=0).reset_index(drop=True)
#df[df['sa2_2021']==201011001]
growth_df

Unnamed: 0,year,sa2_2021,predicted_price
0,2022,201011001,437.468013
1,2022,201011002,421.270053
2,2022,201011005,402.559524
3,2022,201011006,440.000000
4,2022,201011007,530.625000
...,...,...,...
2989,2027,217031476,636.279286
2990,2027,217041477,553.683354
2991,2027,217041478,580.728736
2992,2027,217041479,544.161420


In [5]:
growth_dict = dict()
for sa2 in growth_df['sa2_2021'].unique():
    growth_dict[sa2] = []
    subset = growth_df[growth_df['sa2_2021']==sa2]
    for i in range(len(subset['predicted_price'])-1):
        # (current year price - past year price) / past year price
        growth_rate = (subset.iloc[i+1]['predicted_price'] - subset.iloc[i]['predicted_price'])/subset.iloc[i]['predicted_price']
        growth_dict[sa2].append(growth_rate)


In [6]:
# Inspect the per-SA2 lists of year-over-year growth rates built in the previous cell.
growth_dict

{201011001: [0.07860440425246487,
  0.007411914747089641,
  0.0005335755758959904,
  0.014635743164970256,
  -0.0017406918070273722],
 201011002: [0.15487936665717564,
  0.015035517289905672,
  0.006428452571310287,
  0.08884534170378844,
  0.012910581776783624],
 201011005: [0.0038367958006801997,
  0.0030236642325267675,
  -0.0011677844656397239,
  -0.003010354848793993,
  0.002379167381741618],
 201011006: [0.36944275137741056,
  -0.0016538794691568566,
  -0.13420269408665206,
  -0.07666189273515993,
  -0.0008092467276525432],
 201011007: [0.009402209882775212,
  -0.08554100567261685,
  0.00072734202003707,
  0.02944741781483352,
  0.019375578546484744],
 201011008: [0.4401058933983154,
  0.03248350023434983,
  0.06890965964977615,
  -0.01920380429613009,
  -0.2041146490656727],
 201011481: [0.187223777010277,
  0.0020194541473977762,
  0.010234731245268374,
  0.009783181043176074,
  0.03141060441064297],
 201011482: [0.13011022788521953,
  0.007586718712505883,
  0.0120943873317324

In [7]:
import numpy as np
for key in growth_dict.keys():
    growth_dict[key] = np.mean(growth_dict[key])
growth_dict

{201011001: 0.019888989186678675,
 201011002: 0.05561985199979273,
 201011005: 0.0010122976201029736,
 201011006: 0.03122300767175784,
 201011007: -0.0053176914816972605,
 201011008: 0.06363611998412773,
 201011481: 0.048134349571352444,
 201011482: 0.03440312648252662,
 201011483: 0.03789881299160204,
 201011484: 0.06383181025815032,
 201021009: 0.07327585890859285,
 201021010: 0.06681728967543521,
 201021011: -0.00945479194257888,
 201021012: 0.05480254896843485,
 201031013: 0.09768245152770648,
 201031014: 0.12807697147660638,
 201031015: 0.04053134151146006,
 201031016: 0.0723333126810783,
 201031017: 0.15534011214693855,
 202011018: 0.027893323268067056,
 202011019: 0.021876533692393896,
 202011020: 0.021762479273554635,
 202011021: 0.019809945554956833,
 202011022: 0.02201153918425744,
 202011023: 0.0016431276931974173,
 202011024: -0.006071648658510321,
 202011025: 0.03363743307155139,
 202021026: 0.021237942966676488,
 202021027: 0.028358971237030507,
 202021028: 0.038061921815

In [8]:
# Sort dictionary so that the dictionary values are sorted as descneding order 
# The first subrub in the dictionary = the suburb which is predicted to have the highest average growth rate in the next 5 years 
sorted_dict = dict(sorted(growth_dict.items(), key=lambda item: item[1], reverse=True))
sorted_dict_list = list(sorted_dict)

sa2_list = list(sorted_dict.keys())
growth_list = list(sorted_dict.values())

top_ten_names = []

sa2_dict = pd.read_csv("../data/curated/sa2_vic_2021.csv")

i=0
while i < len(sa2_list):
    selected = sa2_list[i]
    suburb_name = sa2_dict[sa2_dict['SA2_CODE21']==selected]['SA2_NAME21'].to_list()[0]
    top_ten_names.append(suburb_name)
    i+=1

# Make a dataframe with the prediced results with columns of suburb name, its sa2 code and its growth rate
new_csv_name = "../data/curated/final_growing_rates.csv" 

data = {'Suburb_Name': top_ten_names,
        'sa2_2021': sa2_list,
        'Growth_Rate': growth_list}

df = pd.DataFrame(data)
print(df)
df.to_csv(new_csv_name, index=False)

          Suburb_Name   sa2_2021  Growth_Rate
0        Nhill Region  215011390     0.200466
1        West Wimmera  215011393     0.197992
2        Yarriambiack  215011394     0.197407
3              Kerang  215031402     0.169350
4           Rushworth  216011410     0.164419
..                ...        ...          ...
494    Box Hill North  207031164    -0.036016
495  Lorne - Anglesea  203031049    -0.037958
496      Moyne - West  217041478    -0.054437
497   Bundoora - East  209011196    -0.070645
498            Echuca  216011406    -0.072579

[499 rows x 3 columns]
