### With the predicted rental prices in hand, we now compute the year-over-year growth rate for each SA2

In [1]:
import pandas as pd
import glob
import os

# Directory holding the rental prediction CSVs (relative to this notebook)
path = r'../data/curated/2023_2027_rental_prediction/'
all_files = glob.glob(os.path.join(path, "*.csv"))

# Read the files in sorted filename order so the stacked frame is deterministic
frames = [
    pd.read_csv(csv_file, index_col=None, header=0)
    for csv_file in sorted(all_files)
]

merged_df = pd.concat(frames, axis=0, ignore_index=True)

# Collapse to a single mean predicted price per (year, SA2) pair
predicted = (
    merged_df
    .groupby(['year', 'sa2_2021'], as_index=False)
    .agg({'predicted_price': 'mean'})
)
predicted

Unnamed: 0,year,sa2_2021,predicted_price
0,2023,201011001,409.906007
1,2023,201011002,442.836277
2,2023,201011005,399.956139
3,2023,201011006,410.364018
4,2023,201011007,508.607173
...,...,...,...
2490,2027,217031476,517.442643
2491,2027,217041477,478.447785
2492,2027,217041478,631.874929
2493,2027,217041479,500.062428


In [2]:
# Mean weekly rent per (year, SA2) from the 2022 merged dataset. Rows with any
# missing value are dropped first, and the rent column is renamed to
# 'predicted_price' so it lines up with the columns of `predicted`.
rental_2022 = (
    pd.read_csv('../data/curated/merged_dataset/2022_merged_data.csv')
    .dropna()
    .groupby(['year', 'sa2_2021'], as_index=False)
    .agg({'weekly_rent': 'mean'})
    .rename(columns={'weekly_rent': 'predicted_price'})
)
rental_2022

Unnamed: 0,year,sa2_2021,predicted_price
0,2022,201011001,437.468013
1,2022,201011002,421.270053
2,2022,201011005,402.559524
3,2022,201011006,440.000000
4,2022,201011007,530.625000
...,...,...,...
494,2022,217031476,518.750000
495,2022,217041477,414.069767
496,2022,217041478,771.301370
497,2022,217041479,437.025641


In [3]:
# Stack the 2022 rents on top of the predicted prices into one long table
# with a fresh 0..n-1 index.
growth_df = pd.concat([rental_2022, predicted], ignore_index=True)
growth_df

Unnamed: 0,year,sa2_2021,predicted_price
0,2022,201011001,437.468013
1,2022,201011002,421.270053
2,2022,201011005,402.559524
3,2022,201011006,440.000000
4,2022,201011007,530.625000
...,...,...,...
2989,2027,217031476,517.442643
2990,2027,217041477,478.447785
2991,2027,217041478,631.874929
2992,2027,217041479,500.062428


In [4]:
def yearly_growth_rates(price_df):
    """Compute consecutive growth rates of ``predicted_price`` for each SA2.

    Parameters
    ----------
    price_df : pandas.DataFrame
        Long-format table with the columns ``year``, ``sa2_2021`` and
        ``predicted_price``.

    Returns
    -------
    dict
        Maps each ``sa2_2021`` value (in order of first appearance) to a list
        of growth rates between successive rows of that SA2, i.e.
        ``(current price - previous price) / previous price``. An SA2 with a
        single row maps to an empty list.

    Notes
    -----
    Rows are sorted by ``year`` first so that "previous row" really means the
    previous available year, rather than relying on the order in which the
    frame happened to be concatenated. Gaps between years are not detected:
    if a year is missing for an SA2 the rate spans the gap.
    """
    # Stable sort keeps the original relative order of rows within a year.
    ordered = price_df.sort_values('year', kind='stable')

    rates_by_sa2 = {}
    # sort=False keeps SA2 keys in order of first appearance.
    for sa2, group in ordered.groupby('sa2_2021', sort=False):
        prices = group['predicted_price']
        # (current year price - past year price) / past year price
        rates = prices.diff() / prices.shift()
        # The first row has no predecessor, so its rate is NaN and is dropped.
        rates_by_sa2[sa2] = rates.iloc[1:].tolist()
    return rates_by_sa2


growth_dict = yearly_growth_rates(growth_df)


In [5]:
# Display the per-SA2 lists of consecutive growth rates. At this point each
# value is still a list; a later cell replaces it with the list's mean.
growth_dict

{201011001: [-0.06300347909123724,
  0.03555018325740427,
  0.03432602102514226,
  0.03314286792315705,
  0.032030575935021094],
 201011002: [0.051193346883381896,
  0.03287660621763329,
  0.03180966838433538,
  0.030804574879296644,
  0.029822258389008063],
 201011005: [-0.00646707994582338,
  0.03634872456790848,
  0.03506062900082062,
  0.03384614244432817,
  0.03267035468502969],
 201011006: [-0.06735450351611018,
  0.0354268281057495,
  0.03421098001778446,
  0.03303536291522644,
  0.03193303739309591],
 201011007: [-0.04149413783539888,
  0.028658901681093106,
  0.027842554980746635,
  0.027066903123111562,
  0.026299200211010284],
 201011008: [0.058754923583528955,
  0.037481789040185176,
  0.03612372639396047,
  0.03481803985482797,
  0.033594995278166326],
 201011481: [0.11000970962945511,
  0.03630422971742833,
  0.03500990142031971,
  0.033789860343218354,
  0.032635417440929085],
 201011482: [0.1285414092673241,
  0.03250179525436275,
  0.031458460590281845,
  0.03047486576

In [6]:
import numpy as np
# Reduce each SA2's list of growth rates to its arithmetic mean
growth_dict = {sa2: np.mean(rates) for sa2, rates in growth_dict.items()}
growth_dict

{201011001: 0.014409233809897486,
 201011002: 0.035301290950731055,
 201011005: 0.026291754150452717,
 201011006: 0.013450340983149229,
 201011007: 0.013674684432112541,
 201011008: 0.040154694830133786,
 201011481: 0.04954982371027011,
 201011482: 0.05049782119328376,
 201011483: 0.04712612323272631,
 201011484: 0.04614471593394036,
 201021009: 0.033360160828571656,
 201021010: 0.04674057129684713,
 201021011: 0.019253598612506632,
 201021012: 0.01849805148825713,
 201031013: 0.034959770154961614,
 201031014: 0.03665612927657526,
 201031015: 0.02792535006479968,
 201031016: 0.040675516126071065,
 201031017: 0.03914203123784356,
 202011018: 0.06553934768075363,
 202011019: 0.0012023403203279368,
 202011020: 0.043337442544852015,
 202011021: 0.05240567638451714,
 202011022: 0.030179015469500425,
 202011023: 0.011975997472615695,
 202011024: 0.01566868110345215,
 202011025: 0.017544312697641437,
 202021026: 0.014965345962720233,
 202021027: 0.05705785940673378,
 202021028: 0.025677828534